git.proxmox.com Git - mirror_ubuntu-disco-kernel.git/commitdiff
Merge ra.kernel.org:/pub/scm/linux/kernel/git/davem/net
author     David S. Miller <davem@davemloft.net>
           Sun, 5 Aug 2018 20:04:31 +0000 (13:04 -0700)
committer  David S. Miller <davem@davemloft.net>
           Sun, 5 Aug 2018 20:04:31 +0000 (13:04 -0700)
Lots of overlapping changes, mostly trivial in nature.

The mlxsw conflict was resolved using the example
resolution at:

https://github.com/jpirko/linux_mlxsw/blob/combined_queue/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c

Signed-off-by: David S. Miller <davem@davemloft.net>
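
For reference, the sketch below shows one way such a conflict can be
resolved against a published example resolution. This is an assumed
workflow, not a record of the commands actually used; the raw-view URL is
derived from the blob link above and assumes that branch still exists.

    # Merge net into net-next; core_acl_flex_actions.c conflicts.
    git pull ra.kernel.org:/pub/scm/linux/kernel/git/davem/net

    # Overwrite the conflicted file with the example resolution.
    curl -o drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c \
        https://raw.githubusercontent.com/jpirko/linux_mlxsw/combined_queue/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c

    # Mark the conflict resolved and complete the merge.
    git add drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
    git commit
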
1237 files changed:
Documentation/ABI/stable/sysfs-class-rfkill
Documentation/ABI/testing/sysfs-class-net-queues
Documentation/bpf/bpf_devel_QA.rst
Documentation/devicetree/bindings/net/brcm,mdio-mux-iproc.txt
Documentation/devicetree/bindings/net/can/xilinx_can.txt
Documentation/devicetree/bindings/net/dsa/realtek-smi.txt [new file with mode: 0644]
Documentation/devicetree/bindings/net/dsa/vitesse,vsc73xx.txt [new file with mode: 0644]
Documentation/devicetree/bindings/net/fsl-fman.txt
Documentation/devicetree/bindings/net/rockchip-dwmac.txt
Documentation/devicetree/bindings/ptp/ptp-qoriq.txt
Documentation/devicetree/bindings/vendor-prefixes.txt
Documentation/networking/00-INDEX
Documentation/networking/alias.rst [new file with mode: 0644]
Documentation/networking/alias.txt [deleted file]
Documentation/networking/bridge.rst [new file with mode: 0644]
Documentation/networking/bridge.txt [deleted file]
Documentation/networking/can_ucan_protocol.rst [new file with mode: 0644]
Documentation/networking/index.rst
Documentation/networking/ip-sysctl.txt
Documentation/networking/net_failover.rst
Documentation/networking/netdev-FAQ.rst [new file with mode: 0644]
Documentation/networking/netdev-FAQ.txt [deleted file]
Documentation/networking/scaling.txt
Documentation/networking/ti-cpsw.txt [new file with mode: 0644]
Documentation/process/stable-kernel-rules.rst
Documentation/process/submitting-patches.rst
Documentation/rfkill.txt
MAINTAINERS
arch/alpha/include/uapi/asm/socket.h
arch/arm/boot/dts/gemini-dlink-dir-685.dts
arch/arm/net/bpf_jit_32.c
arch/arm/net/bpf_jit_32.h
arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi
arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi
arch/arm64/boot/dts/freescale/qoriq-fman3-0.dtsi
arch/ia64/include/uapi/asm/socket.h
arch/mips/include/uapi/asm/socket.h
arch/parisc/include/uapi/asm/socket.h
arch/powerpc/boot/dts/fsl/qoriq-fman-0.dtsi
arch/powerpc/boot/dts/fsl/qoriq-fman-1.dtsi
arch/powerpc/boot/dts/fsl/qoriq-fman3-0.dtsi
arch/powerpc/boot/dts/fsl/qoriq-fman3-1.dtsi
arch/powerpc/boot/dts/fsl/qoriq-fman3l-0.dtsi
arch/s390/include/uapi/asm/socket.h
arch/sparc/include/uapi/asm/socket.h
arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
arch/xtensa/include/uapi/asm/socket.h
crypto/af_alg.c
drivers/atm/zatm.c
drivers/base/core.c
drivers/block/nbd.c
drivers/connector/connector.c
drivers/crypto/chelsio/chtls/chtls_cm.c
drivers/hwmon/hwmon.c
drivers/infiniband/hw/hfi1/vnic_main.c
drivers/infiniband/hw/mlx5/Kconfig
drivers/infiniband/hw/mlx5/cmd.c
drivers/infiniband/hw/mlx5/cmd.h
drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c
drivers/isdn/capi/capi.c
drivers/isdn/capi/capidrv.c
drivers/isdn/gigaset/bas-gigaset.c
drivers/isdn/hardware/mISDN/avmfritz.c
drivers/isdn/hardware/mISDN/hfcpci.c
drivers/isdn/hardware/mISDN/hfcsusb.c
drivers/isdn/hardware/mISDN/mISDNinfineon.c
drivers/isdn/hardware/mISDN/mISDNisar.c
drivers/isdn/hardware/mISDN/netjet.c
drivers/isdn/hisax/avm_pci.c
drivers/isdn/hisax/callc.c
drivers/isdn/hisax/config.c
drivers/isdn/hisax/gazel.c
drivers/isdn/hisax/hfc_usb.c
drivers/isdn/hisax/isar.c
drivers/isdn/hisax/l3_1tr6.c
drivers/isdn/hisax/l3dss1.c
drivers/isdn/hisax/st5481_usb.c
drivers/isdn/hysdn/hysdn_boot.c
drivers/isdn/i4l/isdn_tty.c
drivers/isdn/i4l/isdn_v110.c
drivers/isdn/mISDN/stack.c
drivers/net/bonding/bond_main.c
drivers/net/bonding/bond_sysfs.c
drivers/net/can/cc770/cc770.c
drivers/net/can/dev.c
drivers/net/can/flexcan.c
drivers/net/can/janz-ican3.c
drivers/net/can/peak_canfd/peak_canfd.c
drivers/net/can/peak_canfd/peak_pciefd_main.c
drivers/net/can/sja1000/peak_pci.c
drivers/net/can/sja1000/peak_pcmcia.c
drivers/net/can/sun4i_can.c
drivers/net/can/usb/Kconfig
drivers/net/can/usb/Makefile
drivers/net/can/usb/kvaser_usb.c [deleted file]
drivers/net/can/usb/kvaser_usb/Makefile [new file with mode: 0644]
drivers/net/can/usb/kvaser_usb/kvaser_usb.h [new file with mode: 0644]
drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c [new file with mode: 0644]
drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c [new file with mode: 0644]
drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c [new file with mode: 0644]
drivers/net/can/usb/peak_usb/pcan_usb.c
drivers/net/can/usb/peak_usb/pcan_usb_core.c
drivers/net/can/usb/peak_usb/pcan_usb_pro.c
drivers/net/can/usb/ucan.c [new file with mode: 0644]
drivers/net/can/xilinx_can.c
drivers/net/dsa/Kconfig
drivers/net/dsa/Makefile
drivers/net/dsa/bcm_sf2.c
drivers/net/dsa/bcm_sf2_cfp.c
drivers/net/dsa/bcm_sf2_regs.h
drivers/net/dsa/mv88e6xxx/chip.c
drivers/net/dsa/mv88e6xxx/chip.h
drivers/net/dsa/mv88e6xxx/global2.h
drivers/net/dsa/mv88e6xxx/global2_avb.c
drivers/net/dsa/mv88e6xxx/hwtstamp.c
drivers/net/dsa/mv88e6xxx/hwtstamp.h
drivers/net/dsa/mv88e6xxx/ptp.c
drivers/net/dsa/mv88e6xxx/ptp.h
drivers/net/dsa/realtek-smi.c [new file with mode: 0644]
drivers/net/dsa/realtek-smi.h [new file with mode: 0644]
drivers/net/dsa/rtl8366.c [new file with mode: 0644]
drivers/net/dsa/rtl8366rb.c [new file with mode: 0644]
drivers/net/dsa/vitesse-vsc73xx.c [new file with mode: 0644]
drivers/net/ethernet/Kconfig
drivers/net/ethernet/Makefile
drivers/net/ethernet/adaptec/starfire.c
drivers/net/ethernet/alteon/acenic.c
drivers/net/ethernet/amazon/ena/ena_netdev.c
drivers/net/ethernet/amd/pcnet32.c
drivers/net/ethernet/amd/xgbe/xgbe-desc.c
drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
drivers/net/ethernet/aquantia/atlantic/aq_hw.h
drivers/net/ethernet/aquantia/atlantic/aq_nic.c
drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0_internal.h
drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h
drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h
drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c
drivers/net/ethernet/aquantia/atlantic/ver.h
drivers/net/ethernet/atheros/alx/main.c
drivers/net/ethernet/aurora/Kconfig
drivers/net/ethernet/aurora/nb8800.c
drivers/net/ethernet/broadcom/Kconfig
drivers/net/ethernet/broadcom/bcmsysport.c
drivers/net/ethernet/broadcom/bgmac.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
drivers/net/ethernet/broadcom/bnxt/bnxt.c
drivers/net/ethernet/broadcom/bnxt/bnxt.h
drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h
drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h
drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
drivers/net/ethernet/broadcom/cnic.c
drivers/net/ethernet/broadcom/tg3.c
drivers/net/ethernet/cadence/Kconfig
drivers/net/ethernet/cavium/Kconfig
drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c
drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.c
drivers/net/ethernet/cavium/liquidio/lio_main.c
drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
drivers/net/ethernet/cavium/liquidio/octeon_console.c
drivers/net/ethernet/cavium/liquidio/octeon_device.h
drivers/net/ethernet/cavium/liquidio/request_manager.c
drivers/net/ethernet/cavium/thunder/nicvf_main.c
drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h
drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
drivers/net/ethernet/chelsio/cxgb4/sched.c
drivers/net/ethernet/chelsio/cxgb4/sge.c
drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h
drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.c
drivers/net/ethernet/cisco/enic/vnic_dev.c
drivers/net/ethernet/cisco/enic/vnic_rq.c
drivers/net/ethernet/cisco/enic/vnic_wq.c
drivers/net/ethernet/cortina/gemini.c
drivers/net/ethernet/emulex/benet/be.h
drivers/net/ethernet/emulex/benet/be_main.c
drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
drivers/net/ethernet/freescale/dpaa/dpaa_eth.h
drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c
drivers/net/ethernet/freescale/fec_main.c
drivers/net/ethernet/freescale/fec_ptp.c
drivers/net/ethernet/freescale/fman/fman.c
drivers/net/ethernet/freescale/fman/fman.h
drivers/net/ethernet/freescale/fman/fman_dtsec.c
drivers/net/ethernet/freescale/fman/fman_dtsec.h
drivers/net/ethernet/freescale/fman/fman_memac.c
drivers/net/ethernet/freescale/fman/fman_memac.h
drivers/net/ethernet/freescale/fman/fman_port.c
drivers/net/ethernet/freescale/fman/fman_port.h
drivers/net/ethernet/freescale/fman/fman_tgec.c
drivers/net/ethernet/freescale/fman/fman_tgec.h
drivers/net/ethernet/freescale/fman/mac.c
drivers/net/ethernet/freescale/fman/mac.h
drivers/net/ethernet/freescale/fs_enet/mac-fec.c
drivers/net/ethernet/freescale/gianfar_ethtool.c
drivers/net/ethernet/freescale/ucc_geth.c
drivers/net/ethernet/hisilicon/Kconfig
drivers/net/ethernet/hisilicon/hip04_eth.c
drivers/net/ethernet/hisilicon/hix5hd2_gmac.c
drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c
drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c
drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c
drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c
drivers/net/ethernet/hisilicon/hns/hns_enet.c
drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
drivers/net/ethernet/hisilicon/hns3/hnae3.c
drivers/net/ethernet/hisilicon/hns3/hnae3.h
drivers/net/ethernet/hisilicon/hns3/hns3_dcbnl.c
drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.h
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h
drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
drivers/net/ethernet/intel/i40e/i40e_main.c
drivers/net/ethernet/intel/i40evf/i40evf_main.c
drivers/net/ethernet/intel/igb/e1000_82575.c
drivers/net/ethernet/intel/igb/e1000_defines.h
drivers/net/ethernet/intel/igb/igb.h
drivers/net/ethernet/intel/igb/igb_main.c
drivers/net/ethernet/intel/ixgbe/ixgbe.h
drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
drivers/net/ethernet/jme.c
drivers/net/ethernet/lantiq_etop.c
drivers/net/ethernet/marvell/mvneta.c
drivers/net/ethernet/marvell/mvneta_bm.c
drivers/net/ethernet/marvell/mvneta_bm.h
drivers/net/ethernet/marvell/mvpp2/Makefile
drivers/net/ethernet/marvell/mvpp2/mvpp2.h
drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c
drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.h
drivers/net/ethernet/marvell/mvpp2/mvpp2_debugfs.c [new file with mode: 0644]
drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c
drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.h
drivers/net/ethernet/mediatek/mtk_eth_soc.c
drivers/net/ethernet/mediatek/mtk_eth_soc.h
drivers/net/ethernet/mellanox/mlx4/Makefile
drivers/net/ethernet/mellanox/mlx4/catas.c
drivers/net/ethernet/mellanox/mlx4/crdump.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx4/en_netdev.c
drivers/net/ethernet/mellanox/mlx4/en_tx.c
drivers/net/ethernet/mellanox/mlx4/fw.c
drivers/net/ethernet/mellanox/mlx4/fw.h
drivers/net/ethernet/mellanox/mlx4/main.c
drivers/net/ethernet/mellanox/mlx4/mlx4.h
drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
drivers/net/ethernet/mellanox/mlx4/profile.c
drivers/net/ethernet/mellanox/mlx5/core/Kconfig
drivers/net/ethernet/mellanox/mlx5/core/Makefile
drivers/net/ethernet/mellanox/mlx5/core/accel/accel.h [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c
drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h
drivers/net/ethernet/mellanox/mlx5/core/cmd.c
drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h
drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer_tracepoint.h [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/en.h
drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c
drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h
drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
drivers/net/ethernet/mellanox/mlx5/core/eq.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c
drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h
drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.h [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/main.c
drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
drivers/net/ethernet/mellanox/mlx5/core/mr.c
drivers/net/ethernet/mellanox/mlx5/core/vxlan.c [deleted file]
drivers/net/ethernet/mellanox/mlx5/core/vxlan.h [deleted file]
drivers/net/ethernet/mellanox/mlx5/core/wq.h
drivers/net/ethernet/mellanox/mlxsw/Kconfig
drivers/net/ethernet/mellanox/mlxsw/Makefile
drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h
drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c
drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h
drivers/net/ethernet/mellanox/mlxsw/pci.h
drivers/net/ethernet/mellanox/mlxsw/reg.h
drivers/net/ethernet/mellanox/mlxsw/resources.h
drivers/net/ethernet/mellanox/mlxsw/spectrum.c
drivers/net/ethernet/mellanox/mlxsw/spectrum.h
drivers/net/ethernet/mellanox/mlxsw/spectrum1_acl_tcam.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlxsw/spectrum1_kvdl.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlxsw/spectrum1_mr_tcam.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlxsw/spectrum2_acl_tcam.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlxsw/spectrum2_kvdl.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlxsw/spectrum2_mr_tcam.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_atcam.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_ctcam.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.h [deleted file]
drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h
drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
drivers/net/ethernet/mellanox/mlxsw/trap.h
drivers/net/ethernet/micrel/ksz884x.c
drivers/net/ethernet/microchip/Makefile
drivers/net/ethernet/microchip/lan743x_ethtool.c [new file with mode: 0644]
drivers/net/ethernet/microchip/lan743x_ethtool.h [new file with mode: 0644]
drivers/net/ethernet/microchip/lan743x_main.c
drivers/net/ethernet/microchip/lan743x_main.h
drivers/net/ethernet/mscc/ocelot.c
drivers/net/ethernet/mscc/ocelot.h
drivers/net/ethernet/mscc/ocelot_board.c
drivers/net/ethernet/neterion/Kconfig
drivers/net/ethernet/neterion/vxge/vxge-config.c
drivers/net/ethernet/netronome/nfp/bpf/jit.c
drivers/net/ethernet/netronome/nfp/bpf/main.c
drivers/net/ethernet/netronome/nfp/bpf/main.h
drivers/net/ethernet/netronome/nfp/bpf/offload.c
drivers/net/ethernet/netronome/nfp/bpf/verifier.c
drivers/net/ethernet/netronome/nfp/flower/action.c
drivers/net/ethernet/netronome/nfp/flower/cmsg.h
drivers/net/ethernet/netronome/nfp/flower/lag_conf.c
drivers/net/ethernet/netronome/nfp/flower/main.h
drivers/net/ethernet/netronome/nfp/flower/metadata.c
drivers/net/ethernet/netronome/nfp/flower/offload.c
drivers/net/ethernet/netronome/nfp/nfp_app.c
drivers/net/ethernet/netronome/nfp/nfp_app.h
drivers/net/ethernet/netronome/nfp/nfp_asm.h
drivers/net/ethernet/netronome/nfp/nfp_main.c
drivers/net/ethernet/netronome/nfp/nfp_net.h
drivers/net/ethernet/netronome/nfp/nfp_net_common.c
drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c
drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h
drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c
drivers/net/ethernet/nvidia/forcedeth.c
drivers/net/ethernet/oki-semi/pch_gbe/Makefile
drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h
drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_api.c [deleted file]
drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_api.h [deleted file]
drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c
drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_phy.c
drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_phy.h
drivers/net/ethernet/packetengines/Kconfig
drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
drivers/net/ethernet/qlogic/qed/qed_cxt.c
drivers/net/ethernet/qlogic/qed/qed_dcbx.c
drivers/net/ethernet/qlogic/qed/qed_debug.c
drivers/net/ethernet/qlogic/qed/qed_dev.c
drivers/net/ethernet/qlogic/qed/qed_hsi.h
drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c
drivers/net/ethernet/qlogic/qed/qed_iscsi.c
drivers/net/ethernet/qlogic/qed/qed_iwarp.c
drivers/net/ethernet/qlogic/qed/qed_ll2.c
drivers/net/ethernet/qlogic/qed/qed_main.c
drivers/net/ethernet/qlogic/qed/qed_mcp.c
drivers/net/ethernet/qlogic/qed/qed_mcp.h
drivers/net/ethernet/qlogic/qed/qed_rdma.c
drivers/net/ethernet/qlogic/qed/qed_roce.c
drivers/net/ethernet/qlogic/qed/qed_sriov.c
drivers/net/ethernet/qlogic/qed/qed_vf.c
drivers/net/ethernet/qlogic/qede/qede_ethtool.c
drivers/net/ethernet/qlogic/qede/qede_filter.c
drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c
drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
drivers/net/ethernet/realtek/Kconfig
drivers/net/ethernet/realtek/r8169.c
drivers/net/ethernet/renesas/ravb_main.c
drivers/net/ethernet/renesas/sh_eth.c
drivers/net/ethernet/renesas/sh_eth.h
drivers/net/ethernet/sfc/Makefile
drivers/net/ethernet/sfc/ef10_sriov.c
drivers/net/ethernet/sfc/efx.c
drivers/net/ethernet/sfc/net_driver.h
drivers/net/ethernet/sfc/rx.c
drivers/net/ethernet/smsc/epic100.c
drivers/net/ethernet/socionext/netsec.c
drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
drivers/net/ethernet/stmicro/stmmac/hwif.h
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
drivers/net/ethernet/sun/ldmvsw.c
drivers/net/ethernet/sun/niu.c
drivers/net/ethernet/sun/sunvnet.c
drivers/net/ethernet/tehuti/tehuti.c
drivers/net/ethernet/ti/cpsw.c
drivers/net/ethernet/ti/cpts.c
drivers/net/ethernet/ti/davinci_cpdma.c
drivers/net/ethernet/ti/netcp_core.c
drivers/net/ethernet/xilinx/xilinx_emaclite.c
drivers/net/fjes/fjes_main.c
drivers/net/geneve.c
drivers/net/gtp.c
drivers/net/hamradio/6pack.c
drivers/net/hyperv/hyperv_net.h
drivers/net/hyperv/netvsc_drv.c
drivers/net/macvlan.c
drivers/net/net_failover.c
drivers/net/netdevsim/Makefile
drivers/net/netdevsim/bpf.c
drivers/net/netdevsim/ipsec.c [new file with mode: 0644]
drivers/net/netdevsim/netdev.c
drivers/net/netdevsim/netdevsim.h
drivers/net/ntb_netdev.c
drivers/net/phy/Kconfig
drivers/net/phy/dp83tc811.c
drivers/net/phy/fixed_phy.c
drivers/net/phy/marvell.c
drivers/net/phy/mdio-mux-bcm-iproc.c
drivers/net/phy/mdio-mux-gpio.c
drivers/net/phy/mscc.c
drivers/net/phy/phy.c
drivers/net/phy/phy_device.c
drivers/net/phy/realtek.c
drivers/net/phy/sfp.c
drivers/net/phy/vitesse.c
drivers/net/phy/xilinx_gmii2rgmii.c
drivers/net/ppp/ppp_mppe.c
drivers/net/team/team.c
drivers/net/tun.c
drivers/net/usb/asix_devices.c
drivers/net/usb/catc.c
drivers/net/usb/cdc-phonet.c
drivers/net/usb/hso.c
drivers/net/usb/kaweth.c
drivers/net/usb/lan78xx.c
drivers/net/usb/pegasus.c
drivers/net/usb/r8152.c
drivers/net/usb/rtl8150.c
drivers/net/usb/sr9700.c
drivers/net/virtio_net.c
drivers/net/vxlan.c
drivers/net/wan/farsync.c
drivers/net/wan/fsl_ucc_hdlc.c
drivers/net/wan/lmc/lmc_main.c
drivers/net/wimax/i2400m/control.c
drivers/net/wimax/i2400m/fw.c
drivers/net/wimax/i2400m/netdev.c
drivers/net/wireless/ath/ath10k/Kconfig
drivers/net/wireless/ath/ath10k/ce.c
drivers/net/wireless/ath/ath10k/ce.h
drivers/net/wireless/ath/ath10k/core.c
drivers/net/wireless/ath/ath10k/core.h
drivers/net/wireless/ath/ath10k/debug.c
drivers/net/wireless/ath/ath10k/htt_tx.c
drivers/net/wireless/ath/ath10k/hw.h
drivers/net/wireless/ath/ath10k/mac.c
drivers/net/wireless/ath/ath10k/pci.h
drivers/net/wireless/ath/ath10k/snoc.c
drivers/net/wireless/ath/ath10k/snoc.h
drivers/net/wireless/ath/ath10k/spectral.c
drivers/net/wireless/ath/ath10k/wmi.c
drivers/net/wireless/ath/ath5k/pcu.c
drivers/net/wireless/ath/ath6kl/cfg80211.c
drivers/net/wireless/ath/ath6kl/sdio.c
drivers/net/wireless/ath/ath9k/ar5008_phy.c
drivers/net/wireless/ath/ath9k/ar9002_phy.c
drivers/net/wireless/ath/ath9k/debug.c
drivers/net/wireless/ath/ath9k/hw.c
drivers/net/wireless/ath/ath9k/main.c
drivers/net/wireless/ath/wil6210/cfg80211.c
drivers/net/wireless/atmel/atmel.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h
drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.h
drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h
drivers/net/wireless/broadcom/brcm80211/brcmfmac/msgbuf.c
drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_cmn.c
drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.c
drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_qmath.c
drivers/net/wireless/intel/ipw2x00/ipw2100.c
drivers/net/wireless/intel/ipw2x00/ipw2100.h
drivers/net/wireless/intel/ipw2x00/ipw2200.c
drivers/net/wireless/intel/ipw2x00/ipw2200.h
drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
drivers/net/wireless/mac80211_hwsim.c
drivers/net/wireless/marvell/libertas/cfg.c
drivers/net/wireless/marvell/libertas/dev.h
drivers/net/wireless/marvell/libertas/if_sdio.c
drivers/net/wireless/marvell/libertas/if_usb.c
drivers/net/wireless/marvell/libertas_tf/if_usb.c
drivers/net/wireless/marvell/mwifiex/cfg80211.c
drivers/net/wireless/marvell/mwifiex/main.c
drivers/net/wireless/marvell/mwifiex/uap_txrx.c
drivers/net/wireless/mediatek/mt76/mt76.h
drivers/net/wireless/mediatek/mt76/mt76x2.h
drivers/net/wireless/mediatek/mt76/mt76x2_debugfs.c
drivers/net/wireless/mediatek/mt76/mt76x2_dfs.c
drivers/net/wireless/mediatek/mt76/mt76x2_dfs.h
drivers/net/wireless/mediatek/mt76/mt76x2_mac.c
drivers/net/wireless/mediatek/mt76/mt76x2_main.c
drivers/net/wireless/mediatek/mt76/mt76x2_phy.c
drivers/net/wireless/mediatek/mt76/mt76x2_tx.c
drivers/net/wireless/quantenna/qtnfmac/cfg80211.c
drivers/net/wireless/quantenna/qtnfmac/commands.c
drivers/net/wireless/quantenna/qtnfmac/core.c
drivers/net/wireless/quantenna/qtnfmac/qlink.h
drivers/net/wireless/realtek/rtlwifi/base.c
drivers/net/wireless/rndis_wlan.c
drivers/net/wireless/ti/wl18xx/debugfs.c
drivers/net/wireless/ti/wlcore/acx.c
drivers/net/wireless/ti/wlcore/cmd.c
drivers/net/wireless/ti/wlcore/debugfs.c
drivers/net/wireless/ti/wlcore/main.c
drivers/net/wireless/ti/wlcore/ps.c
drivers/net/wireless/ti/wlcore/ps.h
drivers/net/wireless/ti/wlcore/scan.c
drivers/net/wireless/ti/wlcore/sysfs.c
drivers/net/wireless/ti/wlcore/testmode.c
drivers/net/wireless/ti/wlcore/tx.c
drivers/net/wireless/ti/wlcore/vendor_cmd.c
drivers/net/wireless/ti/wlcore/wlcore.h
drivers/net/wireless/ti/wlcore/wlcore_i.h
drivers/net/wireless/zydas/zd1211rw/zd_chip.c
drivers/net/wireless/zydas/zd1211rw/zd_usb.c
drivers/net/xen-netback/interface.c
drivers/net/xen-netback/netback.c
drivers/net/xen-netfront.c
drivers/of/of_mdio.c
drivers/ptp/Kconfig
drivers/ptp/ptp_qoriq.c
drivers/s390/net/Kconfig
drivers/s390/net/Makefile
drivers/s390/net/ism.h [new file with mode: 0644]
drivers/s390/net/ism_drv.c [new file with mode: 0644]
drivers/s390/net/qeth_core.h
drivers/s390/net/qeth_core_main.c
drivers/s390/net/qeth_core_mpc.h
drivers/s390/net/qeth_core_sys.c
drivers/s390/net/qeth_l2.h
drivers/s390/net/qeth_l2_main.c
drivers/s390/net/qeth_l3_main.c
drivers/s390/net/qeth_l3_sys.c
drivers/staging/netlogic/xlr_net.c
drivers/staging/rtl8188eu/include/wifi.h
drivers/staging/rtl8188eu/os_dep/os_intfs.c
drivers/staging/rtl8712/wifi.h
drivers/staging/rtl8723bs/include/wifi.h
drivers/staging/rtl8723bs/os_dep/os_intfs.c
drivers/staging/rtlwifi/base.c
drivers/vhost/net.c
fs/afs/rxrpc.c
fs/kernfs/dir.c
fs/kernfs/file.c
fs/kernfs/inode.c
fs/kernfs/kernfs-internal.h
fs/kernfs/symlink.c
fs/sysfs/dir.c
fs/sysfs/file.c
fs/sysfs/group.c
fs/sysfs/sysfs.h
include/linux/bitfield.h
include/linux/bpf.h
include/linux/can/dev.h
include/linux/cpumask.h
include/linux/device.h
include/linux/etherdevice.h
include/linux/fsl/ptp_qoriq.h
include/linux/hwmon.h
include/linux/ieee80211.h
include/linux/if_team.h
include/linux/inetdevice.h
include/linux/ipc.h
include/linux/ipc_namespace.h
include/linux/kernfs.h
include/linux/kobject.h
include/linux/list.h
include/linux/mlx4/device.h
include/linux/mlx5/device.h
include/linux/mlx5/driver.h
include/linux/mlx5/fs.h
include/linux/mlx5/mlx5_ifc.h
include/linux/mlx5/mlx5_ifc_fpga.h
include/linux/mroute_base.h
include/linux/net.h
include/linux/netdev_features.h
include/linux/netdevice.h
include/linux/netfilter.h
include/linux/netfilter/nfnetlink.h
include/linux/netfilter_ipv4.h
include/linux/netfilter_ipv6.h
include/linux/netlink.h
include/linux/openvswitch.h
include/linux/phy.h
include/linux/qed/qed_if.h
include/linux/reciprocal_div.h
include/linux/rfkill.h
include/linux/rhashtable-types.h [new file with mode: 0644]
include/linux/rhashtable.h
include/linux/sctp.h
include/linux/sfp.h
include/linux/skbuff.h
include/linux/tcp.h
include/linux/udp.h
include/net/act_api.h
include/net/af_rxrpc.h
include/net/bond_3ad.h
include/net/bonding.h
include/net/busy_poll.h
include/net/cfg80211.h
include/net/dcbnl.h
include/net/devlink.h
include/net/dsa.h
include/net/dst.h
include/net/flow_dissector.h
include/net/ieee80211_radiotap.h
include/net/inet_common.h
include/net/inet_frag.h
include/net/inet_sock.h
include/net/ip.h
include/net/ip_tunnels.h
include/net/ip_vs.h
include/net/ipv6.h
include/net/ipv6_frag.h [new file with mode: 0644]
include/net/lag.h [new file with mode: 0644]
include/net/mac80211.h
include/net/net_namespace.h
include/net/netevent.h
include/net/netfilter/ipv4/nf_conntrack_ipv4.h
include/net/netfilter/nf_conntrack.h
include/net/netfilter/nf_conntrack_core.h
include/net/netfilter/nf_conntrack_count.h
include/net/netfilter/nf_conntrack_helper.h
include/net/netfilter/nf_conntrack_l3proto.h [deleted file]
include/net/netfilter/nf_conntrack_l4proto.h
include/net/netfilter/nf_conntrack_timeout.h
include/net/netfilter/nf_flow_table.h
include/net/netfilter/nf_log.h
include/net/netfilter/nf_tproxy.h
include/net/netns/hash.h
include/net/netns/ipv4.h
include/net/netns/nftables.h
include/net/pkt_cls.h
include/net/pkt_sched.h
include/net/sch_generic.h
include/net/sctp/structs.h
include/net/seg6.h
include/net/seg6_hmac.h
include/net/smc.h
include/net/sock.h
include/net/tc_act/tc_pedit.h
include/net/tc_act/tc_skbedit.h
include/net/tcp.h
include/net/tls.h
include/net/transp_v6.h
include/net/udp.h
include/net/udp_tunnel.h
include/net/xdp.h
include/net/xfrm.h
include/trace/events/net.h
include/trace/events/rxrpc.h
include/trace/events/sock.h
include/uapi/asm-generic/socket.h
include/uapi/linux/bpf.h
include/uapi/linux/can.h
include/uapi/linux/dcbnl.h
include/uapi/linux/devlink.h
include/uapi/linux/errqueue.h
include/uapi/linux/ethtool.h
include/uapi/linux/if_link.h
include/uapi/linux/ila.h
include/uapi/linux/ip.h
include/uapi/linux/l2tp.h
include/uapi/linux/mii.h
include/uapi/linux/mroute.h
include/uapi/linux/net_tstamp.h
include/uapi/linux/netconf.h
include/uapi/linux/netfilter/nf_osf.h
include/uapi/linux/netfilter/nf_tables.h
include/uapi/linux/netfilter/xt_osf.h
include/uapi/linux/nl80211.h
include/uapi/linux/openvswitch.h
include/uapi/linux/pkt_cls.h
include/uapi/linux/pkt_sched.h
include/uapi/linux/rds.h
include/uapi/linux/rtnetlink.h
include/uapi/linux/sctp.h
include/uapi/linux/smc_diag.h
include/uapi/linux/snmp.h
include/uapi/linux/tc_act/tc_pedit.h
include/uapi/linux/tc_act/tc_skbedit.h
include/uapi/linux/tc_act/tc_tunnel_key.h
include/uapi/linux/tcp.h
include/uapi/linux/tipc_netlink.h
include/uapi/linux/xfrm.h
ipc/msg.c
ipc/sem.c
ipc/shm.c
ipc/util.c
kernel/bpf/cgroup.c
kernel/bpf/core.c
kernel/bpf/offload.c
kernel/bpf/sockmap.c
kernel/bpf/verifier.c
kernel/cgroup/cgroup.c
lib/Kconfig.debug
lib/Makefile
lib/kobject.c
lib/nlattr.c
lib/reciprocal_div.c
lib/rhashtable.c
lib/test_bitfield.c [new file with mode: 0644]
lib/test_rhashtable.c
net/8021q/Makefile
net/8021q/vlan.c
net/Kconfig
net/atm/common.c
net/atm/mpoa_proc.c
net/ax25/ax25_addr.c
net/ax25/ax25_ds_in.c
net/ax25/ax25_ds_subr.c
net/ax25/ax25_ip.c
net/ax25/ax25_out.c
net/batman-adv/Kconfig
net/batman-adv/bat_iv_ogm.h
net/batman-adv/bat_v_ogm.h
net/batman-adv/bridge_loop_avoidance.c
net/batman-adv/debugfs.c
net/batman-adv/originator.c
net/batman-adv/types.h
net/bpfilter/Kconfig
net/bridge/br_forward.c
net/bridge/br_if.c
net/bridge/br_netlink.c
net/bridge/br_private.h
net/bridge/br_sysfs_if.c
net/bridge/netfilter/nft_reject_bridge.c
net/caif/caif_socket.c
net/core/datagram.c
net/core/dev.c
net/core/dev_ioctl.c
net/core/devlink.c
net/core/ethtool.c
net/core/fib_rules.c
net/core/filter.c
net/core/flow_dissector.c
net/core/neighbour.c
net/core/net-sysfs.c
net/core/net_namespace.c
net/core/pktgen.c
net/core/rtnetlink.c
net/core/skbuff.c
net/core/sock.c
net/core/utils.c
net/core/xdp.c
net/dcb/dcbnl.c
net/dccp/proto.c
net/decnet/Kconfig
net/decnet/Makefile
net/decnet/TODO
net/decnet/dn_fib.c
net/decnet/dn_nsp_in.c
net/decnet/dn_nsp_out.c
net/decnet/dn_route.c
net/decnet/dn_rules.c
net/decnet/netfilter/Makefile
net/decnet/netfilter/dn_rtmsg.c
net/dns_resolver/dns_key.c
net/dsa/dsa2.c
net/dsa/slave.c
net/dsa/switch.c
net/ethernet/eth.c
net/ieee802154/6lowpan/reassembly.c
net/ieee802154/core.c
net/ieee802154/nl_policy.c
net/ipv4/Kconfig
net/ipv4/Makefile
net/ipv4/af_inet.c
net/ipv4/bpfilter/Makefile
net/ipv4/devinet.c
net/ipv4/esp4_offload.c
net/ipv4/fou.c
net/ipv4/gre_offload.c
net/ipv4/icmp.c
net/ipv4/igmp.c
net/ipv4/inet_fragment.c
net/ipv4/ip_forward.c
net/ipv4/ip_gre.c
net/ipv4/ip_input.c
net/ipv4/ip_output.c
net/ipv4/ipmr.c
net/ipv4/ipmr_base.c
net/ipv4/netfilter.c
net/ipv4/netfilter/Kconfig
net/ipv4/netfilter/Makefile
net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c [deleted file]
net/ipv4/netfilter/nf_conntrack_proto_icmp.c [deleted file]
net/ipv4/netfilter/nf_log_ipv4.c
net/ipv4/ping.c
net/ipv4/proc.c
net/ipv4/raw.c
net/ipv4/route.c
net/ipv4/sysctl_net_ipv4.c
net/ipv4/tcp.c
net/ipv4/tcp_bbr.c
net/ipv4/tcp_input.c
net/ipv4/tcp_ipv4.c
net/ipv4/tcp_minisocks.c
net/ipv4/tcp_offload.c
net/ipv4/tcp_output.c
net/ipv4/tcp_rate.c
net/ipv4/tcp_recovery.c
net/ipv4/tcp_timer.c
net/ipv4/udp.c
net/ipv4/udp_offload.c
net/ipv6/Kconfig
net/ipv6/addrconf.c
net/ipv6/af_inet6.c
net/ipv6/datagram.c
net/ipv6/esp6_offload.c
net/ipv6/icmp.c
net/ipv6/ila/Makefile
net/ipv6/ila/ila.h
net/ipv6/ila/ila_common.c
net/ipv6/ila/ila_main.c [new file with mode: 0644]
net/ipv6/ila/ila_xlat.c
net/ipv6/ip6_flowlabel.c
net/ipv6/ip6_gre.c
net/ipv6/ip6_input.c
net/ipv6/ip6_offload.c
net/ipv6/ip6_output.c
net/ipv6/ip6mr.c
net/ipv6/ipv6_sockglue.c
net/ipv6/mcast.c
net/ipv6/netfilter.c
net/ipv6/netfilter/Kconfig
net/ipv6/netfilter/Makefile
net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c [deleted file]
net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c [deleted file]
net/ipv6/netfilter/nf_conntrack_reasm.c
net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
net/ipv6/netfilter/nf_log_ipv6.c
net/ipv6/ping.c
net/ipv6/raw.c
net/ipv6/reassembly.c
net/ipv6/seg6.c
net/ipv6/seg6_hmac.c
net/ipv6/seg6_local.c
net/ipv6/tcpv6_offload.c
net/ipv6/udp.c
net/ipv6/udp_offload.c
net/ipv6/xfrm6_mode_ro.c
net/iucv/af_iucv.c
net/kcm/Kconfig
net/kcm/kcmsock.c
net/key/af_key.c
net/l2tp/l2tp_core.c
net/l2tp/l2tp_core.h
net/l2tp/l2tp_debugfs.c
net/l2tp/l2tp_eth.c
net/l2tp/l2tp_ip.c
net/l2tp/l2tp_ip6.c
net/l2tp/l2tp_netlink.c
net/l2tp/l2tp_ppp.c
net/llc/Kconfig
net/llc/Makefile
net/llc/llc_if.c
net/mac80211/Makefile
net/mac80211/agg-rx.c
net/mac80211/agg-tx.c
net/mac80211/cfg.c
net/mac80211/ethtool.c
net/mac80211/he.c [new file with mode: 0644]
net/mac80211/ht.c
net/mac80211/ieee80211_i.h
net/mac80211/iface.c
net/mac80211/key.c
net/mac80211/main.c
net/mac80211/mlme.c
net/mac80211/offchannel.c
net/mac80211/rc80211_minstrel.c
net/mac80211/rx.c
net/mac80211/scan.c
net/mac80211/sta_info.c
net/mac80211/sta_info.h
net/mac80211/trace.h
net/mac80211/tx.c
net/mac80211/util.c
net/mpls/mpls_iptunnel.c
net/netfilter/Kconfig
net/netfilter/Makefile
net/netfilter/core.c
net/netfilter/ipvs/ip_vs_conn.c
net/netfilter/ipvs/ip_vs_ctl.c
net/netfilter/ipvs/ip_vs_proto.c
net/netfilter/ipvs/ip_vs_proto_sctp.c
net/netfilter/ipvs/ip_vs_proto_tcp.c
net/netfilter/ipvs/ip_vs_proto_udp.c
net/netfilter/ipvs/ip_vs_sync.c
net/netfilter/nf_conncount.c
net/netfilter/nf_conntrack_broadcast.c
net/netfilter/nf_conntrack_core.c
net/netfilter/nf_conntrack_expect.c
net/netfilter/nf_conntrack_helper.c
net/netfilter/nf_conntrack_l3proto_generic.c [deleted file]
net/netfilter/nf_conntrack_netlink.c
net/netfilter/nf_conntrack_proto.c
net/netfilter/nf_conntrack_proto_dccp.c
net/netfilter/nf_conntrack_proto_generic.c
net/netfilter/nf_conntrack_proto_gre.c
net/netfilter/nf_conntrack_proto_icmp.c [new file with mode: 0644]
net/netfilter/nf_conntrack_proto_icmpv6.c [new file with mode: 0644]
net/netfilter/nf_conntrack_proto_sctp.c
net/netfilter/nf_conntrack_proto_tcp.c
net/netfilter/nf_conntrack_proto_udp.c
net/netfilter/nf_conntrack_standalone.c
net/netfilter/nf_flow_table_core.c
net/netfilter/nf_log_common.c
net/netfilter/nf_nat_core.c
net/netfilter/nf_osf.c
net/netfilter/nf_tables_api.c
net/netfilter/nfnetlink.c
net/netfilter/nfnetlink_cttimeout.c
net/netfilter/nft_chain_filter.c
net/netfilter/nft_connlimit.c
net/netfilter/nft_ct.c
net/netfilter/nft_dynset.c
net/netfilter/nft_meta.c
net/netfilter/nft_socket.c
net/netfilter/utils.c
net/netfilter/xt_CT.c
net/netfilter/xt_TEE.c
net/netfilter/xt_TPROXY.c
net/netfilter/xt_cgroup.c
net/netfilter/xt_owner.c
net/netfilter/xt_recent.c
net/netfilter/xt_socket.c
net/netlink/af_netlink.c
net/nfc/llcp_sock.c
net/openvswitch/actions.c
net/openvswitch/conntrack.c
net/openvswitch/flow_netlink.c
net/packet/af_packet.c
net/rds/Kconfig
net/rds/Makefile
net/rds/af_rds.c
net/rds/bind.c
net/rds/cong.c
net/rds/connection.c
net/rds/ib.c
net/rds/ib.h
net/rds/ib_cm.c
net/rds/ib_mr.h
net/rds/ib_rdma.c
net/rds/ib_recv.c
net/rds/ib_send.c
net/rds/loop.c
net/rds/message.c
net/rds/rdma.c
net/rds/rdma_transport.c
net/rds/rdma_transport.h
net/rds/rds.h
net/rds/recv.c
net/rds/send.c
net/rds/tcp.c
net/rds/tcp.h
net/rds/tcp_connect.c
net/rds/tcp_listen.c
net/rds/tcp_recv.c
net/rds/tcp_send.c
net/rds/threads.c
net/rds/transport.c
net/rxrpc/af_rxrpc.c
net/rxrpc/ar-internal.h
net/rxrpc/call_event.c
net/rxrpc/conn_client.c
net/rxrpc/conn_event.c
net/rxrpc/input.c
net/rxrpc/local_event.c
net/rxrpc/output.c
net/rxrpc/proc.c
net/rxrpc/recvmsg.c
net/rxrpc/rxkad.c
net/sched/Kconfig
net/sched/Makefile
net/sched/act_api.c
net/sched/act_bpf.c
net/sched/act_connmark.c
net/sched/act_csum.c
net/sched/act_gact.c
net/sched/act_ife.c
net/sched/act_ipt.c
net/sched/act_mirred.c
net/sched/act_nat.c
net/sched/act_pedit.c
net/sched/act_police.c
net/sched/act_sample.c
net/sched/act_simple.c
net/sched/act_skbedit.c
net/sched/act_skbmod.c
net/sched/act_tunnel_key.c
net/sched/act_vlan.c
net/sched/cls_api.c
net/sched/cls_basic.c
net/sched/cls_bpf.c
net/sched/cls_flower.c
net/sched/cls_matchall.c
net/sched/cls_u32.c
net/sched/sch_api.c
net/sched/sch_cake.c [new file with mode: 0644]
net/sched/sch_cbs.c
net/sched/sch_etf.c [new file with mode: 0644]
net/sched/sch_htb.c
net/sched/sch_netem.c
net/sched/sch_skbprio.c [new file with mode: 0644]
net/sctp/Kconfig
net/sctp/associola.c
net/sctp/input.c
net/sctp/ipv6.c
net/sctp/protocol.c
net/sctp/sm_sideeffect.c
net/sctp/socket.c
net/smc/Makefile
net/smc/af_smc.c
net/smc/smc.h
net/smc/smc_cdc.c
net/smc/smc_cdc.h
net/smc/smc_clc.c
net/smc/smc_clc.h
net/smc/smc_core.c
net/smc/smc_core.h
net/smc/smc_diag.c
net/smc/smc_ib.c
net/smc/smc_ib.h
net/smc/smc_ism.c [new file with mode: 0644]
net/smc/smc_ism.h [new file with mode: 0644]
net/smc/smc_llc.c
net/smc/smc_llc.h
net/smc/smc_pnet.c
net/smc/smc_pnet.h
net/smc/smc_rx.c
net/smc/smc_tx.c
net/smc/smc_tx.h
net/smc/smc_wr.c
net/socket.c
net/strparser/strparser.c
net/sunrpc/auth_gss/auth_gss.c
net/tipc/bcast.c
net/tipc/bearer.c
net/tipc/group.c
net/tipc/group.h
net/tipc/link.c
net/tipc/link.h
net/tipc/monitor.c
net/tipc/msg.c
net/tipc/name_table.c
net/tipc/node.c
net/tipc/node.h
net/tipc/socket.c
net/tls/tls_device.c
net/tls/tls_device_fallback.c
net/tls/tls_main.c
net/tls/tls_sw.c
net/unix/af_unix.c
net/wimax/Makefile
net/wimax/debugfs.c
net/wimax/op-msg.c
net/wimax/stack.c
net/wireless/core.c
net/wireless/core.h
net/wireless/lib80211_crypt_tkip.c
net/wireless/nl80211.c
net/wireless/sysfs.c
net/wireless/util.c
net/wireless/wext-compat.c
net/x25/Kconfig
net/x25/x25_subr.c
net/xdp/xdp_umem.c
net/xfrm/Kconfig
net/xfrm/Makefile
net/xfrm/xfrm_device.c
net/xfrm/xfrm_input.c
net/xfrm/xfrm_interface.c [new file with mode: 0644]
net/xfrm/xfrm_output.c
net/xfrm/xfrm_policy.c
net/xfrm/xfrm_state.c
net/xfrm/xfrm_user.c
samples/bpf/Makefile
samples/bpf/bpf_load.c
samples/bpf/test_cgrp2_sock2.c
samples/bpf/xdp_redirect_cpu_kern.c
samples/bpf/xdp_rxq_info_kern.c
samples/bpf/xdp_rxq_info_user.c
samples/bpf/xdp_sample_pkts_kern.c [new file with mode: 0644]
samples/bpf/xdp_sample_pkts_user.c [new file with mode: 0644]
security/selinux/nlmsgtab.c
tools/bpf/Makefile.helpers [new file with mode: 0644]
tools/bpf/bpftool/Documentation/Makefile
tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
tools/bpf/bpftool/Documentation/bpftool-prog.rst
tools/bpf/bpftool/Makefile
tools/bpf/bpftool/bash-completion/bpftool
tools/bpf/bpftool/btf_dumper.c [new file with mode: 0644]
tools/bpf/bpftool/cgroup.c
tools/bpf/bpftool/common.c
tools/bpf/bpftool/main.c
tools/bpf/bpftool/main.h
tools/bpf/bpftool/map.c
tools/bpf/bpftool/prog.c
tools/bpf/bpftool/xlated_dumper.c
tools/build/Makefile.feature
tools/build/feature/Makefile
tools/build/feature/test-reallocarray.c [new file with mode: 0644]
tools/include/linux/compiler-gcc.h
tools/include/linux/overflow.h [new file with mode: 0644]
tools/include/tools/libc_compat.h [new file with mode: 0644]
tools/include/uapi/linux/bpf.h
tools/lib/bpf/Build
tools/lib/bpf/Makefile
tools/lib/bpf/btf.c
tools/lib/bpf/btf.h
tools/lib/bpf/libbpf.c
tools/lib/bpf/libbpf.h
tools/lib/bpf/libbpf_errno.c [new file with mode: 0644]
tools/testing/selftests/bpf/Makefile
tools/testing/selftests/bpf/cgroup_helpers.c
tools/testing/selftests/bpf/cgroup_helpers.h
tools/testing/selftests/bpf/test_offload.py
tools/testing/selftests/bpf/test_sock_addr.c
tools/testing/selftests/bpf/test_tcpbpf.h
tools/testing/selftests/bpf/test_tcpbpf_kern.c
tools/testing/selftests/bpf/test_tcpbpf_user.c
tools/testing/selftests/bpf/trace_helpers.c
tools/testing/selftests/bpf/trace_helpers.h
tools/testing/selftests/drivers/net/mlxsw/mirror_gre.sh [new file with mode: 0755]
tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh [new file with mode: 0644]
tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh [new file with mode: 0755]
tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh [new file with mode: 0755]
tools/testing/selftests/drivers/net/mlxsw/router_scale.sh [new file with mode: 0644]
tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_lib_spectrum.sh [new file with mode: 0644]
tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_resources.sh [new file with mode: 0755]
tools/testing/selftests/drivers/net/mlxsw/spectrum/mirror_gre_scale.sh [new file with mode: 0644]
tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh [new file with mode: 0755]
tools/testing/selftests/drivers/net/mlxsw/spectrum/router_scale.sh [new file with mode: 0644]
tools/testing/selftests/drivers/net/mlxsw/spectrum/tc_flower_scale.sh [new file with mode: 0644]
tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh [new file with mode: 0644]
tools/testing/selftests/net/.gitignore
tools/testing/selftests/net/Makefile
tools/testing/selftests/net/forwarding/README
tools/testing/selftests/net/forwarding/bridge_port_isolation.sh [new file with mode: 0755]
tools/testing/selftests/net/forwarding/devlink_lib.sh [new file with mode: 0644]
tools/testing/selftests/net/forwarding/gre_multipath.sh [new file with mode: 0755]
tools/testing/selftests/net/forwarding/lib.sh
tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh [new file with mode: 0755]
tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh
tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh [new file with mode: 0755]
tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh [new file with mode: 0755]
tools/testing/selftests/net/forwarding/mirror_gre_changes.sh
tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh [new file with mode: 0755]
tools/testing/selftests/net/forwarding/mirror_gre_lib.sh
tools/testing/selftests/net/forwarding/mirror_gre_nh.sh
tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh
tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh
tools/testing/selftests/net/forwarding/mirror_lib.sh
tools/testing/selftests/net/forwarding/router_bridge.sh [new file with mode: 0755]
tools/testing/selftests/net/forwarding/router_bridge_vlan.sh [new file with mode: 0755]
tools/testing/selftests/net/forwarding/router_broadcast.sh [new file with mode: 0755]
tools/testing/selftests/net/forwarding/router_multipath.sh
tools/testing/selftests/net/forwarding/tc_chains.sh
tools/testing/selftests/net/forwarding/tc_shblocks.sh
tools/testing/selftests/net/ip6_gre_headroom.sh [new file with mode: 0755]
tools/testing/selftests/net/rtnetlink.sh
tools/testing/selftests/net/tls.c [new file with mode: 0644]
tools/testing/selftests/tc-testing/tc-tests/actions/csum.json
tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json [new file with mode: 0644]
tools/testing/selftests/tc-testing/tc-tests/filters/fw.json [new file with mode: 0644]

diff --git a/Documentation/ABI/stable/sysfs-class-rfkill b/Documentation/ABI/stable/sysfs-class-rfkill
index e1ba4a10475364cd8fcf201b323c26de5ee8b62b..80151a409d67ce02775e0fc249d09b075ac8ec6a 100644 (file)
@@ -11,7 +11,7 @@ KernelVersion:        v2.6.22
 Contact:       linux-wireless@vger.kernel.org,
 Description:   The rfkill class subsystem folder.
                Each registered rfkill driver is represented by an rfkillX
-               subfolder (X being an integer > 0).
+               subfolder (X being an integer >= 0).
 
 
 What:          /sys/class/rfkill/rfkill[0-9]+/name
@@ -48,8 +48,8 @@ Contact:      linux-wireless@vger.kernel.org
 Description:   Current state of the transmitter.
                This file was scheduled to be removed in 2014, but due to its
                large number of users it will be sticking around for a bit
-               longer. Despite it being marked as stabe, the newer "hard" and
-               "soft" interfaces should be preffered, since it is not possible
+               longer. Despite it being marked as stable, the newer "hard" and
+               "soft" interfaces should be preferred, since it is not possible
                to express the 'soft and hard block' state of the rfkill driver
                through this interface. There will likely be another attempt to
                remove it in the future.
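
The split "hard"/"soft" interface recommended above can be exercised from
the shell, for example (a sketch; the rfkill index and values depend on
the system):

    # Read the recommended split block state instead of "state".
    cat /sys/class/rfkill/rfkill0/hard   # 1 = hardware-blocked (read-only)
    cat /sys/class/rfkill/rfkill0/soft   # 1 = software-blocked

    # Soft-block the transmitter; the hard block cannot be written.
    echo 1 > /sys/class/rfkill/rfkill0/soft
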
diff --git a/Documentation/ABI/testing/sysfs-class-net-queues b/Documentation/ABI/testing/sysfs-class-net-queues
index 0c0df91b1516fb087ae22f82a43dafa3fb44e858..978b76358661a91470436e73842abde367d7d70e 100644 (file)
@@ -42,6 +42,17 @@ Description:
                network device transmit queue. Possible vaules depend on the
                number of available CPU(s) in the system.
 
+What:          /sys/class/<iface>/queues/tx-<queue>/xps_rxqs
+Date:          June 2018
+KernelVersion: 4.18.0
+Contact:       netdev@vger.kernel.org
+Description:
+               Mask of the receive queue(s) currently enabled to participate
+               into the Transmit Packet Steering packet processing flow for this
+               network device transmit queue. Possible values depend on the
+               number of available receive queue(s) in the network device.
+               Default is disabled.
+
 What:          /sys/class/<iface>/queues/tx-<queue>/byte_queue_limits/hold_time
 Date:          November 2011
 KernelVersion: 3.3
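
As a usage sketch for the new xps_rxqs attribute (the interface name and
queue numbers are illustrative, not taken from this commit):

    # Steer transmits on tx-0 according to receive queues 0 and 1
    # by writing a hexadecimal mask, as with xps_cpus.
    echo 3 > /sys/class/net/eth0/queues/tx-0/xps_rxqs

    # Writing 0 restores the default (disabled).
    echo 0 > /sys/class/net/eth0/queues/tx-0/xps_rxqs
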
diff --git a/Documentation/bpf/bpf_devel_QA.rst b/Documentation/bpf/bpf_devel_QA.rst
index 0e7c1d946e83818b32eb63fe0f4999c8aec15352..c9856b927055b1e804b5087887945ea508038289 100644 (file)
@@ -106,9 +106,9 @@ into the bpf-next tree will make their way into net-next tree. net and
 net-next are both run by David S. Miller. From there, they will go
 into the kernel mainline tree run by Linus Torvalds. To read up on the
 process of net and net-next being merged into the mainline tree, see
-the `netdev FAQ`_ under:
+the :ref:`netdev-FAQ`
+
 
- `Documentation/networking/netdev-FAQ.txt`_
 
 Occasionally, to prevent merge conflicts, we might send pull requests
 to other trees (e.g. tracing) with a small subset of the patches, but
@@ -125,8 +125,8 @@ request)::
 Q: How do I indicate which tree (bpf vs. bpf-next) my patch should be applied to?
 ---------------------------------------------------------------------------------
 
-A: The process is the very same as described in the `netdev FAQ`_, so
-please read up on it. The subject line must indicate whether the
+A: The process is the very same as described in the :ref:`netdev-FAQ`,
+so please read up on it. The subject line must indicate whether the
 patch is a fix or rather "next-like" content in order to let the
 maintainers know whether it is targeted at bpf or bpf-next.
 
@@ -184,7 +184,7 @@ ii) run extensive BPF test suite and
 Once the BPF pull request was accepted by David S. Miller, then
 the patches end up in net or net-next tree, respectively, and
 make their way from there further into mainline. Again, see the
-`netdev FAQ`_ for additional information e.g. on how often they are
+:ref:`netdev-FAQ` for additional information e.g. on how often they are
 merged to mainline.
 
 Q: How long do I need to wait for feedback on my BPF patches?
@@ -208,7 +208,7 @@ Q: Are patches applied to bpf-next when the merge window is open?
 -----------------------------------------------------------------
 A: For the time when the merge window is open, bpf-next will not be
 processed. This is roughly analogous to net-next patch processing,
-so feel free to read up on the `netdev FAQ`_ about further details.
+so feel free to read up on the :ref:`netdev-FAQ` about further details.
 
 During those two weeks of merge window, we might ask you to resend
 your patch series once bpf-next is open again. Once Linus released
@@ -372,7 +372,7 @@ netdev kernel mailing list in Cc and ask for the fix to be queued up:
   netdev@vger.kernel.org
 
 The process in general is the same as on netdev itself, see also the
-`netdev FAQ`_ document.
+:ref:`netdev-FAQ`.
 
 Q: Do you also backport to kernels not currently maintained as stable?
 ----------------------------------------------------------------------
@@ -388,9 +388,7 @@ Q: The BPF patch I am about to submit needs to go to stable as well
 What should I do?
 
 A: The same rules apply as with netdev patch submissions in general, see
-`netdev FAQ`_ under:
-
-  `Documentation/networking/netdev-FAQ.txt`_
+the :ref:`netdev-FAQ`.
 
 Never add "``Cc: stable@vger.kernel.org``" to the patch description, but
 ask the BPF maintainers to queue the patches instead. This can be done
@@ -630,8 +628,7 @@ when:
 .. Links
 .. _Documentation/process/: https://www.kernel.org/doc/html/latest/process/
 .. _MAINTAINERS: ../../MAINTAINERS
-.. _Documentation/networking/netdev-FAQ.txt: ../networking/netdev-FAQ.txt
-.. _netdev FAQ: ../networking/netdev-FAQ.txt
+.. _netdev-FAQ: ../networking/netdev-FAQ.rst
 .. _samples/bpf/: ../../samples/bpf/
 .. _selftests: ../../tools/testing/selftests/bpf/
 .. _Documentation/dev-tools/kselftest.rst:
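
The subject-line convention described in this Q&A can be produced directly
with git; the commands below are a sketch of common practice rather than
text from this patch:

    # A fix targeted at the bpf tree:
    git format-patch --subject-prefix="PATCH bpf" -1

    # "next-like" content targeted at bpf-next:
    git format-patch --subject-prefix="PATCH bpf-next" -1
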
diff --git a/Documentation/devicetree/bindings/net/brcm,mdio-mux-iproc.txt b/Documentation/devicetree/bindings/net/brcm,mdio-mux-iproc.txt
index dfe287a5d6f292cdf5b866dddeb80e95bff31c6d..b58843f29591bf29842164897cd9cb58237e9c3a 100644 (file)
@@ -13,14 +13,17 @@ MDIO multiplexer node:
 Every non-ethernet PHY requires a compatible so that it could be probed based
 on this compatible string.
 
+Optional properties:
+- clocks: phandle of the core clock which drives the mdio block.
+
 Additional information regarding generic multiplexer properties can be found
 at- Documentation/devicetree/bindings/net/mdio-mux.txt
 
 
 for example:
-               mdio_mux_iproc: mdio-mux@6602023c {
+               mdio_mux_iproc: mdio-mux@66020000 {
                        compatible = "brcm,mdio-mux-iproc";
-                       reg = <0x6602023c 0x14>;
+                       reg = <0x66020000 0x250>;
                        #address-cells = <1>;
                        #size-cells = <0>;
 
diff --git a/Documentation/devicetree/bindings/net/can/xilinx_can.txt b/Documentation/devicetree/bindings/net/can/xilinx_can.txt
index fe38847d8e268793a7eac961c472f62d5c81a373..ae5c07e96ad50b3d6e4829b9b5abd07cce701ab1 100644 (file)
@@ -2,20 +2,26 @@ Xilinx Axi CAN/Zynq CANPS controller Device Tree Bindings
 ---------------------------------------------------------
 
 Required properties:
-- compatible           : Should be "xlnx,zynq-can-1.0" for Zynq CAN
-                         controllers and "xlnx,axi-can-1.00.a" for Axi CAN
-                         controllers.
-- reg                  : Physical base address and size of the Axi CAN/Zynq
-                         CANPS registers map.
+- compatible           : Should be:
+                         - "xlnx,zynq-can-1.0" for Zynq CAN controllers
+                         - "xlnx,axi-can-1.00.a" for Axi CAN controllers
+                         - "xlnx,canfd-1.0" for CAN FD controllers
+- reg                  : Physical base address and size of the controller
+                         registers map.
 - interrupts           : Property with a value describing the interrupt
                          number.
 - interrupt-parent     : Must be core interrupt controller
-- clock-names          : List of input clock names - "can_clk", "pclk"
-                         (For CANPS), "can_clk" , "s_axi_aclk"(For AXI CAN)
+- clock-names          : List of input clock names
+                         - "can_clk", "pclk" (For CANPS),
+                         - "can_clk", "s_axi_aclk" (For AXI CAN and CAN FD).
                          (See clock bindings for details).
 - clocks               : Clock phandles (see clock bindings for details).
-- tx-fifo-depth                : Can Tx fifo depth.
-- rx-fifo-depth                : Can Rx fifo depth.
+- tx-fifo-depth                : Can Tx fifo depth (Zynq, Axi CAN).
+- rx-fifo-depth                : Can Rx fifo depth (Zynq, Axi CAN, CAN FD in
+                          sequential Rx mode).
+- tx-mailbox-count     : Can Tx mailbox buffer count (CAN FD).
+- rx-mailbox-count     : Can Rx mailbox buffer count (CAN FD in mailbox Rx
+                         mode).
 
 
 Example:
@@ -42,3 +48,14 @@ For Axi CAN Dts file:
                        tx-fifo-depth = <0x40>;
                        rx-fifo-depth = <0x40>;
                };
+For CAN FD Dts file:
+       canfd_0: canfd@40000000 {
+                       compatible = "xlnx,canfd-1.0";
+                       clocks = <&clkc 0>, <&clkc 1>;
+                       clock-names = "can_clk", "s_axi_aclk";
+                       reg = <0x40000000 0x2000>;
+                       interrupt-parent = <&intc>;
+                       interrupts = <0 59 1>;
+                       tx-mailbox-count = <0x20>;
+                       rx-fifo-depth = <0x20>;
+               };
diff --git a/Documentation/devicetree/bindings/net/dsa/realtek-smi.txt b/Documentation/devicetree/bindings/net/dsa/realtek-smi.txt
new file mode 100644 (file)
index 0000000..b6ae854
--- /dev/null
@@ -0,0 +1,153 @@
+Realtek SMI-based Switches
+==========================
+
+The SMI "Simple Management Interface" is a two-wire protocol using
+bit-banged GPIO that while it reuses the MDIO lines MCK and MDIO does
+not use the MDIO protocol. This binding defines how to specify the
+SMI-based Realtek devices.
+
+Required properties:
+
+- compatible: must be exactly one of:
+      "realtek,rtl8366"
+      "realtek,rtl8366rb" (4+1 ports)
+      "realtek,rtl8366s"  (4+1 ports)
+      "realtek,rtl8367"
+      "realtek,rtl8367b"
+      "realtek,rtl8368s"  (8 port)
+      "realtek,rtl8369"
+      "realtek,rtl8370"   (8 port)
+
+Required properties:
+- mdc-gpios: GPIO line for the MDC clock line.
+- mdio-gpios: GPIO line for the MDIO data line.
+- reset-gpios: GPIO line for the reset signal.
+
+Optional properties:
+- realtek,disable-leds: if the LED drivers are not used in the
+  hardware design this will disable them so they are not turned on
+  and wasting power.
+
+Required subnodes:
+
+- interrupt-controller
+
+  This defines an interrupt controller with an IRQ line (typically
+  a GPIO) that will demultiplex and handle the interrupt from the single
+  interrupt line coming out of one of the SMI-based chips. It most
+  importantly provides link up/down interrupts to the PHY blocks inside
+  the ASIC.
+
+Required properties of interrupt-controller:
+
+- interrupt: parent interrupt, see interrupt-controller/interrupts.txt
+- interrupt-controller: see interrupt-controller/interrupts.txt
+- #address-cells: should be <0>
+- #interrupt-cells: should be <1>
+
+- mdio
+
+  This defines the internal MDIO bus of the SMI device, mostly for the
+  purpose of being able to hook the interrupts to the right PHY and
+  the right PHY to the corresponding port.
+
+Required properties of mdio:
+
+- compatible: should be set to "realtek,smi-mdio" for all SMI devices
+
+See net/mdio.txt for additional MDIO bus properties.
+
+See net/dsa/dsa.txt for a list of additional required and optional properties
+and subnodes of DSA switches.
+
+Examples:
+
+switch {
+       compatible = "realtek,rtl8366rb";
+       /* 22 = MDIO (has input reads), 21 = MDC (clock, output only) */
+       mdc-gpios = <&gpio0 21 GPIO_ACTIVE_HIGH>;
+       mdio-gpios = <&gpio0 22 GPIO_ACTIVE_HIGH>;
+       reset-gpios = <&gpio0 14 GPIO_ACTIVE_LOW>;
+
+       switch_intc: interrupt-controller {
+               /* GPIO 15 provides the interrupt */
+               interrupt-parent = <&gpio0>;
+               interrupts = <15 IRQ_TYPE_LEVEL_LOW>;
+               interrupt-controller;
+               #address-cells = <0>;
+               #interrupt-cells = <1>;
+       };
+
+       ports {
+               #address-cells = <1>;
+               #size-cells = <0>;
+               reg = <0>;
+               port@0 {
+                       reg = <0>;
+                       label = "lan0";
+                       phy-handle = <&phy0>;
+               };
+               port@1 {
+                       reg = <1>;
+                       label = "lan1";
+                       phy-handle = <&phy1>;
+               };
+               port@2 {
+                       reg = <2>;
+                       label = "lan2";
+                       phy-handle = <&phy2>;
+               };
+               port@3 {
+                       reg = <3>;
+                       label = "lan3";
+                       phy-handle = <&phy3>;
+               };
+               port@4 {
+                       reg = <4>;
+                       label = "wan";
+                       phy-handle = <&phy4>;
+               };
+               port@5 {
+                       reg = <5>;
+                       label = "cpu";
+                       ethernet = <&gmac0>;
+                       phy-mode = "rgmii";
+                       fixed-link {
+                               speed = <1000>;
+                               full-duplex;
+                       };
+               };
+       };
+
+       mdio {
+               compatible = "realtek,smi-mdio", "dsa-mdio";
+               #address-cells = <1>;
+               #size-cells = <0>;
+
+               phy0: phy@0 {
+                       reg = <0>;
+                       interrupt-parent = <&switch_intc>;
+                       interrupts = <0>;
+               };
+               phy1: phy@1 {
+                       reg = <1>;
+                       interrupt-parent = <&switch_intc>;
+                       interrupts = <1>;
+               };
+               phy2: phy@2 {
+                       reg = <2>;
+                       interrupt-parent = <&switch_intc>;
+                       interrupts = <2>;
+               };
+               phy3: phy@3 {
+                       reg = <3>;
+                       interrupt-parent = <&switch_intc>;
+                       interrupts = <3>;
+               };
+               phy4: phy@4 {
+                       reg = <4>;
+                       interrupt-parent = <&switch_intc>;
+                       interrupts = <12>;
+               };
+       };
+};
diff --git a/Documentation/devicetree/bindings/net/dsa/vitesse,vsc73xx.txt b/Documentation/devicetree/bindings/net/dsa/vitesse,vsc73xx.txt
new file mode 100644 (file)
index 0000000..ed4710c
--- /dev/null
@@ -0,0 +1,81 @@
+Vitesse VSC73xx Switches
+========================
+
+This defines device tree bindings for the Vitesse VSC73xx switch chips.
+The Vitesse company has been acquired by Microsemi, and Microsemi in
+turn by Microchip, but the chips retain this vendor branding.
+
+The currently supported switch chips are:
+Vitesse VSC7385 SparX-G5 5+1-port Integrated Gigabit Ethernet Switch
+Vitesse VSC7388 SparX-G8 8-port Integrated Gigabit Ethernet Switch
+Vitesse VSC7395 SparX-G5e 5+1-port Integrated Gigabit Ethernet Switch
+Vitesse VSC7398 SparX-G8e 8-port Integrated Gigabit Ethernet Switch
+
+The device tree node is an SPI device, so it must reside inside an SPI
+bus device tree node; see spi/spi-bus.txt.
+
+Required properties:
+
+- compatible: must be exactly one of:
+       "vitesse,vsc7385"
+       "vitesse,vsc7388"
+       "vitesse,vsc7395"
+       "vitesse,vsc7398"
+- gpio-controller: indicates that this switch is also a GPIO controller,
+  see gpio/gpio.txt
+- #gpio-cells: this must be set to <2> and indicates that this is a
+  two-cell GPIO controller, see gpio/gpio.txt
+
+Optional properties:
+
+- reset-gpios: a handle to a GPIO line that can reset the chip.
+  It should be tagged as active low.
+
+Required subnodes:
+
+See net/dsa/dsa.txt for a list of additional required and optional properties
+and subnodes of DSA switches.
+
+Examples:
+
+switch@0 {
+       compatible = "vitesse,vsc7395";
+       reg = <0>;
+       /* Specified for 2.5 MHz or below */
+       spi-max-frequency = <2500000>;
+       gpio-controller;
+       #gpio-cells = <2>;
+
+       ports {
+               #address-cells = <1>;
+               #size-cells = <0>;
+
+               port@0 {
+                       reg = <0>;
+                       label = "lan1";
+               };
+               port@1 {
+                       reg = <1>;
+                       label = "lan2";
+               };
+               port@2 {
+                       reg = <2>;
+                       label = "lan3";
+               };
+               port@3 {
+                       reg = <3>;
+                       label = "lan4";
+               };
+               vsc: port@6 {
+                       reg = <6>;
+                       label = "cpu";
+                       ethernet = <&gmac1>;
+                       phy-mode = "rgmii";
+                       fixed-link {
+                               speed = <1000>;
+                               full-duplex;
+                               pause;
+                       };
+               };
+       };
+};
index f8c33890bc2970e08bf44934835a9b8c464675f1..299c0dcd67db456fd8ded1b9ff71719ecaef9d2c 100644 (file)
@@ -356,30 +356,7 @@ ethernet@e0000 {
 ============================================================================
 FMan IEEE 1588 Node
 
-DESCRIPTION
-
-The FMan interface to support IEEE 1588
-
-
-PROPERTIES
-
-- compatible
-               Usage: required
-               Value type: <stringlist>
-               Definition: A standard property.
-               Must include "fsl,fman-ptp-timer".
-
-- reg
-               Usage: required
-               Value type: <prop-encoded-array>
-               Definition: A standard property.
-
-EXAMPLE
-
-ptp-timer@fe000 {
-       compatible = "fsl,fman-ptp-timer";
-       reg = <0xfe000 0x1000>;
-};
+Refer to Documentation/devicetree/bindings/ptp/ptp-qoriq.txt
 
 =============================================================================
 FMan MDIO Node
index 9c16ee2965a2ce756acc23d6956fedc38157144b..3b71da7e87427759729fc7874d048c4c22f5eee0 100644 (file)
@@ -4,6 +4,7 @@ The device node has following properties.
 
 Required properties:
 - compatible: should be "rockchip,<name>-gmac"
+   "rockchip,px30-gmac":   found on PX30 SoCs
    "rockchip,rk3128-gmac": found on RK312x SoCs
    "rockchip,rk3228-gmac": found on RK322x SoCs
    "rockchip,rk3288-gmac": found on RK3288 SoCs
index 0f569d8e73a3cb09fa1fb9d9dcf27375e3aad6ed..c5d0e7998e2b0a09546ab068dade2176b194d864 100644 (file)
@@ -2,7 +2,8 @@
 
 General Properties:
 
-  - compatible   Should be "fsl,etsec-ptp"
+  - compatible   Should be "fsl,etsec-ptp" for eTSEC
+                 Should be "fsl,fman-ptp-timer" for DPAA FMan
   - reg          Offset and length of the register set for the device
   - interrupts   There should be at least two interrupts. Some devices
                  have as many as four PTP related interrupts.
@@ -43,14 +44,22 @@ Clock Properties:
   value, which will be directly written in those bits, that is why,
   according to reference manual, the next clock sources can be used:
 
+  For eTSEC,
   <0> - external high precision timer reference clock (TSEC_TMR_CLK
         input is used for this purpose);
   <1> - eTSEC system clock;
   <2> - eTSEC1 transmit clock;
   <3> - RTC clock input.
 
-  When this attribute is not used, eTSEC system clock will serve as
-  IEEE 1588 timer reference clock.
+  For DPAA FMan,
+  <0> - external high precision timer reference clock (TMR_1588_CLK)
+  <1> - MAC system clock (1/2 FMan clock)
+  <2> - reserved
+  <3> - RTC clock oscillator
+
+  When this attribute is not used, the IEEE 1588 timer reference clock
+  will use the eTSEC system clock (for Gianfar) or the MAC system
+  clock (for DPAA).
 
 Example:
 
index 7cad066191eeb8e6c9711cb81fd50283362fcf4d..3e5398f87eac443b8d6c544c85b8691911f37bd1 100644 (file)
@@ -395,6 +395,7 @@ v3  V3 Semiconductor
 variscite      Variscite Ltd.
 via    VIA Technologies, Inc.
 virtio Virtual I/O Device Specification, developed by the OASIS consortium
+vitesse        Vitesse Semiconductor Corporation
 vivante        Vivante Corporation
 vocore VoCore Studio
 voipac Voipac Technologies s.r.o.
index 2b89d91b376f849d91e9cf2ab3df708fc48f33d0..02a323c432612c6a4cb8101390d38f471da09a91 100644 (file)
@@ -18,8 +18,6 @@ README.ipw2200
        - README for the Intel PRO/Wireless 2915ABG and 2200BG driver.
 README.sb1000
        - info on General Instrument/NextLevel SURFboard1000 cable modem.
-alias.txt
-       - info on using alias network devices.
 altera_tse.txt
        - Altera Triple-Speed Ethernet controller.
 arcnet-hardware.txt
@@ -140,8 +138,6 @@ multiqueue.txt
        - HOWTO for multiqueue network device support.
 netconsole.txt
        - The network console module netconsole.ko: configuration and notes.
-netdev-FAQ.txt
-       - FAQ describing how to submit net changes to netdev mailing list.
 netdev-features.txt
        - Network interface features API description.
 netdevices.txt
diff --git a/Documentation/networking/alias.rst b/Documentation/networking/alias.rst
new file mode 100644 (file)
index 0000000..af7c5ee
--- /dev/null
@@ -0,0 +1,49 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========
+IP-Aliasing
+===========
+
+IP-aliases are an obsolete way to manage multiple IP-addresses/masks
+per interface. Newer tools such as iproute2 support multiple
+address/prefixes per interface, but aliases are still supported
+for backwards compatibility.
+
+An alias is formed by adding a colon and a string when running ifconfig.
+This string is usually numeric, but this is not required.
+
+
+Alias creation
+==============
+
+Alias creation is done by 'magic' interface naming: eg. to create a
+200.1.1.1 alias for eth0 ...
+::
+
+  # ifconfig eth0:0 200.1.1.1  etc,etc....
+       ~~ -> request alias #0 creation (if not yet exists) for eth0
+
+The corresponding route is also set up by this command.  Please note:
+The route always points to the base interface.
+
+
+Alias deletion
+==============
+
+The alias is removed by shutting the alias down::
+
+  # ifconfig eth0:0 down
+       ~~~~~~~~~~ -> will delete alias
+
+
+Alias (re-)configuring
+======================
+
+Aliases are not real devices, but programs should be able to configure
+and refer to them as usual (ifconfig, route, etc).
+
+
+Relationship with main device
+=============================
+
+If the base device is shut down, the added aliases will be deleted too.
diff --git a/Documentation/networking/alias.txt b/Documentation/networking/alias.txt
deleted file mode 100644 (file)
index 85046f5..0000000
+++ /dev/null
@@ -1,40 +0,0 @@
-
-IP-Aliasing:
-============
-
-IP-aliases are an obsolete way to manage multiple IP-addresses/masks
-per interface. Newer tools such as iproute2 support multiple
-address/prefixes per interface, but aliases are still supported
-for backwards compatibility.
-
-An alias is formed by adding a colon and a string when running ifconfig.
-This string is usually numeric, but this is not a must.
-
-o Alias creation.
-  Alias creation is done by 'magic' interface naming: eg. to create a
-  200.1.1.1 alias for eth0 ...
-  
-    # ifconfig eth0:0 200.1.1.1  etc,etc....
-                   ~~ -> request alias #0 creation (if not yet exists) for eth0
-
-    The corresponding route is also set up by this command. 
-    Please note: The route always points to the base interface.
-       
-
-o Alias deletion.
-  The alias is removed by shutting the alias down:
-
-    # ifconfig eth0:0 down
-                 ~~~~~~~~~~ -> will delete alias
-
-                                  
-o Alias (re-)configuring
-
-  Aliases are not real devices, but programs should be able to configure and
-  refer to them as usual (ifconfig, route, etc).
-
-
-o Relationship with main device
-
-  If the base device is shut down the added aliases will be deleted 
-  too.
diff --git a/Documentation/networking/bridge.rst b/Documentation/networking/bridge.rst
new file mode 100644 (file)
index 0000000..4aef9cd
--- /dev/null
@@ -0,0 +1,21 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=================
+Ethernet Bridging
+=================
+
+In order to use the Ethernet bridging functionality, you'll need the
+userspace tools.
+
+Documentation for Linux bridging is on:
+   http://www.linuxfoundation.org/collaborate/workgroups/networking/bridge
+
+The bridge-utilities are maintained at:
+   git://git.kernel.org/pub/scm/linux/kernel/git/shemminger/bridge-utils.git
+
+Additionally, the iproute2 utilities can be used to configure
+bridge devices.
+
+If you still have questions, don't hesitate to post to the mailing list 
+(more info https://lists.linux-foundation.org/mailman/listinfo/bridge).
+
diff --git a/Documentation/networking/bridge.txt b/Documentation/networking/bridge.txt
deleted file mode 100644 (file)
index a27cb62..0000000
+++ /dev/null
@@ -1,15 +0,0 @@
-In order to use the Ethernet bridging functionality, you'll need the
-userspace tools.
-
-Documentation for Linux bridging is on:
-   http://www.linuxfoundation.org/collaborate/workgroups/networking/bridge
-
-The bridge-utilities are maintained at:
-   git://git.kernel.org/pub/scm/linux/kernel/git/shemminger/bridge-utils.git
-
-Additionally, the iproute2 utilities can be used to configure
-bridge devices.
-
-If you still have questions, don't hesitate to post to the mailing list 
-(more info https://lists.linux-foundation.org/mailman/listinfo/bridge).
-
diff --git a/Documentation/networking/can_ucan_protocol.rst b/Documentation/networking/can_ucan_protocol.rst
new file mode 100644 (file)
index 0000000..4cef88d
--- /dev/null
@@ -0,0 +1,332 @@
+=================
+The UCAN Protocol
+=================
+
+UCAN is the protocol used by the microcontroller-based USB-CAN
+adapter that is integrated on System-on-Modules from Theobroma Systems
+and that is also available as a standalone USB stick.
+
+The UCAN protocol has been designed to be hardware-independent.
+It is modeled closely after how Linux represents CAN devices
+internally. All multi-byte integers are encoded as Little Endian.
+
+All structures mentioned in this document are defined in
+``drivers/net/can/usb/ucan.c``.
+
+USB Endpoints
+=============
+
+UCAN devices use three USB endpoints:
+
+CONTROL endpoint
+  The driver sends device management commands on this endpoint
+
+IN endpoint
+  The device sends CAN data frames and CAN error frames
+
+OUT endpoint
+  The driver sends CAN data frames on the out endpoint
+
+
+CONTROL Messages
+================
+
+UCAN devices are configured using vendor requests on the control pipe.
+
+To support multiple CAN interfaces in a single USB device all
+configuration commands target the corresponding interface in the USB
+descriptor.
+
+The driver uses ``ucan_ctrl_command_in/out`` and
+``ucan_device_request_in`` to deliver commands to the device.
+
+Setup Packet
+------------
+
+=================  =====================================================
+``bmRequestType``  Direction | Vendor | (Interface or Device)
+``bRequest``       Command Number
+``wValue``         Subcommand Number (16 Bit) or 0 if not used
+``wIndex``         USB Interface Index (0 for device commands)
+``wLength``        * Host to Device - Number of bytes to transmit
+                   * Device to Host - Maximum number of bytes to
+                     receive. If the device sends less, common ZLP
+                     semantics are used.
+=================  =====================================================
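+
+As a rough illustration of how such a request is issued, the sketch
+below sends a vendor control command to USB interface 0 with
+``usb_control_msg()``. The command number, payload and timeout are
+placeholders, not values defined by the UCAN protocol::
+
+  #include <linux/usb.h>
+
+  #define EXAMPLE_BREQUEST   0x01  /* hypothetical command number */
+  #define EXAMPLE_TIMEOUT_MS 1000  /* assumed timeout */
+
+  static int example_send_cmd(struct usb_device *udev, u16 subcmd,
+                              void *buf, u16 len)
+  {
+          return usb_control_msg(udev, usb_sndctrlpipe(udev, 0),
+                                 EXAMPLE_BREQUEST,
+                                 USB_DIR_OUT | USB_TYPE_VENDOR |
+                                 USB_RECIP_INTERFACE,
+                                 subcmd, /* wValue: subcommand */
+                                 0,      /* wIndex: interface 0 */
+                                 buf, len, EXAMPLE_TIMEOUT_MS);
+  }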
+
+Error Handling
+--------------
+
+The device indicates failed control commands by stalling the
+pipe.
+
+Device Commands
+---------------
+
+UCAN_DEVICE_GET_FW_STRING
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+*Dev2Host; optional*
+
+Request the device firmware string.
+
+
+Interface Commands
+------------------
+
+UCAN_COMMAND_START
+~~~~~~~~~~~~~~~~~~
+
+*Host2Dev; mandatory*
+
+Bring the CAN interface up.
+
+Payload Format
+  ``ucan_ctl_payload_t.cmd_start``
+
+====  ============================
+mode  or mask of ``UCAN_MODE_*``
+====  ============================
+
+UCAN_COMMAND_STOP
+~~~~~~~~~~~~~~~~~~
+
+*Host2Dev; mandatory*
+
+Stop the CAN interface
+
+Payload Format
+  *empty*
+
+UCAN_COMMAND_RESET
+~~~~~~~~~~~~~~~~~~
+
+*Host2Dev; mandatory*
+
+Reset the CAN controller (including error counters)
+
+Payload Format
+  *empty*
+
+UCAN_COMMAND_GET
+~~~~~~~~~~~~~~~~
+
+*Host2Dev; mandatory*
+
+Get Information from the Device
+
+Subcommands
+^^^^^^^^^^^
+
+UCAN_COMMAND_GET_INFO
+  Request the device information structure ``ucan_ctl_payload_t.device_info``.
+
+  See the ``device_info`` field for details, and
+  ``uapi/linux/can/netlink.h`` for an explanation of the
+  ``can_bittiming`` fields.
+
+  Payload Format
+    ``ucan_ctl_payload_t.device_info``
+
+UCAN_COMMAND_GET_PROTOCOL_VERSION
+  Request the device protocol version
+  ``ucan_ctl_payload_t.protocol_version``. The current protocol version is 3.
+
+  Payload Format
+    ``ucan_ctl_payload_t.protocol_version``
+
+.. note:: Devices that do not implement this command use the old
+          protocol version 1
+
+UCAN_COMMAND_SET_BITTIMING
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+*Host2Dev; mandatory*
+
+Set up bittiming by sending the structure
+``ucan_ctl_payload_t.cmd_set_bittiming`` (see ``struct bittiming`` for
+details).
+
+Payload Format
+  ``ucan_ctl_payload_t.cmd_set_bittiming``.
+
+UCAN_SLEEP/WAKE
+~~~~~~~~~~~~~~~
+
+*Host2Dev; optional*
+
+Configure sleep and wake modes. Not yet supported by the driver.
+
+UCAN_FILTER
+~~~~~~~~~~~
+
+*Host2Dev; optional*
+
+Setup hardware CAN filters. Not yet supported by the driver.
+
+Allowed interface commands
+--------------------------
+
+==================  ===================  ==================
+Legal Device State  Command              New Device State
+==================  ===================  ==================
+stopped             SET_BITTIMING        stopped
+stopped             START                started
+started             STOP or RESET        stopped
+stopped             STOP or RESET        stopped
+started             RESTART              started
+any                 GET                  *no change*
+==================  ===================  ==================
+
+IN Message Format
+=================
+
+A data packet on the USB IN endpoint contains one or more
+``ucan_message_in`` values. If multiple messages are batched in a USB
+data packet, the ``len`` field can be used to jump to the next
+``ucan_message_in`` value (take care to sanity-check the ``len`` value
+against the actual data size).
+
+.. _can_ucan_in_message_len:
+
+``len`` field
+-------------
+
+Each ``ucan_message_in`` must be aligned to a 4-byte boundary (relative
+to the start of the data buffer). That means that there
+may be padding bytes between multiple ``ucan_message_in`` values:
+
+.. code::
+
+    +----------------------------+ < 0
+    |                            |
+    |   struct ucan_message_in   |
+    |                            |
+    +----------------------------+ < len
+              [padding]
+    +----------------------------+ < round_up(len, 4)
+    |                            |
+    |   struct ucan_message_in   |
+    |                            |
+    +----------------------------+
+                [...]
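+
+A minimal in-kernel sketch of such a walk is shown below. The reduced
+header layout here is hypothetical (see ``drivers/net/can/usb/ucan.c``
+for the real ``ucan_message_in``); only the ``len`` handling follows
+the rules above::
+
+  struct example_in_hdr {           /* hypothetical reduced header */
+          __le16 len;               /* total length of this message */
+          u8 type;
+          u8 subtype;
+  } __packed;
+
+  static void example_walk_in_urb(u8 *buf, unsigned int urb_len)
+  {
+          unsigned int pos = 0;
+
+          while (pos + sizeof(struct example_in_hdr) <= urb_len) {
+                  struct example_in_hdr *m = (void *)(buf + pos);
+                  unsigned int len = le16_to_cpu(m->len);
+
+                  /* sanity-check len against the actual data size */
+                  if (len < sizeof(*m) || pos + len > urb_len)
+                          break;
+
+                  /* ... dispatch on m->type here ... */
+
+                  /* messages are 4-byte aligned within the buffer */
+                  pos += round_up(len, 4);
+          }
+  }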
+
+``type`` field
+--------------
+
+The ``type`` field specifies the type of the message.
+
+UCAN_IN_RX
+~~~~~~~~~~
+
+``subtype``
+  zero
+
+Data received from the CAN bus (ID + payload).
+
+UCAN_IN_TX_COMPLETE
+~~~~~~~~~~~~~~~~~~~
+
+``subtype``
+  zero
+
+The CAN device has sent a message to the CAN bus. It answers with a
+list of tuples <echo-id, flags>.
+
+The echo-id identifies the frame (it echoes the id from a previous
+UCAN_OUT_TX message). The flags indicate the result of the
+transmission: a set bit 0 indicates success. All other bits
+are reserved and set to zero.
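+
+As an illustration, processing such a list could look like the sketch
+below; the tuple layout is an assumption for this example, not the
+definition from ``ucan.c``::
+
+  struct example_tx_complete {      /* hypothetical tuple layout */
+          u8 echo_index;            /* echo-id from UCAN_OUT_TX */
+          u8 flags;                 /* bit 0: transmission succeeded */
+  };
+
+  static unsigned int
+  example_count_tx_success(const struct example_tx_complete *e,
+                           unsigned int count)
+  {
+          unsigned int ok = 0;
+
+          while (count--) {
+                  /* bit 0 set means the frame went out successfully */
+                  if (e->flags & 0x01)
+                          ok++;
+                  /* ... complete the echo skb for e->echo_index ... */
+                  e++;
+          }
+          return ok;
+  }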
+
+Flow Control
+------------
+
+When receiving CAN messages there is no flow control on the USB
+buffer. The driver has to handle inbound messages quickly enough to
+avoid drops. In case the device buffer overflows, the condition is
+reported by sending corresponding error frames (see
+:ref:`can_ucan_error_handling`).
+
+
+OUT Message Format
+==================
+
+A data packet on the USB OUT endpoint contains one or more ``struct
+ucan_message_out`` values. If multiple messages are batched into one
+data packet, the device uses the ``len`` field to jump to the next
+``ucan_message_out`` value. Each ``ucan_message_out`` must be aligned
+to 4 bytes (relative to the start of the data buffer). The mechanism
+is the same as described in :ref:`can_ucan_in_message_len`.
+
+.. code::
+
+    +----------------------------+ < 0
+    |                            |
+    |   struct ucan_message_out  |
+    |                            |
+    +----------------------------+ < len
+              [padding]
+    +----------------------------+ < round_up(len, 4)
+    |                            |
+    |   struct ucan_message_out  |
+    |                            |
+    +----------------------------+
+                [...]
+
+``type`` field
+--------------
+
+In protocol version 3 only ``UCAN_OUT_TX`` is defined; the others are
+used only by legacy devices (protocol version 1).
+
+UCAN_OUT_TX
+~~~~~~~~~~~
+``subtype``
+  echo id to be returned in a UCAN_IN_TX_COMPLETE message
+
+Transmit a CAN frame. (parameters: ``id``, ``data``)
+
+Flow Control
+------------
+
+When the device outbound buffers are full it starts sending *NAKs* on
+the *OUT* pipe until more buffers are available. The driver stops the
+queue when a certain threshold of outgoing packets is still incomplete.
+
+.. _can_ucan_error_handling:
+
+CAN Error Handling
+==================
+
+If error reporting is turned on, the device encodes errors into CAN
+error frames (see ``uapi/linux/can/error.h``) and sends them using the
+IN endpoint. The driver updates its error statistics and forwards
+them.
+
+Although UCAN devices can suppress error frames completely, in Linux
+the driver is always interested. Hence, the device is always started
+with ``UCAN_MODE_BERR_REPORT`` set. Filtering those messages for
+user space is done by the driver.
+
+Bus OFF
+-------
+
+- The device does not recover from Bus OFF automatically.
+- Bus OFF is indicated by an error frame (see ``uapi/linux/can/error.h``)
+- Bus OFF recovery is started by ``UCAN_COMMAND_RESTART``
+- Once Bus OFF recovery is completed, the device sends an error frame
+  indicating that it is in ERROR-ACTIVE state.
+- During Bus OFF no frames are sent by the device.
+- During Bus OFF transmission requests from the host are completed
+  immediately with the success bit left unset.
+
+Example Conversation
+====================
+
+#) Device is connected to USB
+#) Host sends command ``UCAN_COMMAND_RESET``, subcmd 0
+#) Host sends command ``UCAN_COMMAND_GET``, subcmd ``UCAN_COMMAND_GET_INFO``
+#) Device sends ``UCAN_IN_DEVICE_INFO``
+#) Host sends command ``UCAN_COMMAND_SET_BITTIMING``
+#) Host sends command ``UCAN_COMMAND_START``, subcmd 0, mode ``UCAN_MODE_BERR_REPORT``
index fec8588a588ee2ffb93ef0bc7cf52e3bb8f98397..fcd710f2cc7ade7f89ce81b138ff531bb54a0a11 100644 (file)
@@ -6,15 +6,21 @@ Contents:
 .. toctree::
    :maxdepth: 2
 
+   netdev-FAQ
    af_xdp
    batman-adv
    can
+   can_ucan_protocol
    dpaa2/index
    e100
    e1000
    kapi
    z8530book
    msg_zerocopy
+   failover
+   net_failover
+   alias
+   bridge
 
 .. only::  subproject
 
index ce8fbf5aa63ca36c8cd26b355818662e5c44376f..e74515ecaa9c4b8f66e2fe1a59b584f340ffa718 100644 (file)
@@ -81,6 +81,15 @@ fib_multipath_hash_policy - INTEGER
        0 - Layer 3
        1 - Layer 4
 
+ip_forward_update_priority - INTEGER
+       Whether to update the SKB priority from the "TOS" field in the IPv4
+       header after it is forwarded. The new SKB priority is mapped from the
+       TOS field value according to the rt_tos2priority table (see e.g.
+       man tc-prio).
+       Default: 1 (Update priority.)
+       Possible values:
+       0 - Do not update priority.
+       1 - Update priority.
+
 route/max_size - INTEGER
        Maximum number of routes allowed in the kernel.  Increase
        this when using large numbers of interfaces and/or routes.
@@ -733,11 +742,11 @@ tcp_limit_output_bytes - INTEGER
        Controls TCP Small Queue limit per tcp socket.
        TCP bulk sender tends to increase packets in flight until it
        gets losses notifications. With SNDBUF autotuning, this can
-       result in a large amount of packets queued in qdisc/device
-       on the local machine, hurting latency of other flows, for
-       typical pfifo_fast qdiscs.
-       tcp_limit_output_bytes limits the number of bytes on qdisc
-       or device to reduce artificial RTT/cwnd and reduce bufferbloat.
+       result in a large amount of packets queued on the local machine
+       (e.g.: qdiscs, CPU backlog, or device) hurting latency of other
+       flows, for typical pfifo_fast qdiscs.  tcp_limit_output_bytes
+       limits the number of bytes on qdisc or device to reduce artificial
+       RTT/cwnd and reduce bufferbloat.
        Default: 262144
 
 tcp_challenge_ack_limit - INTEGER
@@ -1834,6 +1843,16 @@ stable_secret - IPv6 address
 
        By default the stable secret is unset.
 
+addr_gen_mode - INTEGER
+       Defines how link-local and autoconf addresses are generated.
+
+       0: generate address based on EUI64 (default)
+       1: do not generate a link-local address, use EUI64 for addresses generated
+          from autoconf
+       2: generate stable privacy addresses, using the secret from
+          stable_secret (RFC7217)
+       3: generate stable privacy addresses, using a random secret if unset
+
 drop_unicast_in_l2_multicast - BOOLEAN
        Drop any unicast IPv6 packets that are received in link-layer
        multicast (or broadcast) frames.
index 70ca2f5800c434a21734879bd753ed1666dfcd61..06c97dcb57caee07743c55d7a500b2a42f1ff0a8 100644 (file)
@@ -36,37 +36,39 @@ feature on the virtio-net interface and assign the same MAC address to both
 virtio-net and VF interfaces.
 
 Here is an example XML snippet that shows such configuration.
-
- <interface type='network'>
-   <mac address='52:54:00:00:12:53'/>
-   <source network='enp66s0f0_br'/>
-   <target dev='tap01'/>
-   <model type='virtio'/>
-   <driver name='vhost' queues='4'/>
-   <link state='down'/>
-   <address type='pci' domain='0x0000' bus='0x00' slot='0x0a' function='0x0'/>
- </interface>
- <interface type='hostdev' managed='yes'>
-   <mac address='52:54:00:00:12:53'/>
-   <source>
-     <address type='pci' domain='0x0000' bus='0x42' slot='0x02' function='0x5'/>
-   </source>
-   <address type='pci' domain='0x0000' bus='0x00' slot='0x0b' function='0x0'/>
- </interface>
+::
+
+  <interface type='network'>
+    <mac address='52:54:00:00:12:53'/>
+    <source network='enp66s0f0_br'/>
+    <target dev='tap01'/>
+    <model type='virtio'/>
+    <driver name='vhost' queues='4'/>
+    <link state='down'/>
+    <address type='pci' domain='0x0000' bus='0x00' slot='0x0a' function='0x0'/>
+  </interface>
+  <interface type='hostdev' managed='yes'>
+    <mac address='52:54:00:00:12:53'/>
+    <source>
+      <address type='pci' domain='0x0000' bus='0x42' slot='0x02' function='0x5'/>
+    </source>
+    <address type='pci' domain='0x0000' bus='0x00' slot='0x0b' function='0x0'/>
+  </interface>
 
 Booting a VM with the above configuration will result in the following 3
 netdevs created in the VM.
-
-4: ens10: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000
-    link/ether 52:54:00:00:12:53 brd ff:ff:ff:ff:ff:ff
-    inet 192.168.12.53/24 brd 192.168.12.255 scope global dynamic ens10
-       valid_lft 42482sec preferred_lft 42482sec
-    inet6 fe80::97d8:db2:8c10:b6d6/64 scope link
-       valid_lft forever preferred_lft forever
-5: ens10nsby: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel master ens10 state UP group default qlen 1000
-    link/ether 52:54:00:00:12:53 brd ff:ff:ff:ff:ff:ff
-7: ens11: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc mq master ens10 state UP group default qlen 1000
-    link/ether 52:54:00:00:12:53 brd ff:ff:ff:ff:ff:ff
+::
+
+  4: ens10: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000
+      link/ether 52:54:00:00:12:53 brd ff:ff:ff:ff:ff:ff
+      inet 192.168.12.53/24 brd 192.168.12.255 scope global dynamic ens10
+         valid_lft 42482sec preferred_lft 42482sec
+      inet6 fe80::97d8:db2:8c10:b6d6/64 scope link
+         valid_lft forever preferred_lft forever
+  5: ens10nsby: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel master ens10 state UP group default qlen 1000
+      link/ether 52:54:00:00:12:53 brd ff:ff:ff:ff:ff:ff
+  7: ens11: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc mq master ens10 state UP group default qlen 1000
+      link/ether 52:54:00:00:12:53 brd ff:ff:ff:ff:ff:ff
 
 ens10 is the 'failover' master netdev, ens10nsby and ens11 are the slave
 'standby' and 'primary' netdevs respectively.
@@ -80,37 +82,38 @@ the paravirtual datapath when the VF is unplugged.
 
 Here is a sample script that shows the steps to initiate live migration on
 the source hypervisor.
+::
 
-# cat vf_xml
-<interface type='hostdev' managed='yes'>
-  <mac address='52:54:00:00:12:53'/>
-  <source>
-    <address type='pci' domain='0x0000' bus='0x42' slot='0x02' function='0x5'/>
-  </source>
-  <address type='pci' domain='0x0000' bus='0x00' slot='0x0b' function='0x0'/>
-</interface>
+  # cat vf_xml
+  <interface type='hostdev' managed='yes'>
+    <mac address='52:54:00:00:12:53'/>
+    <source>
+      <address type='pci' domain='0x0000' bus='0x42' slot='0x02' function='0x5'/>
+    </source>
+    <address type='pci' domain='0x0000' bus='0x00' slot='0x0b' function='0x0'/>
+  </interface>
 
-# Source Hypervisor
-#!/bin/bash
+  # Source Hypervisor
+  #!/bin/bash
 
-DOMAIN=fedora27-tap01
-PF=enp66s0f0
-VF_NUM=5
-TAP_IF=tap01
-VF_XML=
+  DOMAIN=fedora27-tap01
+  PF=enp66s0f0
+  VF_NUM=5
+  TAP_IF=tap01
+  VF_XML=
 
-MAC=52:54:00:00:12:53
-ZERO_MAC=00:00:00:00:00:00
+  MAC=52:54:00:00:12:53
+  ZERO_MAC=00:00:00:00:00:00
 
-virsh domif-setlink $DOMAIN $TAP_IF up
-bridge fdb del $MAC dev $PF master
-virsh detach-device $DOMAIN $VF_XML
-ip link set $PF vf $VF_NUM mac $ZERO_MAC
+  virsh domif-setlink $DOMAIN $TAP_IF up
+  bridge fdb del $MAC dev $PF master
+  virsh detach-device $DOMAIN $VF_XML
+  ip link set $PF vf $VF_NUM mac $ZERO_MAC
 
-virsh migrate --live $DOMAIN qemu+ssh://$REMOTE_HOST/system
+  virsh migrate --live $DOMAIN qemu+ssh://$REMOTE_HOST/system
 
-# Destination Hypervisor
-#!/bin/bash
+  # Destination Hypervisor
+  #!/bin/bash
 
-virsh attach-device $DOMAIN $VF_XML
-virsh domif-setlink $DOMAIN $TAP_IF down
+  virsh attach-device $DOMAIN $VF_XML
+  virsh domif-setlink $DOMAIN $TAP_IF down
diff --git a/Documentation/networking/netdev-FAQ.rst b/Documentation/networking/netdev-FAQ.rst
new file mode 100644 (file)
index 0000000..0ac5fa7
--- /dev/null
@@ -0,0 +1,259 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. _netdev-FAQ:
+
+==========
+netdev FAQ
+==========
+
+Q: What is netdev?
+------------------
+A: It is a mailing list for all network-related Linux stuff.  This
+includes anything found under net/ (i.e. core code like IPv6) and
+drivers/net (i.e. hardware specific drivers) in the Linux source tree.
+
+Note that some subsystems (e.g. wireless drivers) which have a high
+volume of traffic have their own specific mailing lists.
+
+The netdev list is managed (like many other Linux mailing lists) through
+VGER (http://vger.kernel.org/) and archives can be found below:
+
+-  http://marc.info/?l=linux-netdev
+-  http://www.spinics.net/lists/netdev/
+
+Aside from subsystems like those mentioned above, all network-related
+Linux development (i.e. RFC, review, comments, etc.) takes place on
+netdev.
+
+Q: How do the changes posted to netdev make their way into Linux?
+-----------------------------------------------------------------
+A: There are always two trees (git repositories) in play.  Both are
+driven by David Miller, the main network maintainer.  There is the
+``net`` tree, and the ``net-next`` tree.  As you can probably guess from
+the names, the ``net`` tree is for fixes to existing code already in the
+mainline tree from Linus, and ``net-next`` is where the new code goes
+for the future release.  You can find the trees here:
+
+- https://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git
+- https://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git
+
+Q: How often do changes from these trees make it to the mainline Linus tree?
+----------------------------------------------------------------------------
+A: To understand this, you need to know a bit of background information on
+the cadence of Linux development.  Each new release starts off with a
+two week "merge window" where the main maintainers feed their new stuff
+to Linus for merging into the mainline tree.  After the two weeks, the
+merge window is closed, and it is called/tagged ``-rc1``.  No new
+features get mainlined after this -- only fixes to the rc1 content are
+expected.  After roughly a week of collecting fixes to the rc1 content,
+rc2 is released.  This repeats on a roughly weekly basis until rc7
+(typically; sometimes rc6 if things are quiet, or rc8 if things are in a
+state of churn), and a week after the last vX.Y-rcN was done, the
+official vX.Y is released.
+
+Relating that to netdev: At the beginning of the 2-week merge window,
+the ``net-next`` tree will be closed - no new changes/features.  The
+accumulated new content of the past ~10 weeks will be passed onto
+mainline/Linus via a pull request for vX.Y -- at the same time, the
+``net`` tree will start accumulating fixes for this pulled content
+relating to vX.Y
+
+An announcement indicating when ``net-next`` has been closed is usually
+sent to netdev, but knowing the above, you can predict that in advance.
+
+IMPORTANT: Do not send new ``net-next`` content to netdev while the
+``net-next`` tree is closed.
+
+Shortly after the two weeks have passed (and vX.Y-rc1 is released), the
+tree for ``net-next`` reopens to collect content for the next (vX.Y+1)
+release.
+
+If you aren't subscribed to netdev and/or are simply unsure if
+``net-next`` has re-opened yet, simply check the ``net-next`` git
+repository link above for any new networking-related commits.  You may
+also check the following website for the current status:
+
+  http://vger.kernel.org/~davem/net-next.html
+
+The ``net`` tree continues to collect fixes for the vX.Y content, and is
+fed back to Linus at regular (~weekly) intervals.  This means that the
+focus for ``net`` is on stabilization and bug fixes.
+
+Finally, the vX.Y gets released, and the whole cycle starts over.
+
+Q: So where are we now in this cycle?
+----------------------------------------
+A: Load the mainline (Linus) page here:
+
+  https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
+
+and note the top of the "tags" section.  If it is rc1, it is early in
+the dev cycle.  If it was tagged rc7 a week ago, then a release is
+probably imminent.
+
+Q: How do I indicate which tree (net vs. net-next) my patch should be in?
+-------------------------------------------------------------------------
+A: Firstly, think whether you have a bug fix or new "next-like" content.
+Then once decided, assuming that you use git, use the prefix flag, i.e.
+::
+
+  git format-patch --subject-prefix='PATCH net-next' start..finish
+
+Use ``net`` instead of ``net-next`` (always lower case) in the above for
+bug-fix ``net`` content.  If you don't use git, then note the only magic
+in the above is just the subject text of the outgoing e-mail, and you
+can manually change it yourself with whatever MUA you are comfortable
+with.
+
+Q: I sent a patch and I'm wondering what happened to it?
+--------------------------------------------------------
+Q: How can I tell whether it got merged?
+A: Start by looking at the main patchworks queue for netdev:
+
+  http://patchwork.ozlabs.org/project/netdev/list/
+
+The "State" field will tell you exactly where things are at with your
+patch.
+
+Q: The above only says "Under Review".  How can I find out more?
+----------------------------------------------------------------
+A: Generally speaking, the patches get triaged quickly (in less than
+48h).  So be patient.  Asking the maintainer for status updates on your
+patch is a good way to ensure your patch is ignored or pushed to the
+bottom of the priority list.
+
+Q: I submitted multiple versions of the patch series
+----------------------------------------------------
+Q: should I directly update patchwork for the previous versions of these
+patch series?
+A: No, please don't interfere with the patch status on patchwork, leave
+it to the maintainer to figure out what is the most recent and current
+version that should be applied. If there is any doubt, the maintainer
+will reply and ask what should be done.
+
+Q: How can I tell what patches are queued up for backporting to the various stable releases?
+--------------------------------------------------------------------------------------------
+A: Normally Greg Kroah-Hartman collects stable commits himself, but for
+networking, Dave collects up patches he deems critical for the
+networking subsystem, and then hands them off to Greg.
+
+There is a patchworks queue that you can see here:
+
+  http://patchwork.ozlabs.org/bundle/davem/stable/?state=*
+
+It contains the patches which Dave has selected, but not yet handed off
+to Greg.  If Greg already has the patch, then it will be here:
+
+  https://git.kernel.org/pub/scm/linux/kernel/git/stable/stable-queue.git
+
+A quick way to find whether the patch is in this stable-queue is to
+simply clone the repo, and then git grep the mainline commit ID, e.g.
+::
+
+  stable-queue$ git grep -l 284041ef21fdf2e
+  releases/3.0.84/ipv6-fix-possible-crashes-in-ip6_cork_release.patch
+  releases/3.4.51/ipv6-fix-possible-crashes-in-ip6_cork_release.patch
+  releases/3.9.8/ipv6-fix-possible-crashes-in-ip6_cork_release.patch
+  stable/stable-queue$
+
+Q: I see a network patch and I think it should be backported to stable.
+-----------------------------------------------------------------------
+Q: Should I request it via stable@vger.kernel.org like the references in
+the kernel's Documentation/process/stable-kernel-rules.rst file say?
+A: No, not for networking.  Check the stable queues as per above first
+to see if it is already queued.  If not, then send a mail to netdev,
+listing the upstream commit ID and why you think it should be a stable
+candidate.
+
+Before you jump to go do the above, do note that the normal stable rules
+in :ref:`Documentation/process/stable-kernel-rules.rst <stable_kernel_rules>`
+still apply.  So you need to explicitly indicate why it is a critical
+fix and exactly what users are impacted.  In addition, you need to
+convince yourself that you *really* think it has been overlooked,
+vs. having been considered and rejected.
+
+Generally speaking, the longer it has had a chance to "soak" in
+mainline, the better the odds that it is an OK candidate for stable.  So
+scrambling to request a commit be added the day after it appears should
+be avoided.
+
+Q: I have created a network patch and I think it should be backported to stable.
+--------------------------------------------------------------------------------
+Q: Should I add a Cc: stable@vger.kernel.org like the references in the
+kernel's Documentation/ directory say?
+A: No.  See above answer.  In short, if you think it really belongs in
+stable, then ensure you write a decent commit log that describes who
+gets impacted by the bug fix and how it manifests itself, and when the
+bug was introduced.  If you do that properly, then the commit will get
+handled appropriately and most likely get put in the patchworks stable
+queue if it really warrants it.
+
+If you think there is some valid information relating to it being in
+stable that does *not* belong in the commit log, then use the three dash
+marker line as described in
+:ref:`Documentation/process/submitting-patches.rst <the_canonical_patch_format>`
+to temporarily embed that information into the patch that you send.
+
+Q: Are all networking bug fixes backported to all stable releases?
+------------------------------------------------------------------
+A: Due to capacity, Dave can only take care of the backports for the
+last two stable releases. For earlier stable releases, each stable
+branch maintainer is supposed to take care of them. If you find any
+patch is missing from an earlier stable branch, please notify
+stable@vger.kernel.org with either a commit ID or a formal patch
+backported, and CC Dave and other relevant networking developers.
+
+Q: Is the comment style convention different for the networking content?
+------------------------------------------------------------------------
+A: Yes, in a largely trivial way.  Instead of this::
+
+  /*
+   * foobar blah blah blah
+   * another line of text
+   */
+
+it is requested that you make it look like this::
+
+  /* foobar blah blah blah
+   * another line of text
+   */
+
+Q: I am working in existing code that has the former comment style and not the latter.
+--------------------------------------------------------------------------------------
+Q: Should I submit new code in the former style or the latter?
+A: Make it the latter style, so that eventually all code in the domain
+of netdev is of this format.
+
+Q: I found a bug that might have possible security implications or similar.
+---------------------------------------------------------------------------
+Q: Should I mail the main netdev maintainer off-list?
+A: No. The current netdev maintainer has consistently requested that
+people use the mailing lists and not reach out directly.  If you aren't
+OK with that, then perhaps consider mailing security@kernel.org or
+reading about http://oss-security.openwall.org/wiki/mailing-lists/distros
+as possible alternative mechanisms.
+
+Q: What level of testing is expected before I submit my change?
+---------------------------------------------------------------
+A: If your changes are against ``net-next``, the expectation is that you
+have tested by layering your changes on top of ``net-next``.  Ideally
+you will have done run-time testing specific to your change, but at a
+minimum, your changes should survive an ``allyesconfig`` and an
+``allmodconfig`` build without new warnings or failures.
+
+Q: Any other tips to help ensure my net/net-next patch gets OK'd?
+-----------------------------------------------------------------
+A: Attention to detail.  Re-read your own work as if you were the
+reviewer.  You can start with using ``checkpatch.pl``, perhaps even with
+the ``--strict`` flag.  But do not be mindlessly robotic in doing so.
+If your change is a bug fix, make sure your commit log indicates the
+end-user visible symptom, the underlying reason as to why it happens,
+and then if necessary, explain why the fix proposed is the best way to
+get things done.  Don't mangle whitespace, and as is common, don't
+mis-indent function arguments that span multiple lines.  If it is your
+first patch, mail it to yourself so you can test apply it to an
+unpatched tree to confirm infrastructure didn't mangle it.
+
+Finally, go back and read
+:ref:`Documentation/process/submitting-patches.rst <submittingpatches>`
+to be sure you are not repeating some common mistake documented there.
diff --git a/Documentation/networking/netdev-FAQ.txt b/Documentation/networking/netdev-FAQ.txt
deleted file mode 100644 (file)
index fa951b8..0000000
+++ /dev/null
@@ -1,244 +0,0 @@
-
-Information you need to know about netdev
------------------------------------------
-
-Q: What is netdev?
-
-A: It is a mailing list for all network-related Linux stuff.  This includes
-   anything found under net/  (i.e. core code like IPv6) and drivers/net
-   (i.e. hardware specific drivers) in the Linux source tree.
-
-   Note that some subsystems (e.g. wireless drivers) which have a high volume
-   of traffic have their own specific mailing lists.
-
-   The netdev list is managed (like many other Linux mailing lists) through
-   VGER ( http://vger.kernel.org/ ) and archives can be found below:
-
-       http://marc.info/?l=linux-netdev
-       http://www.spinics.net/lists/netdev/
-
-   Aside from subsystems like that mentioned above, all network-related Linux
-   development (i.e. RFC, review, comments, etc.) takes place on netdev.
-
-Q: How do the changes posted to netdev make their way into Linux?
-
-A: There are always two trees (git repositories) in play.  Both are driven
-   by David Miller, the main network maintainer.  There is the "net" tree,
-   and the "net-next" tree.  As you can probably guess from the names, the
-   net tree is for fixes to existing code already in the mainline tree from
-   Linus, and net-next is where the new code goes for the future release.
-   You can find the trees here:
-
-        https://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git
-        https://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git
-
-Q: How often do changes from these trees make it to the mainline Linus tree?
-
-A: To understand this, you need to know a bit of background information
-   on the cadence of Linux development.  Each new release starts off with
-   a two week "merge window" where the main maintainers feed their new
-   stuff to Linus for merging into the mainline tree.  After the two weeks,
-   the merge window is closed, and it is called/tagged "-rc1".  No new
-   features get mainlined after this -- only fixes to the rc1 content
-   are expected.  After roughly a week of collecting fixes to the rc1
-   content, rc2 is released.  This repeats on a roughly weekly basis
-   until rc7 (typically; sometimes rc6 if things are quiet, or rc8 if
-   things are in a state of churn), and a week after the last vX.Y-rcN
-   was done, the official "vX.Y" is released.
-
-   Relating that to netdev:  At the beginning of the 2-week merge window,
-   the net-next tree will be closed - no new changes/features.  The
-   accumulated new content of the past ~10 weeks will be passed onto
-   mainline/Linus via a pull request for vX.Y -- at the same time,
-   the "net" tree will start accumulating fixes for this pulled content
-   relating to vX.Y
-
-   An announcement indicating when net-next has been closed is usually
-   sent to netdev, but knowing the above, you can predict that in advance.
-
-   IMPORTANT:  Do not send new net-next content to netdev during the
-   period during which net-next tree is closed.
-
-   Shortly after the two weeks have passed (and vX.Y-rc1 is released), the
-   tree for net-next reopens to collect content for the next (vX.Y+1) release.
-
-   If you aren't subscribed to netdev and/or are simply unsure if net-next
-   has re-opened yet, simply check the net-next git repository link above for
-   any new networking-related commits.  You may also check the following
-   website for the current status:
-
-        http://vger.kernel.org/~davem/net-next.html
-
-   The "net" tree continues to collect fixes for the vX.Y content, and
-   is fed back to Linus at regular (~weekly) intervals.  Meaning that the
-   focus for "net" is on stabilization and bugfixes.
-
-   Finally, the vX.Y gets released, and the whole cycle starts over.
-
-Q: So where are we now in this cycle?
-
-A: Load the mainline (Linus) page here:
-
-       https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
-
-   and note the top of the "tags" section.  If it is rc1, it is early
-   in the dev cycle.  If it was tagged rc7 a week ago, then a release
-   is probably imminent.
-
-Q: How do I indicate which tree (net vs. net-next) my patch should be in?
-
-A: Firstly, think whether you have a bug fix or new "next-like" content.
-   Then once decided, assuming that you use git, use the prefix flag, i.e.
-
-       git format-patch --subject-prefix='PATCH net-next' start..finish
-
-   Use "net" instead of "net-next" (always lower case) in the above for
-   bug-fix net content.  If you don't use git, then note the only magic in
-   the above is just the subject text of the outgoing e-mail, and you can
-   manually change it yourself with whatever MUA you are comfortable with.
-
-Q: I sent a patch and I'm wondering what happened to it.  How can I tell
-   whether it got merged?
-
-A: Start by looking at the main patchworks queue for netdev:
-
-       http://patchwork.ozlabs.org/project/netdev/list/
-
-   The "State" field will tell you exactly where things are at with
-   your patch.
-
-Q: The above only says "Under Review".  How can I find out more?
-
-A: Generally speaking, the patches get triaged quickly (in less than 48h).
-   So be patient.  Asking the maintainer for status updates on your
-   patch is a good way to ensure your patch is ignored or pushed to
-   the bottom of the priority list.
-
-Q: I submitted multiple versions of the patch series, should I directly update
-   patchwork for the previous versions of these patch series?
-
-A: No, please don't interfere with the patch status on patchwork, leave it to
-   the maintainer to figure out what is the most recent and current version that
-   should be applied. If there is any doubt, the maintainer will reply and ask
-   what should be done.
-
-Q: How can I tell what patches are queued up for backporting to the
-   various stable releases?
-
-A: Normally Greg Kroah-Hartman collects stable commits himself, but
-   for networking, Dave collects up patches he deems critical for the
-   networking subsystem, and then hands them off to Greg.
-
-   There is a patchworks queue that you can see here:
-       http://patchwork.ozlabs.org/bundle/davem/stable/?state=*
-
-   It contains the patches which Dave has selected, but not yet handed
-   off to Greg.  If Greg already has the patch, then it will be here:
-       https://git.kernel.org/pub/scm/linux/kernel/git/stable/stable-queue.git
-
-   A quick way to find whether the patch is in this stable-queue is
-   to simply clone the repo, and then git grep the mainline commit ID, e.g.
-
-       stable-queue$ git grep -l 284041ef21fdf2e
-       releases/3.0.84/ipv6-fix-possible-crashes-in-ip6_cork_release.patch
-       releases/3.4.51/ipv6-fix-possible-crashes-in-ip6_cork_release.patch
-       releases/3.9.8/ipv6-fix-possible-crashes-in-ip6_cork_release.patch
-       stable/stable-queue$
-
-Q: I see a network patch and I think it should be backported to stable.
-   Should I request it via "stable@vger.kernel.org" like the references in
-   the kernel's Documentation/process/stable-kernel-rules.rst file say?
-
-A: No, not for networking.  Check the stable queues as per above 1st to see
-   if it is already queued.  If not, then send a mail to netdev, listing
-   the upstream commit ID and why you think it should be a stable candidate.
-
-   Before you jump to go do the above, do note that the normal stable rules
-   in Documentation/process/stable-kernel-rules.rst still apply.  So you need to
-   explicitly indicate why it is a critical fix and exactly what users are
-   impacted.  In addition, you need to convince yourself that you _really_
-   think it has been overlooked, vs. having been considered and rejected.
-
-   Generally speaking, the longer it has had a chance to "soak" in mainline,
-   the better the odds that it is an OK candidate for stable.  So scrambling
-   to request a commit be added the day after it appears should be avoided.
-
-Q: I have created a network patch and I think it should be backported to
-   stable.  Should I add a "Cc: stable@vger.kernel.org" like the references
-   in the kernel's Documentation/ directory say?
-
-A: No.  See above answer.  In short, if you think it really belongs in
-   stable, then ensure you write a decent commit log that describes who
-   gets impacted by the bugfix and how it manifests itself, and when the
-   bug was introduced.  If you do that properly, then the commit will
-   get handled appropriately and most likely get put in the patchworks
-   stable queue if it really warrants it.
-
-   If you think there is some valid information relating to it being in
-   stable that does _not_ belong in the commit log, then use the three
-   dash marker line as described in Documentation/process/submitting-patches.rst to
-   temporarily embed that information into the patch that you send.
-
-Q: Are all networking bug fixes backported to all stable releases?
-
-A: Due to capacity, Dave could only take care of the backports for the last
-   2 stable releases. For earlier stable releases, each stable branch maintainer
-   is supposed to take care of them. If you find any patch is missing from an
-   earlier stable branch, please notify stable@vger.kernel.org with either a
-   commit ID or a formal patch backported, and CC Dave and other relevant
-   networking developers.
-
-Q: Someone said that the comment style and coding convention is different
-   for the networking content.  Is this true?
-
-A: Yes, in a largely trivial way.  Instead of this:
-
-       /*
-        * foobar blah blah blah
-        * another line of text
-        */
-
-   it is requested that you make it look like this:
-
-       /* foobar blah blah blah
-        * another line of text
-        */
-
-Q: I am working in existing code that has the former comment style and not the
-   latter.  Should I submit new code in the former style or the latter?
-
-A: Make it the latter style, so that eventually all code in the domain of
-   netdev is of this format.
-
-Q: I found a bug that might have possible security implications or similar.
-   Should I mail the main netdev maintainer off-list?
-
-A: No. The current netdev maintainer has consistently requested that people
-   use the mailing lists and not reach out directly.  If you aren't OK with
-   that, then perhaps consider mailing "security@kernel.org" or reading about
-   http://oss-security.openwall.org/wiki/mailing-lists/distros
-   as possible alternative mechanisms.
-
-Q: What level of testing is expected before I submit my change?
-
-A: If your changes are against net-next, the expectation is that you
-   have tested by layering your changes on top of net-next.  Ideally you
-   will have done run-time testing specific to your change, but at a
-   minimum, your changes should survive an "allyesconfig" and an
-   "allmodconfig" build without new warnings or failures.
-
-Q: Any other tips to help ensure my net/net-next patch gets OK'd?
-
-A: Attention to detail.  Re-read your own work as if you were the
-   reviewer.  You can start with using checkpatch.pl, perhaps even
-   with the "--strict" flag.  But do not be mindlessly robotic in
-   doing so.  If your change is a bug-fix, make sure your commit log
-   indicates the end-user visible symptom, the underlying reason as
-   to why it happens, and then if necessary, explain why the fix proposed
-   is the best way to get things done.   Don't mangle whitespace, and as
-   is common, don't mis-indent function arguments that span multiple lines.
-   If it is your first patch, mail it to yourself so you can test apply
-   it to an unpatched tree to confirm infrastructure didn't mangle it.
-
-   Finally, go back and read Documentation/process/submitting-patches.rst to be
-   sure you are not repeating some common mistake documented there.
index f55639d71d35b8c466252808cf9b0e58d42b7f30..b7056a8a0540682163a39f90aeb38ec8314bf77d 100644 (file)
@@ -366,8 +366,13 @@ XPS: Transmit Packet Steering
 
 Transmit Packet Steering is a mechanism for intelligently selecting
 which transmit queue to use when transmitting a packet on a multi-queue
-device. To accomplish this, a mapping from CPU to hardware queue(s) is
-recorded. The goal of this mapping is usually to assign queues
+device. This can be accomplished by recording two kinds of maps: a
+mapping of CPUs to hardware queue(s), or a mapping of receive queue(s)
+to hardware transmit queue(s).
+
+1. XPS using CPUs map
+
+The goal of this mapping is usually to assign queues
 exclusively to a subset of CPUs, where the transmit completions for
 these queues are processed on a CPU within this set. This choice
 provides two benefits. First, contention on the device queue lock is
@@ -377,15 +382,40 @@ transmit queue). Secondly, cache miss rate on transmit completion is
 reduced, in particular for data cache lines that hold the sk_buff
 structures.
 
-XPS is configured per transmit queue by setting a bitmap of CPUs that
-may use that queue to transmit. The reverse mapping, from CPUs to
-transmit queues, is computed and maintained for each network device.
-When transmitting the first packet in a flow, the function
-get_xps_queue() is called to select a queue. This function uses the ID
-of the running CPU as a key into the CPU-to-queue lookup table. If the
+2. XPS using receive queues map
+
+This mapping is used to pick the transmit queue based on the receive
+queue(s) map configured by the administrator. A set of receive queues
+can be mapped to a set of transmit queues (many:many), although the
+common use case is a 1:1 mapping. This enables sending packets on the
+same queue association for transmit and receive. This is useful for
+busy-polling multi-threaded workloads where it is hard to associate a
+given CPU with a given application thread. The application threads are
+not pinned to CPUs and each thread handles packets received on a single
+queue. The receive queue number is cached in the socket for the
+connection. In this model, sending the packets on the transmit queue
+corresponding to the associated receive queue helps keep the CPU
+overhead low. Transmit completion work is locked into the same queue
+association that a given application is polling on. This avoids the
+overhead of triggering an interrupt on another CPU. When the
+application cleans up the packets during the busy poll, transmit
+completion may be processed along with it in the same thread context,
+resulting in reduced latency.
+
+XPS is configured per transmit queue by setting a bitmap of
+CPUs/receive-queues that may use that queue to transmit. The reverse
+mapping, from CPUs to transmit queues or from receive-queues to transmit
+queues, is computed and maintained for each network device. When
+transmitting the first packet in a flow, the function get_xps_queue() is
+called to select a queue. This function uses the ID of the receive
+queue recorded in the socket for the connection as a key into the
+receive-queue-to-transmit-queue lookup table. Alternatively, it can use
+the ID of the running CPU as a key into the CPU-to-queue lookup table.
+If the
 ID matches a single queue, that is used for transmission. If multiple
 queues match, one is selected by using the flow hash to compute an index
-into the set.
+into the set. When selecting the transmit queue based on the receive
+queue(s) map, the transmit device is not validated against the receive
+device, as that would require an expensive lookup operation in the
+datapath.
 
 The queue chosen for transmitting a particular flow is saved in the
 corresponding socket structure for the flow (e.g. a TCP connection).
@@ -404,11 +434,15 @@ acknowledged.
 
 XPS is only available if the kconfig symbol CONFIG_XPS is enabled (on by
 default for SMP). The functionality remains disabled until explicitly
-configured. To enable XPS, the bitmap of CPUs that may use a transmit
-queue is configured using the sysfs file entry:
+configured. To enable XPS, the bitmap of CPUs/receive-queues that may
+use a transmit queue is configured using the sysfs file entry:
 
+For selection based on CPUs map:
 /sys/class/net/<dev>/queues/tx-<n>/xps_cpus
 
+For selection based on receive-queues map:
+/sys/class/net/<dev>/queues/tx-<n>/xps_rxqs
+
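+For example, to let CPUs 0-3 transmit on queue tx-0, or to steer
+traffic received on queue 0 to tx-0 (a minimal sketch; the device
+name, mask values and queue numbers are illustrative):
+
+echo f > /sys/class/net/eth0/queues/tx-0/xps_cpus
+echo 1 > /sys/class/net/eth0/queues/tx-0/xps_rxqs
+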
 == Suggested Configuration
 
 For a network device with a single transmission queue, XPS configuration
@@ -421,6 +455,11 @@ best CPUs to share a given queue are probably those that share the cache
 with the CPU that processes transmit completions for that queue
 (transmit interrupts).
 
+For transmit queue selection based on receive queue(s), XPS has to be
+explicitly configured by mapping receive queue(s) to transmit queue(s).
+If the user configuration for the receive-queue map does not apply,
+then the transmit queue is selected based on the CPUs map.
+
 Per TX Queue rate limitation:
 =============================
 
diff --git a/Documentation/networking/ti-cpsw.txt b/Documentation/networking/ti-cpsw.txt
new file mode 100644 (file)
index 0000000..6703920
--- /dev/null
@@ -0,0 +1,540 @@
+* Texas Instruments CPSW ethernet driver
+
+Multiqueue & CBS & MQPRIO
+=====================================================================
+=====================================================================
+
+The cpsw has 3 CBS shapers for each external port. This document
+describes MQPRIO and CBS Qdisc offload configuration for the cpsw
+driver based on examples. It can potentially be used in audio/video
+bridging (AVB) and time-sensitive networking (TSN).
+
+The following examples were tested on AM572x EVM and BBB boards.
+
+Test setup
+==========
+
+Two examples are considered below, with an AM572x EVM running the cpsw
+driver in dual_emac mode.
+
+Several prerequisites:
+- TX queues must be rated starting from txq0, which has the highest
+  priority
+- Traffic classes are used starting from 0, which has the highest
+  priority
+- CBS shapers should be used with rated queues
+- The bandwidth for CBS shapers has to be set a little higher than the
+  potential incoming rate; thus, the rate of all incoming tx queues has
+  to be a little lower
+- Real rates can differ from the configured ones due to discreteness
+- Mapping skb-priority to txq is not enough; an skb-priority to L2
+  priority map also has to be created with the ip or vconfig tool
+- Any L2/socket prio (0 - 7) can be used for the classes, but for
+  simplicity the default values 3 and 2 are used
+- Only 2 classes (A and B) were tested, but more can work; a maximum of
+  4 classes is allowed, though a rate can be set for only 3 of them
+
+Test setup for examples
+=======================
+                                    +-------------------------------+
+                                    |--+                            |
+                                    |  |      Workstation0          |
+                                    |E |  MAC 18:03:73:66:87:42     |
++-----------------------------+  +--|t |                            |
+|                    | 1  | E |  |  |h |./tsn_listener -d \         |
+|  Target board:     | 0  | t |--+  |0 | 18:03:73:66:87:42 -i eth0 \|
+|  AM572x EVM        | 0  | h |     |  | -s 1500                    |
+|                    | 0  | 0 |     |--+                            |
+|  Only 2 classes:   |Mb  +---|     +-------------------------------+
+|  class A, class B  |        |
+|                    |    +---|     +-------------------------------+
+|                    | 1  | E |     |--+                            |
+|                    | 0  | t |     |  |      Workstation1          |
+|                    | 0  | h |--+  |E |  MAC 20:cf:30:85:7d:fd     |
+|                    |Mb  | 1 |  +--|t |                            |
++-----------------------------+     |h |./tsn_listener -d \         |
+                                    |0 | 20:cf:30:85:7d:fd -i eth0 \|
+                                    |  | -s 1500                    |
+                                    |--+                            |
+                                    +-------------------------------+
+
+*********************************************************************
+*********************************************************************
+*********************************************************************
+Example 1: One port tx AVB configuration scheme for target board
+----------------------------------------------------------------------
+(output and scheme are for the AM572x EVM, applicable to single port boards)
+
+tc - traffic class
+txq - transmit queue
+p - priority
+f - fifo (cpsw fifo)
+S - shaper configured
+
++------------------------------------------------------------------+ u
+| +---------------+  +---------------+  +------+ +------+          | s
+| |               |  |               |  |      | |      |          | e
+| | App 1         |  | App 2         |  | Apps | | Apps |          | r
+| | Class A       |  | Class B       |  | Rest | | Rest |          |
+| | Eth0          |  | Eth0          |  | Eth0 | | Eth1 |          | s
+| | VLAN100       |  | VLAN100       |  |   |  | |   |  |          | p
+| | 40 Mb/s       |  | 20 Mb/s       |  |   |  | |   |  |          | a
+| | SO_PRIORITY=3 |  | SO_PRIORITY=2 |  |   |  | |   |  |          | c
+| |   |           |  |   |           |  |   |  | |   |  |          | e
+| +---|-----------+  +---|-----------+  +---|--+ +---|--+          |
++-----|------------------|------------------|--------|-------------+
+    +-+     +------------+                  |        |
+    |       |             +-----------------+     +--+
+    |       |             |                       |
++---|-------|-------------|-----------------------|----------------+
+| +----+ +----+ +----+ +----+                   +----+             |
+| | p3 | | p2 | | p1 | | p0 |                   | p0 |             | k
+| \    / \    / \    / \    /                   \    /             | e
+|  \  /   \  /   \  /   \  /                     \  /              | r
+|   \/     \/     \/     \/                       \/               | n
+|    |     |             |                        |                | e
+|    |     |       +-----+                        |                | l
+|    |     |       |                              |                |
+| +----+ +----+ +----+                          +----+             | s
+| |tc0 | |tc1 | |tc2 |                          |tc0 |             | p
+| \    / \    / \    /                          \    /             | a
+|  \  /   \  /   \  /                            \  /              | c
+|   \/     \/     \/                              \/               | e
+|   |      |       +-----+                        |                |
+|   |      |       |     |                        |                |
+|   |      |       |     |                        |                |
+|   |      |       |     |                        |                |
+| +----+ +----+ +----+ +----+                   +----+             |
+| |txq0| |txq1| |txq2| |txq3|                   |txq4|             |
+| \    / \    / \    / \    /                   \    /             |
+|  \  /   \  /   \  /   \  /                     \  /              |
+|   \/     \/     \/     \/                       \/               |
+| +-|------|------|------|--+                  +--|--------------+ |
+| | |      |      |      |  | Eth0.100         |  |     Eth1     | |
++---|------|------|------|------------------------|----------------+
+    |      |      |      |                        |
+    p      p      p      p                        |
+    3      2      0-1, 4-7  <- L2 priority        |
+    |      |      |      |                        |
+    |      |      |      |                        |
++---|------|------|------|------------------------|----------------+
+|   |      |      |      |             |----------+                |
+| +----+ +----+ +----+ +----+       +----+                         |
+| |dma7| |dma6| |dma5| |dma4|       |dma3|                         |
+| \    / \    / \    / \    /       \    /                         | c
+|  \S /   \S /   \  /   \  /         \  /                          | p
+|   \/     \/     \/     \/           \/                           | s
+|   |      |      | +-----            |                            | w
+|   |      |      | |                 |                            |
+|   |      |      | |                 |                            | d
+| +----+ +----+ +----+p            p+----+                         | r
+| |    | |    | |    |o            o|    |                         | i
+| | f3 | | f2 | | f0 |r            r| f0 |                         | v
+| |tc0 | |tc1 | |tc2 |t            t|tc0 |                         | e
+| \CBS / \CBS / \CBS /1            2\CBS /                         | r
+|  \S /   \S /   \  /                \  /                          |
+|   \/     \/     \/                  \/                           |
++------------------------------------------------------------------+
+========================================Eth==========================>
+
+1)
+// Add 4 tx queues for interface Eth0 and 1 tx queue for Eth1
+$ ethtool -L eth0 rx 1 tx 5
+rx unmodified, ignoring
+
+2)
+// Check if the number of queues is set correctly:
+$ ethtool -l eth0
+Channel parameters for eth0:
+Pre-set maximums:
+RX:             8
+TX:             8
+Other:          0
+Combined:       0
+Current hardware settings:
+RX:             1
+TX:             5
+Other:          0
+Combined:       0
+
+3)
+// TX queues must be rated starting from 0, so set bandwidths for tx0
+// and tx1, i.e. rates of 40 and 20 Mb/s respectively (tx_maxrate is
+// specified in Mb/s). The real speed can differ a bit due to
+// discreteness. Leave the remaining tx queues unrated.
+$ echo 40 > /sys/class/net/eth0/queues/tx-0/tx_maxrate
+$ echo 20 > /sys/class/net/eth0/queues/tx-1/tx_maxrate
+
+4)
+// Check maximum rate of tx (cpdma) queues:
+$ cat /sys/class/net/eth0/queues/tx-*/tx_maxrate
+40
+20
+0
+0
+0
+
+5)
+// Map skb->priority to traffic class:
+// 3pri -> tc0, 2pri -> tc1, (0,1,4-7)pri -> tc2
+// Map traffic class to transmit queue:
+// tc0 -> txq0, tc1 -> txq1, tc2 -> (txq2, txq3)
+$ tc qdisc replace dev eth0 handle 100: parent root mqprio num_tc 3 \
+map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 queues 1@0 1@1 2@2 hw 1
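+// (In mqprio syntax, "map" lists the traffic class for each of the
+// 16 skb priorities, and "queues" takes count@offset pairs, so
+// "1@0 1@1 2@2" means tc0 -> txq0, tc1 -> txq1, tc2 -> txq2,txq3.)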
+
+5a)
+// As the two interfaces share the same set of tx queues, assign all
+// traffic coming in on interface Eth1 to a separate queue so that it
+// is not mixed with traffic from interface Eth0; that is, use a
+// separate txq to send packets to Eth1: all prio -> tc0 and
+// tc0 -> txq4. Note "hw 0" here, so the hardware configuration for
+// eth1 is still the default one.
+$ tc qdisc replace dev eth1 handle 100: parent root mqprio num_tc 1 \
+map 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 queues 1@4 hw 0
+
+6)
+// Check the class settings
+$ tc -g class show dev eth0
++---(100:ffe2) mqprio
+|    +---(100:3) mqprio
+|    +---(100:4) mqprio
+|
++---(100:ffe1) mqprio
+|    +---(100:2) mqprio
+|
++---(100:ffe0) mqprio
+     +---(100:1) mqprio
+
+$ tc -g class show dev eth1
++---(100:ffe0) mqprio
+     +---(100:5) mqprio
+
+7)
+// Set rate for class A - 41 Mbit (tc0, txq0) using the CBS Qdisc.
+// Set it +1 Mb as a reserve (important!)
+// Only the idle slope is important here; the other args are ignored.
+// The real speed can differ a bit due to discreteness.
+$ tc qdisc add dev eth0 parent 100:1 cbs locredit -1438 \
+hicredit 62 sendslope -959000 idleslope 41000 offload 1
+net eth0: set FIFO3 bw = 50
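+
+// (For reference, these values follow the usual CBS relations, here
+// assuming a 1Gb/s link and ~1500 byte frames - a sketch, not exact
+// driver math:
+//  sendslope = idleslope - port_rate           = 41000 - 1000000 = -959000
+//  hicredit ~= max_frame * idleslope/port_rate = 1500 * 0.041    ~=  62
+//  locredit ~= max_frame * sendslope/port_rate = 1500 * -0.959   ~= -1438)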
+
+8)
+// Set rate for class B - 21 Mbit (tc1, txq1) using CBS Qdisc:
+// Set it +1 Mb as a reserve (important!)
+$ tc qdisc add dev eth0 parent 100:2 cbs locredit -1468 \
+hicredit 65 sendslope -979000 idleslope 21000 offload 1
+net eth0: set FIFO2 bw = 30
+
+9)
+// Create vlan 100 to map sk->priority to vlan qos
+$ ip link add link eth0 name eth0.100 type vlan id 100
+8021q: 802.1Q VLAN Support v1.8
+8021q: adding VLAN 0 to HW filter on device eth0
+8021q: adding VLAN 0 to HW filter on device eth1
+net eth0: Adding vlanid 100 to vlan filter
+
+10)
+// Map skb->priority to L2 prio, 1 to 1
+$ ip link set eth0.100 type vlan \
+egress 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
+
+11)
+// Check egress map for vlan 100
+$ cat /proc/net/vlan/eth0.100
+[...]
+INGRESS priority mappings: 0:0  1:0  2:0  3:0  4:0  5:0  6:0 7:0
+EGRESS priority mappings: 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
+
+12)
+// Run your tools with the socket option "SO_PRIORITY" set
+// to 3 for class A and/or to 2 for class B
+// (tsn_talker taken from https://www.spinics.net/lists/netdev/msg460869.html)
+./tsn_talker -d 18:03:73:66:87:42 -i eth0.100 -p3 -s 1500&
+./tsn_talker -d 18:03:73:66:87:42 -i eth0.100 -p2 -s 1500&
+
+13)
+// Run your listener on the workstation (it should be in the same vlan)
+// (tsn_listener taken from https://www.spinics.net/lists/netdev/msg460869.html)
+./tsn_listener -d 18:03:73:66:87:42 -i enp5s0 -s 1500
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39000 kbps
+
+14)
+// Restore default configuration if needed
+$ ip link del eth0.100
+$ tc qdisc del dev eth1 root
+$ tc qdisc del dev eth0 root
+net eth0: Prev FIFO2 is shaped
+net eth0: set FIFO3 bw = 0
+net eth0: set FIFO2 bw = 0
+$ ethtool -L eth0 rx 1 tx 1
+
+*********************************************************************
+*********************************************************************
+*********************************************************************
+Example 2: Two port tx AVB configuration scheme for target board
+----------------------------------------------------------------------
+(output and scheme are for the AM572x EVM, for dual emac boards only)
+
++------------------------------------------------------------------+ u
+| +----------+  +----------+  +------+  +----------+  +----------+ | s
+| |          |  |          |  |      |  |          |  |          | | e
+| | App 1    |  | App 2    |  | Apps |  | App 3    |  | App 4    | | r
+| | Class A  |  | Class B  |  | Rest |  | Class B  |  | Class A  | |
+| | Eth0     |  | Eth0     |  |   |  |  | Eth1     |  | Eth1     | | s
+| | VLAN100  |  | VLAN100  |  |   |  |  | VLAN100  |  | VLAN100  | | p
+| | 40 Mb/s  |  | 20 Mb/s  |  |   |  |  | 10 Mb/s  |  | 30 Mb/s  | | a
+| | SO_PRI=3 |  | SO_PRI=2 |  |   |  |  | SO_PRI=3 |  | SO_PRI=2 | | c
+| |   |      |  |   |      |  |   |  |  |   |      |  |   |      | | e
+| +---|------+  +---|------+  +---|--+  +---|------+  +---|------+ |
++-----|-------------|-------------|---------|-------------|--------+
+    +-+     +-------+             |         +----------+  +----+
+    |       |             +-------+------+             |       |
+    |       |             |              |             |       |
++---|-------|-------------|--------------|-------------|-------|---+
+| +----+ +----+ +----+ +----+          +----+ +----+ +----+ +----+ |
+| | p3 | | p2 | | p1 | | p0 |          | p0 | | p1 | | p2 | | p3 | | k
+| \    / \    / \    / \    /          \    / \    / \    / \    / | e
+|  \  /   \  /   \  /   \  /            \  /   \  /   \  /   \  /  | r
+|   \/     \/     \/     \/              \/     \/     \/     \/   | n
+|   |      |             |                |             |      |   | e
+|   |      |        +----+                +----+        |      |   | l
+|   |      |        |                          |        |      |   |
+| +----+ +----+ +----+                        +----+ +----+ +----+ | s
+| |tc0 | |tc1 | |tc2 |                        |tc2 | |tc1 | |tc0 | | p
+| \    / \    / \    /                        \    / \    / \    / | a
+|  \  /   \  /   \  /                          \  /   \  /   \  /  | c
+|   \/     \/     \/                            \/     \/     \/   | e
+|   |      |       +-----+                +-----+      |       |   |
+|   |      |       |     |                |     |      |       |   |
+|   |      |       |     |                |     |      |       |   |
+|   |      |       |     |    E      E    |     |      |       |   |
+| +----+ +----+ +----+ +----+ t      t +----+ +----+ +----+ +----+ |
+| |txq0| |txq1| |txq4| |txq5| h      h |txq6| |txq7| |txq3| |txq2| |
+| \    / \    / \    / \    / 0      1 \    / \    / \    / \    / |
+|  \  /   \  /   \  /   \  /  .      .  \  /   \  /   \  /   \  /  |
+|   \/     \/     \/     \/   1      1   \/     \/     \/     \/   |
+| +-|------|------|------|--+ 0      0 +-|------|------|------|--+ |
+| | |      |      |      |  | 0      0 | |      |      |      |  | |
++---|------|------|------|---------------|------|------|------|----+
+    |      |      |      |               |      |      |      |
+    p      p      p      p               p      p      p      p
+    3      2      0-1, 4-7   <-L2 pri->  0-1, 4-7      2      3
+    |      |      |      |               |      |      |      |
+    |      |      |      |               |      |      |      |
++---|------|------|------|---------------|------|------|------|----+
+|   |      |      |      |               |      |      |      |    |
+| +----+ +----+ +----+ +----+          +----+ +----+ +----+ +----+ |
+| |dma7| |dma6| |dma3| |dma2|          |dma1| |dma0| |dma4| |dma5| |
+| \    / \    / \    / \    /          \    / \    / \    / \    / | c
+|  \S /   \S /   \  /   \  /            \  /   \  /   \S /   \S /  | p
+|   \/     \/     \/     \/              \/     \/     \/     \/   | s
+|   |      |      | +-----                |      |      |      |   | w
+|   |      |      | |                     +----+ |      |      |   |
+|   |      |      | |                          | |      |      |   | d
+| +----+ +----+ +----+p                      p+----+ +----+ +----+ | r
+| |    | |    | |    |o                      o|    | |    | |    | | i
+| | f3 | | f2 | | f0 |r        CPSW          r| f3 | | f2 | | f0 | | v
+| |tc0 | |tc1 | |tc2 |t                      t|tc0 | |tc1 | |tc2 | | e
+| \CBS / \CBS / \CBS /1                      2\CBS / \CBS / \CBS / | r
+|  \S /   \S /   \  /                          \S /   \S /   \  /  |
+|   \/     \/     \/                            \/     \/     \/   |
++------------------------------------------------------------------+
+========================================Eth==========================>
+
+1)
+// Add 8 tx queues; they are common to both interfaces, so they are
+// accessed via either Eth0 or Eth1.
+$ ethtool -L eth1 rx 1 tx 8
+rx unmodified, ignoring
+
+2)
+// Check if the number of queues is set correctly:
+$ ethtool -l eth0
+Channel parameters for eth0:
+Pre-set maximums:
+RX:             8
+TX:             8
+Other:          0
+Combined:       0
+Current hardware settings:
+RX:             1
+TX:             8
+Other:          0
+Combined:       0
+
+3)
+// TX queues must be rated starting from 0, so set bandwidths for tx0
+// and tx1 for Eth0 and for tx2 and tx3 for Eth1. That is, rates of 40
+// and 20 Mb/s respectively for Eth0 and 30 and 10 Mb/s for Eth1.
+// The real speed can differ a bit due to discreteness.
+// Leave the last 4 tx queues unrated.
+$ echo 40 > /sys/class/net/eth0/queues/tx-0/tx_maxrate
+$ echo 20 > /sys/class/net/eth0/queues/tx-1/tx_maxrate
+$ echo 30 > /sys/class/net/eth1/queues/tx-2/tx_maxrate
+$ echo 10 > /sys/class/net/eth1/queues/tx-3/tx_maxrate
+
+4)
+// Check maximum rate of tx (cpdma) queues:
+$ cat /sys/class/net/eth0/queues/tx-*/tx_maxrate
+40
+20
+30
+10
+0
+0
+0
+0
+
+5)
+// Map skb->priority to traffic class for Eth0:
+// 3pri -> tc0, 2pri -> tc1, (0,1,4-7)pri -> tc2
+// Map traffic class to transmit queue:
+// tc0 -> txq0, tc1 -> txq1, tc2 -> (txq4, txq5)
+$ tc qdisc replace dev eth0 handle 100: parent root mqprio num_tc 3 \
+map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 queues 1@0 1@1 2@4 hw 1
+
+6)
+// Check the class settings
+$ tc -g class show dev eth0
++---(100:ffe2) mqprio
+|    +---(100:5) mqprio
+|    +---(100:6) mqprio
+|
++---(100:ffe1) mqprio
+|    +---(100:2) mqprio
+|
++---(100:ffe0) mqprio
+     +---(100:1) mqprio
+
+7)
+// Set rate for class A - 41 Mbit (tc0, txq0) using the CBS Qdisc for
+// Eth0. Only the idle slope is important here; the other args are
+// ignored. The real speed can differ a bit due to discreteness.
+$ tc qdisc add dev eth0 parent 100:1 cbs locredit -1470 \
+hicredit 62 sendslope -959000 idleslope 41000 offload 1
+net eth0: set FIFO3 bw = 50
+
+8)
+// Set rate for class B - 21 Mbit (tc1, txq1) using CBS Qdisc for Eth0
+$ tc qdisc add dev eth0 parent 100:2 cbs locredit -1470 \
+hicredit 65 sendslope -979000 idleslope 21000 offload 1
+net eth0: set FIFO2 bw = 30
+
+9)
+// Create vlan 100 to map sk->priority to vlan qos for Eth0
+$ ip link add link eth0 name eth0.100 type vlan id 100
+net eth0: Adding vlanid 100 to vlan filter
+
+10)
+// Map skb->priority to L2 prio for Eth0.100, one to one
+$ ip link set eth0.100 type vlan \
+egress 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
+
+11)
+// Check egress map for vlan 100
+$ cat /proc/net/vlan/eth0.100
+[...]
+INGRESS priority mappings: 0:0  1:0  2:0  3:0  4:0  5:0  6:0 7:0
+EGRESS priority mappings: 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
+
+12)
+// Map skb->priority to traffic class for Eth1:
+// 3pri -> tc0, 2pri -> tc1, (0,1,4-7)pri -> tc2
+// Map traffic class to transmit queue:
+// tc0 -> txq2, tc1 -> txq3, tc2 -> (txq6, txq7)
+$ tc qdisc replace dev eth1 handle 100: parent root mqprio num_tc 3 \
+map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 queues 1@2 1@3 2@6 hw 1
+
+13)
+// Check the class settings
+$ tc -g class show dev eth1
++---(100:ffe2) mqprio
+|    +---(100:7) mqprio
+|    +---(100:8) mqprio
+|
++---(100:ffe1) mqprio
+|    +---(100:4) mqprio
+|
++---(100:ffe0) mqprio
+     +---(100:3) mqprio
+
+14)
+// Set rate for class A - 31 Mbit (tc0, txq2) using the CBS Qdisc for
+// Eth1. Only the idle slope is important here; the other args are
+// ignored. Set it +1 Mb as a reserve (important!)
+$ tc qdisc add dev eth1 parent 100:3 cbs locredit -1453 \
+hicredit 47 sendslope -969000 idleslope 31000 offload 1
+net eth1: set FIFO3 bw = 31
+
+15)
+// Set rate for class B - 11 Mbit (tc1, txq3) using CBS Qdisc for Eth1
+// Set it +1 Mb as a reserve (important!)
+$ tc qdisc add dev eth1 parent 100:4 cbs locredit -1483 \
+hicredit 34 sendslope -989000 idleslope 11000 offload 1
+net eth1: set FIFO2 bw = 11
+
+16)
+// Create vlan 100 to map sk->priority to vlan qos for Eth1
+$ ip link add link eth1 name eth1.100 type vlan id 100
+net eth1: Adding vlanid 100 to vlan filter
+
+17)
+// Map skb->priority to L2 prio for Eth1.100, one to one
+$ ip link set eth1.100 type vlan \
+egress 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
+
+18)
+// Check egress map for vlan 100
+$ cat /proc/net/vlan/eth1.100
+[...]
+INGRESS priority mappings: 0:0  1:0  2:0  3:0  4:0  5:0  6:0 7:0
+EGRESS priority mappings: 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
+
+19)
+// Run your tools with the socket option "SO_PRIORITY" set to 3
+// for class A and to 2 for class B, on both interfaces
+./tsn_talker -d 18:03:73:66:87:42 -i eth0.100 -p2 -s 1500&
+./tsn_talker -d 18:03:73:66:87:42 -i eth0.100 -p3 -s 1500&
+./tsn_talker -d 20:cf:30:85:7d:fd -i eth1.100 -p2 -s 1500&
+./tsn_talker -d 20:cf:30:85:7d:fd -i eth1.100 -p3 -s 1500&
+
+20)
+// Run your listener on the workstation (it should be in the same vlan)
+// (tsn_listener taken from https://www.spinics.net/lists/netdev/msg460869.html)
+./tsn_listener -d 18:03:73:66:87:42 -i enp5s0 -s 1500
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39000 kbps
+
+21)
+// Restore default configuration if needed
+$ ip link del eth1.100
+$ ip link del eth0.100
+$ tc qdisc del dev eth1 root
+net eth1: Prev FIFO2 is shaped
+net eth1: set FIFO3 bw = 0
+net eth1: set FIFO2 bw = 0
+$ tc qdisc del dev eth0 root
+net eth0: Prev FIFO2 is shaped
+net eth0: set FIFO3 bw = 0
+net eth0: set FIFO2 bw = 0
+$ ethtool -L eth0 rx 1 tx 1
index 36a2dded525b7a9b149c1f5d28d1bc19a46bceca..0de6f6145cc6deb10f862b139c0d2ae0add29199 100644 (file)
@@ -37,7 +37,7 @@ Procedure for submitting patches to the -stable tree
 
  - If the patch covers files in net/ or drivers/net please follow netdev stable
    submission guidelines as described in
-   Documentation/networking/netdev-FAQ.txt
+   :ref:`Documentation/networking/netdev-FAQ.rst <netdev-FAQ>`
  - Security patches should not be handled (solely) by the -stable review
    process but should follow the procedures in
    :ref:`Documentation/admin-guide/security-bugs.rst <securitybugs>`.
index 908bb55be40790d9577ce5275eafa22d181d53ce..c0917107b90ab5f3b163f1eea3978a232c7c3287 100644 (file)
@@ -611,6 +611,7 @@ which stable kernel versions should receive your fix. This is the preferred
 method for indicating a bug fixed by the patch. See :ref:`describe_changes`
 for more details.
 
+.. _the_canonical_patch_format:
 
 14) The canonical patch format
 ------------------------------
index a289285d2412ed367983fdc3a5109d7041425410..7d3684e81df6ca03c29e86fd9d321bdaa4dc6f69 100644 (file)
@@ -9,7 +9,7 @@ rfkill - RF kill switch support
 Introduction
 ============
 
-The rfkill subsystem provides a generic interface to disabling any radio
+The rfkill subsystem provides a generic interface for disabling any radio
 transmitter in the system. When a transmitter is blocked, it shall not
 radiate any power.
 
@@ -45,7 +45,7 @@ The rfkill subsystem is composed of three main components:
  * the rfkill drivers.
 
 The rfkill core provides API for kernel drivers to register their radio
-transmitter with the kernel, methods for turning it on and off and, letting
+transmitter with the kernel, methods for turning it on and off, and letting
 the system know about hardware-disabled states that may be implemented on
 the device.
 
@@ -54,7 +54,7 @@ ways for userspace to query the current states. See the "Userspace support"
 section below.
 
 When the device is hard-blocked (either by a call to rfkill_set_hw_state()
-or from query_hw_block) set_block() will be invoked for additional software
+or from query_hw_block), set_block() will be invoked for additional software
 block, but drivers can ignore the method call since they can use the return
 value of the function rfkill_set_hw_state() to sync the software state
 instead of keeping track of calls to set_block(). In fact, drivers should
@@ -65,7 +65,6 @@ keeps track of soft and hard block separately.
 Kernel API
 ==========
 
-
 Drivers for radio transmitters normally implement an rfkill driver.
 
 Platform drivers might implement input devices if the rfkill button is just
@@ -75,14 +74,14 @@ a way to turn on/off the transmitter(s).
 
 For some platforms, it is possible that the hardware state changes during
 suspend/hibernation, in which case it will be necessary to update the rfkill
-core with the current state is at resume time.
+core with the current state at resume time.
 
 To create an rfkill driver, driver's Kconfig needs to have::
 
        depends on RFKILL || !RFKILL
 
 to ensure the driver cannot be built-in when rfkill is modular. The !RFKILL
-case allows the driver to be built when rfkill is not configured, which
+case allows the driver to be built when rfkill is not configured, in which
 case all rfkill API can still be used but will be provided by static inlines
 which compile to almost nothing.
 
@@ -91,7 +90,7 @@ rfkill drivers that control devices that can be hard-blocked unless they also
 assign the poll_hw_block() callback (then the rfkill core will poll the
 device). Don't do this unless you cannot get the event in any other way.
 
-RFKill provides per-switch LED triggers, which can be used to drive LEDs
+rfkill provides per-switch LED triggers, which can be used to drive LEDs
 according to the switch state (LED_FULL when blocked, LED_OFF otherwise).
 
 
@@ -114,7 +113,7 @@ a specified type) into a state which also updates the default state for
 hotplugged devices.
 
 After an application opens /dev/rfkill, it can read the current state of all
-devices. Changes can be either obtained by either polling the descriptor for
+devices. Changes can be obtained by either polling the descriptor for
 hotplug or state change events or by listening for uevents emitted by the
 rfkill core framework.
 
@@ -127,8 +126,7 @@ environment variables set::
        RFKILL_STATE
        RFKILL_TYPE
 
-The contents of these variables corresponds to the "name", "state" and
+The content of these variables corresponds to the "name", "state" and
 "type" sysfs files explained above.
 
-
 For further details consult Documentation/ABI/stable/sysfs-class-rfkill.
index 7cebd5bba8a8eb15305966228440f77174744a97..82f277462349c3b62eb0f0ba1a9a72283e3e037c 100644 (file)
@@ -9160,6 +9160,7 @@ S:        Supported
 W:     http://www.mellanox.com
 Q:     http://patchwork.ozlabs.org/project/netdev/list/
 F:     drivers/net/ethernet/mellanox/mlxsw/
+F:     tools/testing/selftests/drivers/net/mlxsw/
 
 MELLANOX FIRMWARE FLASH LIBRARY (mlxfw)
 M:     mlxsw@mellanox.com
@@ -12066,6 +12067,13 @@ S:     Maintained
 F:     sound/soc/codecs/rt*
 F:     include/sound/rt*.h
 
+REALTEK RTL83xx SMI DSA ROUTER CHIPS
+M:     Linus Walleij <linus.walleij@linaro.org>
+S:     Maintained
+F:     Documentation/devicetree/bindings/net/dsa/realtek-smi.txt
+F:     drivers/net/dsa/realtek-smi*
+F:     drivers/net/dsa/rtl83*
+
 REGISTER MAP ABSTRACTION
 M:     Mark Brown <broonie@kernel.org>
 L:     linux-kernel@vger.kernel.org
@@ -12173,6 +12181,8 @@ S:      Maintained
 F:     Documentation/rfkill.txt
 F:     Documentation/ABI/stable/sysfs-class-rfkill
 F:     net/rfkill/
+F:     include/linux/rfkill.h
+F:     include/uapi/linux/rfkill.h
 
 RHASHTABLE
 M:     Thomas Graf <tgraf@suug.ch>
@@ -12180,7 +12190,9 @@ M:      Herbert Xu <herbert@gondor.apana.org.au>
 L:     netdev@vger.kernel.org
 S:     Maintained
 F:     lib/rhashtable.c
+F:     lib/test_rhashtable.c
 F:     include/linux/rhashtable.h
+F:     include/linux/rhashtable-types.h
 
 RICOH R5C592 MEMORYSTICK DRIVER
 M:     Maxim Levitsky <maximlevitsky@gmail.com>
index be14f16149d5faf64902441f39ba5c1a08be6104..065fb372e355cf86905fc1be80b97be5ef3218b3 100644 (file)
 
 #define SO_ZEROCOPY            60
 
+#define SO_TXTIME              61
+#define SCM_TXTIME             SO_TXTIME
+
 #endif /* _UAPI_ASM_SOCKET_H */
index fb5c954ab95a2ca98ac3223a2ce27e8ca7581dd8..6f258b50eb44262bcc6f5549ddfc168cb3e9dfa9 100644 (file)
                };
        };
 
+       /* This is a RealTek RTL8366RB switch and PHY using SMI over GPIO */
+       switch {
+               compatible = "realtek,rtl8366rb";
+               /* 22 = MDIO (has input reads), 21 = MDC (clock, output only) */
+               mdc-gpios = <&gpio0 21 GPIO_ACTIVE_HIGH>;
+               mdio-gpios = <&gpio0 22 GPIO_ACTIVE_HIGH>;
+               reset-gpios = <&gpio0 14 GPIO_ACTIVE_LOW>;
+               realtek,disable-leds;
+
+               switch_intc: interrupt-controller {
+                       /* GPIO 15 provides the interrupt */
+                       interrupt-parent = <&gpio0>;
+                       interrupts = <15 IRQ_TYPE_LEVEL_LOW>;
+                       interrupt-controller;
+                       #address-cells = <0>;
+                       #interrupt-cells = <1>;
+               };
+
+               ports {
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+
+                       port@0 {
+                               reg = <0>;
+                               label = "lan0";
+                               phy-handle = <&phy0>;
+                       };
+                       port@1 {
+                               reg = <1>;
+                               label = "lan1";
+                               phy-handle = <&phy1>;
+                       };
+                       port@2 {
+                               reg = <2>;
+                               label = "lan2";
+                               phy-handle = <&phy2>;
+                       };
+                       port@3 {
+                               reg = <3>;
+                               label = "lan3";
+                               phy-handle = <&phy3>;
+                       };
+                       port@4 {
+                               reg = <4>;
+                               label = "wan";
+                               phy-handle = <&phy4>;
+                       };
+                       rtl8366rb_cpu_port: port@5 {
+                               reg = <5>;
+                               label = "cpu";
+                               ethernet = <&gmac0>;
+                               phy-mode = "rgmii";
+                               fixed-link {
+                                       speed = <1000>;
+                                       full-duplex;
+                                       pause;
+                               };
+                       };
+
+               };
+
+               mdio {
+                       compatible = "realtek,smi-mdio";
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+
+                       phy0: phy@0 {
+                               reg = <0>;
+                               interrupt-parent = <&switch_intc>;
+                               interrupts = <0>;
+                       };
+                       phy1: phy@1 {
+                               reg = <1>;
+                               interrupt-parent = <&switch_intc>;
+                               interrupts = <1>;
+                       };
+                       phy2: phy@2 {
+                               reg = <2>;
+                               interrupt-parent = <&switch_intc>;
+                               interrupts = <2>;
+                       };
+                       phy3: phy@3 {
+                               reg = <3>;
+                               interrupt-parent = <&switch_intc>;
+                               interrupts = <3>;
+                       };
+                       phy4: phy@4 {
+                               reg = <4>;
+                               interrupt-parent = <&switch_intc>;
+                               interrupts = <12>;
+                       };
+               };
+       };
+
        soc {
                flash@30000000 {
                        /*
                                 * gpio0bgrp cover line 7 used by WPS LED
                                 * gpio0cgrp cover line 8, 13 used by keys
                                 *           and 11, 12 used by the HD LEDs
+                                *           and line 14, 15 used by RTL8366
+                                *           RESET and phy ready
                                 * gpio0egrp cover line 16 used by VDISP
                                 * gpio0fgrp cover line 17 used by TK IRQ
                                 * gpio0ggrp cover line 20 used by panel CS
-                                * gpio0hgrp cover line 21,22 used by RTL8366RB
+                                * gpio0hgrp cover line 21,22 used by RTL8366RB MDIO
                                 */
                                gpio0_default_pins: pinctrl-gpio0 {
                                        mux {
                                                groups = "gpio1bgrp";
                                        };
                                };
+                               pinctrl-gmii {
+                                       mux {
+                                               function = "gmii";
+                                               groups = "gmii_gmac0_grp";
+                                       };
+                                       conf0 {
+                                               pins = "V8 GMAC0 RXDV", "T10 GMAC1 RXDV",
+                                                    "Y7 GMAC0 RXC", "Y11 GMAC1 RXC",
+                                                    "T8 GMAC0 TXEN", "W11 GMAC1 TXEN",
+                                                    "U8 GMAC0 TXC", "V11 GMAC1 TXC",
+                                                    "W8 GMAC0 RXD0", "V9 GMAC0 RXD1",
+                                                    "Y8 GMAC0 RXD2", "U9 GMAC0 RXD3",
+                                                    "T7 GMAC0 TXD0", "U6 GMAC0 TXD1",
+                                                    "V7 GMAC0 TXD2", "U7 GMAC0 TXD3",
+                                                    "Y12 GMAC1 RXD0", "V12 GMAC1 RXD1",
+                                                    "T11 GMAC1 RXD2", "W12 GMAC1 RXD3",
+                                                    "U10 GMAC1 TXD0", "Y10 GMAC1 TXD1",
+                                                    "W10 GMAC1 TXD2", "T9 GMAC1 TXD3";
+                                               skew-delay = <7>;
+                                       };
+                                       /* Set up drive strength on GMAC0 to 16 mA */
+                                       conf1 {
+                                               groups = "gmii_gmac0_grp";
+                                               drive-strength = <16>;
+                                       };
+                               };
                        };
                };
 
                                <0x6000 0 0 4 &pci_intc 2>;
                };
 
+               ethernet@60000000 {
+                       status = "okay";
+
+                       ethernet-port@0 {
+                               phy-mode = "rgmii";
+                               fixed-link {
+                                       speed = <1000>;
+                                       full-duplex;
+                                       pause;
+                               };
+                       };
+                       ethernet-port@1 {
+                               /* Not used in this platform */
+                       };
+               };
+
                ata@63000000 {
                        status = "okay";
                };
index f6a62ae44a65b61e162203ad261a7fbb5d4b34cf..25b3ee85066e16e95652b9963645dab61bcb7bbd 100644 (file)
@@ -22,6 +22,7 @@
 #include <asm/cacheflush.h>
 #include <asm/hwcap.h>
 #include <asm/opcodes.h>
+#include <asm/system_info.h>
 
 #include "bpf_jit_32.h"
 
  * The callee saved registers depends on whether frame pointers are enabled.
  * With frame pointers (to be compliant with the ABI):
  *
- *                                high
- * original ARM_SP =>     +------------------+ \
- *                        |        pc        | |
- * current ARM_FP =>      +------------------+ } callee saved registers
- *                        |r4-r8,r10,fp,ip,lr| |
- *                        +------------------+ /
- *                                low
+ *                              high
+ * original ARM_SP =>     +--------------+ \
+ *                        |      pc      | |
+ * current ARM_FP =>      +--------------+ } callee saved registers
+ *                        |r4-r9,fp,ip,lr| |
+ *                        +--------------+ /
+ *                              low
  *
  * Without frame pointers:
  *
- *                                high
- * original ARM_SP =>     +------------------+
- *                        | r4-r8,r10,fp,lr  | callee saved registers
- * current ARM_FP =>      +------------------+
- *                                low
+ *                              high
+ * original ARM_SP =>     +--------------+
+ *                        |  r4-r9,fp,lr | callee saved registers
+ * current ARM_FP =>      +--------------+
+ *                              low
  *
  * When popping registers off the stack at the end of a BPF function, we
  * reference them via the current ARM_FP register.
  */
 #define CALLEE_MASK    (1 << ARM_R4 | 1 << ARM_R5 | 1 << ARM_R6 | \
-                        1 << ARM_R7 | 1 << ARM_R8 | 1 << ARM_R10 | \
+                        1 << ARM_R7 | 1 << ARM_R8 | 1 << ARM_R9 | \
                         1 << ARM_FP)
 #define CALLEE_PUSH_MASK (CALLEE_MASK | 1 << ARM_LR)
 #define CALLEE_POP_MASK  (CALLEE_MASK | 1 << ARM_PC)
 
-#define STACK_OFFSET(k)        (k)
+enum {
+       /* Stack layout - these are offsets from (top of stack - 4) */
+       BPF_R2_HI,
+       BPF_R2_LO,
+       BPF_R3_HI,
+       BPF_R3_LO,
+       BPF_R4_HI,
+       BPF_R4_LO,
+       BPF_R5_HI,
+       BPF_R5_LO,
+       BPF_R7_HI,
+       BPF_R7_LO,
+       BPF_R8_HI,
+       BPF_R8_LO,
+       BPF_R9_HI,
+       BPF_R9_LO,
+       BPF_FP_HI,
+       BPF_FP_LO,
+       BPF_TC_HI,
+       BPF_TC_LO,
+       BPF_AX_HI,
+       BPF_AX_LO,
+       /* Stack space for BPF_REG_2, BPF_REG_3, BPF_REG_4,
+        * BPF_REG_5, BPF_REG_7, BPF_REG_8, BPF_REG_9,
+        * BPF_REG_FP and Tail call counts.
+        */
+       BPF_JIT_SCRATCH_REGS,
+};
+
+/*
+ * Negative "register" values indicate the register is stored on the stack
+ * and are the offset from the top of the eBPF JIT scratch space.
+ */
+#define STACK_OFFSET(k)        (-4 - (k) * 4)
+#define SCRATCH_SIZE   (BPF_JIT_SCRATCH_REGS * 4)
+
+#ifdef CONFIG_FRAME_POINTER
+#define EBPF_SCRATCH_TO_ARM_FP(x) ((x) - 4 * hweight16(CALLEE_PUSH_MASK) - 4)
+#else
+#define EBPF_SCRATCH_TO_ARM_FP(x) (x)
+#endif
+
 #define TMP_REG_1      (MAX_BPF_JIT_REG + 0)   /* TEMP Register 1 */
 #define TMP_REG_2      (MAX_BPF_JIT_REG + 1)   /* TEMP Register 2 */
 #define TCALL_CNT      (MAX_BPF_JIT_REG + 2)   /* Tail Call Count */
  * scratch memory space and we have to build eBPF 64 bit register from those.
  *
  */
-static const u8 bpf2a32[][2] = {
+static const s8 bpf2a32[][2] = {
        /* return value from in-kernel function, and exit value from eBPF */
        [BPF_REG_0] = {ARM_R1, ARM_R0},
        /* arguments from eBPF program to in-kernel function */
        [BPF_REG_1] = {ARM_R3, ARM_R2},
        /* Stored on stack scratch space */
-       [BPF_REG_2] = {STACK_OFFSET(0), STACK_OFFSET(4)},
-       [BPF_REG_3] = {STACK_OFFSET(8), STACK_OFFSET(12)},
-       [BPF_REG_4] = {STACK_OFFSET(16), STACK_OFFSET(20)},
-       [BPF_REG_5] = {STACK_OFFSET(24), STACK_OFFSET(28)},
+       [BPF_REG_2] = {STACK_OFFSET(BPF_R2_HI), STACK_OFFSET(BPF_R2_LO)},
+       [BPF_REG_3] = {STACK_OFFSET(BPF_R3_HI), STACK_OFFSET(BPF_R3_LO)},
+       [BPF_REG_4] = {STACK_OFFSET(BPF_R4_HI), STACK_OFFSET(BPF_R4_LO)},
+       [BPF_REG_5] = {STACK_OFFSET(BPF_R5_HI), STACK_OFFSET(BPF_R5_LO)},
        /* callee saved registers that in-kernel function will preserve */
        [BPF_REG_6] = {ARM_R5, ARM_R4},
        /* Stored on stack scratch space */
-       [BPF_REG_7] = {STACK_OFFSET(32), STACK_OFFSET(36)},
-       [BPF_REG_8] = {STACK_OFFSET(40), STACK_OFFSET(44)},
-       [BPF_REG_9] = {STACK_OFFSET(48), STACK_OFFSET(52)},
+       [BPF_REG_7] = {STACK_OFFSET(BPF_R7_HI), STACK_OFFSET(BPF_R7_LO)},
+       [BPF_REG_8] = {STACK_OFFSET(BPF_R8_HI), STACK_OFFSET(BPF_R8_LO)},
+       [BPF_REG_9] = {STACK_OFFSET(BPF_R9_HI), STACK_OFFSET(BPF_R9_LO)},
        /* Read only Frame Pointer to access Stack */
-       [BPF_REG_FP] = {STACK_OFFSET(56), STACK_OFFSET(60)},
+       [BPF_REG_FP] = {STACK_OFFSET(BPF_FP_HI), STACK_OFFSET(BPF_FP_LO)},
        /* Temporary Register for internal BPF JIT, can be used
         * for constant blindings and others.
         */
        [TMP_REG_1] = {ARM_R7, ARM_R6},
-       [TMP_REG_2] = {ARM_R10, ARM_R8},
+       [TMP_REG_2] = {ARM_R9, ARM_R8},
        /* Tail call count. Stored on stack scratch space. */
-       [TCALL_CNT] = {STACK_OFFSET(64), STACK_OFFSET(68)},
+       [TCALL_CNT] = {STACK_OFFSET(BPF_TC_HI), STACK_OFFSET(BPF_TC_LO)},
        /* temporary register for blinding constants.
         * Stored on stack scratch space.
         */
-       [BPF_REG_AX] = {STACK_OFFSET(72), STACK_OFFSET(76)},
+       [BPF_REG_AX] = {STACK_OFFSET(BPF_AX_HI), STACK_OFFSET(BPF_AX_LO)},
 };
 
 #define        dst_lo  dst[1]
@@ -151,6 +193,7 @@ struct jit_ctx {
        unsigned int idx;
        unsigned int prologue_bytes;
        unsigned int epilogue_offset;
+       unsigned int cpu_architecture;
        u32 flags;
        u32 *offsets;
        u32 *target;
@@ -195,10 +238,56 @@ static inline void emit(u32 inst, struct jit_ctx *ctx)
        _emit(ARM_COND_AL, inst, ctx);
 }
 
+/*
+ * This is rather horrid, but necessary to convert an integer constant
+ * to an immediate operand for the opcodes, and be able to detect at
+ * build time whether the constant can't be converted (iow, usable in
+ * BUILD_BUG_ON()).
+ */
+#define imm12val(v, s) (rol32(v, (s)) | (s) << 7)
+#define const_imm8m(x)                                 \
+       ({ int r;                                       \
+          u32 v = (x);                                 \
+          if (!(v & ~0x000000ff))                      \
+               r = imm12val(v, 0);                     \
+          else if (!(v & ~0xc000003f))                 \
+               r = imm12val(v, 2);                     \
+          else if (!(v & ~0xf000000f))                 \
+               r = imm12val(v, 4);                     \
+          else if (!(v & ~0xfc000003))                 \
+               r = imm12val(v, 6);                     \
+          else if (!(v & ~0xff000000))                 \
+               r = imm12val(v, 8);                     \
+          else if (!(v & ~0x3fc00000))                 \
+               r = imm12val(v, 10);                    \
+          else if (!(v & ~0x0ff00000))                 \
+               r = imm12val(v, 12);                    \
+          else if (!(v & ~0x03fc0000))                 \
+               r = imm12val(v, 14);                    \
+          else if (!(v & ~0x00ff0000))                 \
+               r = imm12val(v, 16);                    \
+          else if (!(v & ~0x003fc000))                 \
+               r = imm12val(v, 18);                    \
+          else if (!(v & ~0x000ff000))                 \
+               r = imm12val(v, 20);                    \
+          else if (!(v & ~0x0003fc00))                 \
+               r = imm12val(v, 22);                    \
+          else if (!(v & ~0x0000ff00))                 \
+               r = imm12val(v, 24);                    \
+          else if (!(v & ~0x00003fc0))                 \
+               r = imm12val(v, 26);                    \
+          else if (!(v & ~0x00000ff0))                 \
+               r = imm12val(v, 28);                    \
+          else if (!(v & ~0x000003fc))                 \
+               r = imm12val(v, 30);                    \
+          else                                         \
+               r = -1;                                 \
+          r; })
+
 /*
  * Checks if immediate value can be converted to imm12(12 bits) value.
  */
-static int16_t imm8m(u32 x)
+static int imm8m(u32 x)
 {
        u32 rot;
 
@@ -208,6 +297,38 @@ static int16_t imm8m(u32 x)
        return -1;
 }
 
+#define imm8m(x) (__builtin_constant_p(x) ? const_imm8m(x) : imm8m(x))
+
+static u32 arm_bpf_ldst_imm12(u32 op, u8 rt, u8 rn, s16 imm12)
+{
+       op |= rt << 12 | rn << 16;
+       if (imm12 >= 0)
+               op |= ARM_INST_LDST__U;
+       else
+               imm12 = -imm12;
+       return op | (imm12 & ARM_INST_LDST__IMM12);
+}
+
+static u32 arm_bpf_ldst_imm8(u32 op, u8 rt, u8 rn, s16 imm8)
+{
+       op |= rt << 12 | rn << 16;
+       if (imm8 >= 0)
+               op |= ARM_INST_LDST__U;
+       else
+               imm8 = -imm8;
+       return op | (imm8 & 0xf0) << 4 | (imm8 & 0x0f);
+}
+
+#define ARM_LDR_I(rt, rn, off) arm_bpf_ldst_imm12(ARM_INST_LDR_I, rt, rn, off)
+#define ARM_LDRB_I(rt, rn, off)        arm_bpf_ldst_imm12(ARM_INST_LDRB_I, rt, rn, off)
+#define ARM_LDRD_I(rt, rn, off)        arm_bpf_ldst_imm8(ARM_INST_LDRD_I, rt, rn, off)
+#define ARM_LDRH_I(rt, rn, off)        arm_bpf_ldst_imm8(ARM_INST_LDRH_I, rt, rn, off)
+
+#define ARM_STR_I(rt, rn, off) arm_bpf_ldst_imm12(ARM_INST_STR_I, rt, rn, off)
+#define ARM_STRB_I(rt, rn, off)        arm_bpf_ldst_imm12(ARM_INST_STRB_I, rt, rn, off)
+#define ARM_STRD_I(rt, rn, off)        arm_bpf_ldst_imm8(ARM_INST_STRD_I, rt, rn, off)
+#define ARM_STRH_I(rt, rn, off)        arm_bpf_ldst_imm8(ARM_INST_STRH_I, rt, rn, off)
+
 /*
  * Initializes the JIT space with undefined instructions.
  */
@@ -227,19 +348,10 @@ static void jit_fill_hole(void *area, unsigned int size)
 #define STACK_ALIGNMENT        4
 #endif
 
-/* Stack space for BPF_REG_2, BPF_REG_3, BPF_REG_4,
- * BPF_REG_5, BPF_REG_7, BPF_REG_8, BPF_REG_9,
- * BPF_REG_FP and Tail call counts.
- */
-#define SCRATCH_SIZE 80
-
 /* total stack size used in JITed code */
 #define _STACK_SIZE    (ctx->prog->aux->stack_depth + SCRATCH_SIZE)
 #define STACK_SIZE     ALIGN(_STACK_SIZE, STACK_ALIGNMENT)
 
-/* Get the offset of eBPF REGISTERs stored on scratch space. */
-#define STACK_VAR(off) (STACK_SIZE - off)
-
 #if __LINUX_ARM_ARCH__ < 7
 
 static u16 imm_offset(u32 k, struct jit_ctx *ctx)
@@ -355,7 +467,7 @@ static inline int epilogue_offset(const struct jit_ctx *ctx)
 
 static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op)
 {
-       const u8 *tmp = bpf2a32[TMP_REG_1];
+       const s8 *tmp = bpf2a32[TMP_REG_1];
 
 #if __LINUX_ARM_ARCH__ == 7
        if (elf_hwcap & HWCAP_IDIVA) {
@@ -402,44 +514,110 @@ static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op)
                emit(ARM_MOV_R(ARM_R0, tmp[1]), ctx);
 }
 
-/* Checks whether BPF register is on scratch stack space or not. */
-static inline bool is_on_stack(u8 bpf_reg)
+/* Is the translated BPF register on stack? */
+static bool is_stacked(s8 reg)
+{
+       return reg < 0;
+}
+
+/* If a BPF register is on the stack (stk is true), load it to the
+ * supplied temporary register and return the temporary register
+ * for subsequent operations, otherwise just use the CPU register.
+ */
+static s8 arm_bpf_get_reg32(s8 reg, s8 tmp, struct jit_ctx *ctx)
+{
+       if (is_stacked(reg)) {
+               emit(ARM_LDR_I(tmp, ARM_FP, EBPF_SCRATCH_TO_ARM_FP(reg)), ctx);
+               reg = tmp;
+       }
+       return reg;
+}
+
+static const s8 *arm_bpf_get_reg64(const s8 *reg, const s8 *tmp,
+                                  struct jit_ctx *ctx)
 {
-       static u8 stack_regs[] = {BPF_REG_AX, BPF_REG_3, BPF_REG_4, BPF_REG_5,
-                               BPF_REG_7, BPF_REG_8, BPF_REG_9, TCALL_CNT,
-                               BPF_REG_2, BPF_REG_FP};
-       int i, reg_len = sizeof(stack_regs);
-
-       for (i = 0 ; i < reg_len ; i++) {
-               if (bpf_reg == stack_regs[i])
-                       return true;
+       if (is_stacked(reg[1])) {
+               if (__LINUX_ARM_ARCH__ >= 6 ||
+                   ctx->cpu_architecture >= CPU_ARCH_ARMv5TE) {
+                       emit(ARM_LDRD_I(tmp[1], ARM_FP,
+                                       EBPF_SCRATCH_TO_ARM_FP(reg[1])), ctx);
+               } else {
+                       emit(ARM_LDR_I(tmp[1], ARM_FP,
+                                      EBPF_SCRATCH_TO_ARM_FP(reg[1])), ctx);
+                       emit(ARM_LDR_I(tmp[0], ARM_FP,
+                                      EBPF_SCRATCH_TO_ARM_FP(reg[0])), ctx);
+               }
+               reg = tmp;
+       }
+       return reg;
+}
+
+/* If a BPF register is on the stack (stk is true), save the register
+ * back to the stack.  If the source register is not the same, then
+ * move it into the correct register.
+ */
+static void arm_bpf_put_reg32(s8 reg, s8 src, struct jit_ctx *ctx)
+{
+       if (is_stacked(reg))
+               emit(ARM_STR_I(src, ARM_FP, EBPF_SCRATCH_TO_ARM_FP(reg)), ctx);
+       else if (reg != src)
+               emit(ARM_MOV_R(reg, src), ctx);
+}
+
+static void arm_bpf_put_reg64(const s8 *reg, const s8 *src,
+                             struct jit_ctx *ctx)
+{
+       if (is_stacked(reg[1])) {
+               if (__LINUX_ARM_ARCH__ >= 6 ||
+                   ctx->cpu_architecture >= CPU_ARCH_ARMv5TE) {
+                       emit(ARM_STRD_I(src[1], ARM_FP,
+                                      EBPF_SCRATCH_TO_ARM_FP(reg[1])), ctx);
+               } else {
+                       emit(ARM_STR_I(src[1], ARM_FP,
+                                      EBPF_SCRATCH_TO_ARM_FP(reg[1])), ctx);
+                       emit(ARM_STR_I(src[0], ARM_FP,
+                                      EBPF_SCRATCH_TO_ARM_FP(reg[0])), ctx);
+               }
+       } else {
+               if (reg[1] != src[1])
+                       emit(ARM_MOV_R(reg[1], src[1]), ctx);
+               if (reg[0] != src[0])
+                       emit(ARM_MOV_R(reg[0], src[0]), ctx);
        }
-       return false;
 }
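
The four accessors above collapse the old load-if-stacked / operate /
store-if-stacked boilerplate into three calls. A minimal sketch of the
idiom (example_add32 is hypothetical, but it mirrors how emit_a32_alu_r
below uses the helpers):

static void example_add32(s8 dst, s8 src, struct jit_ctx *ctx)
{
        const s8 *tmp = bpf2a32[TMP_REG_1];
        s8 rd, rn;

        rn = arm_bpf_get_reg32(src, tmp[1], ctx);   /* load if stacked */
        rd = arm_bpf_get_reg32(dst, tmp[0], ctx);
        emit(ARM_ADD_R(rd, rd, rn), ctx);           /* operate on CPU regs */
        arm_bpf_put_reg32(dst, rd, ctx);            /* spill back if stacked */
}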
 
-static inline void emit_a32_mov_i(const u8 dst, const u32 val,
-                                 bool dstk, struct jit_ctx *ctx)
+static inline void emit_a32_mov_i(const s8 dst, const u32 val,
+                                 struct jit_ctx *ctx)
 {
-       const u8 *tmp = bpf2a32[TMP_REG_1];
+       const s8 *tmp = bpf2a32[TMP_REG_1];
 
-       if (dstk) {
+       if (is_stacked(dst)) {
                emit_mov_i(tmp[1], val, ctx);
-               emit(ARM_STR_I(tmp[1], ARM_SP, STACK_VAR(dst)), ctx);
+               arm_bpf_put_reg32(dst, tmp[1], ctx);
        } else {
                emit_mov_i(dst, val, ctx);
        }
 }
 
+static void emit_a32_mov_i64(const s8 dst[], u64 val, struct jit_ctx *ctx)
+{
+       const s8 *tmp = bpf2a32[TMP_REG_1];
+       const s8 *rd = is_stacked(dst_lo) ? tmp : dst;
+
+       emit_mov_i(rd[1], (u32)val, ctx);
+       emit_mov_i(rd[0], val >> 32, ctx);
+
+       arm_bpf_put_reg64(dst, rd, ctx);
+}
+
 /* Sign-extended move */
-static inline void emit_a32_mov_i64(const bool is64, const u8 dst[],
-                                 const u32 val, bool dstk,
-                                 struct jit_ctx *ctx) {
-       u32 hi = 0;
+static inline void emit_a32_mov_se_i64(const bool is64, const s8 dst[],
+                                      const u32 val, struct jit_ctx *ctx) {
+       u64 val64 = val;
 
        if (is64 && (val & (1<<31)))
-               hi = (u32)~0;
-       emit_a32_mov_i(dst_lo, val, dstk, ctx);
-       emit_a32_mov_i(dst_hi, hi, dstk, ctx);
+               val64 |= 0xffffffff00000000ULL;
+       emit_a32_mov_i64(dst, val64, ctx);
 }
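
Folding the old hi/lo bookkeeping into one u64 reduces the sign
extension to a single mask. A standalone C model of the widening step
(illustrative, not kernel code):

#include <stdint.h>

static uint64_t mov_se_model(int is64, uint32_t val)
{
        uint64_t val64 = val;

        if (is64 && (val & (1u << 31)))
                val64 |= 0xffffffff00000000ULL; /* 0x80000000 -> 0xffffffff80000000 */
        return val64;
}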
 
 static inline void emit_a32_add_r(const u8 dst, const u8 src,
@@ -521,75 +699,94 @@ static inline void emit_alu_r(const u8 dst, const u8 src, const bool is64,
 /* ALU operation (32 bit)
  * dst = dst (op) src
  */
-static inline void emit_a32_alu_r(const u8 dst, const u8 src,
-                                 bool dstk, bool sstk,
+static inline void emit_a32_alu_r(const s8 dst, const s8 src,
                                  struct jit_ctx *ctx, const bool is64,
                                  const bool hi, const u8 op) {
-       const u8 *tmp = bpf2a32[TMP_REG_1];
-       u8 rn = sstk ? tmp[1] : src;
-
-       if (sstk)
-               emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src)), ctx);
+       const s8 *tmp = bpf2a32[TMP_REG_1];
+       s8 rn, rd;
 
+       rn = arm_bpf_get_reg32(src, tmp[1], ctx);
+       rd = arm_bpf_get_reg32(dst, tmp[0], ctx);
        /* ALU operation */
-       if (dstk) {
-               emit(ARM_LDR_I(tmp[0], ARM_SP, STACK_VAR(dst)), ctx);
-               emit_alu_r(tmp[0], rn, is64, hi, op, ctx);
-               emit(ARM_STR_I(tmp[0], ARM_SP, STACK_VAR(dst)), ctx);
-       } else {
-               emit_alu_r(dst, rn, is64, hi, op, ctx);
-       }
+       emit_alu_r(rd, rn, is64, hi, op, ctx);
+       arm_bpf_put_reg32(dst, rd, ctx);
 }
 
 /* ALU operation (64 bit) */
-static inline void emit_a32_alu_r64(const bool is64, const u8 dst[],
-                                 const u8 src[], bool dstk,
-                                 bool sstk, struct jit_ctx *ctx,
+static inline void emit_a32_alu_r64(const bool is64, const s8 dst[],
+                                 const s8 src[], struct jit_ctx *ctx,
                                  const u8 op) {
-       emit_a32_alu_r(dst_lo, src_lo, dstk, sstk, ctx, is64, false, op);
-       if (is64)
-               emit_a32_alu_r(dst_hi, src_hi, dstk, sstk, ctx, is64, true, op);
-       else
-               emit_a32_mov_i(dst_hi, 0, dstk, ctx);
+       const s8 *tmp = bpf2a32[TMP_REG_1];
+       const s8 *tmp2 = bpf2a32[TMP_REG_2];
+       const s8 *rd;
+
+       rd = arm_bpf_get_reg64(dst, tmp, ctx);
+       if (is64) {
+               const s8 *rs;
+
+               rs = arm_bpf_get_reg64(src, tmp2, ctx);
+
+               /* ALU operation */
+               emit_alu_r(rd[1], rs[1], true, false, op, ctx);
+               emit_alu_r(rd[0], rs[0], true, true, op, ctx);
+       } else {
+               s8 rs;
+
+               rs = arm_bpf_get_reg32(src_lo, tmp2[1], ctx);
+
+               /* ALU operation */
+               emit_alu_r(rd[1], rs, true, false, op, ctx);
+               emit_a32_mov_i(rd[0], 0, ctx);
+       }
+
+       arm_bpf_put_reg64(dst, rd, ctx);
 }
 
-/* dst = imm (4 bytes)*/
-static inline void emit_a32_mov_r(const u8 dst, const u8 src,
-                                 bool dstk, bool sstk,
+/* dst = src (4 bytes) */
+static inline void emit_a32_mov_r(const s8 dst, const s8 src,
                                  struct jit_ctx *ctx) {
-       const u8 *tmp = bpf2a32[TMP_REG_1];
-       u8 rt = sstk ? tmp[0] : src;
+       const s8 *tmp = bpf2a32[TMP_REG_1];
+       s8 rt;
 
-       if (sstk)
-               emit(ARM_LDR_I(tmp[0], ARM_SP, STACK_VAR(src)), ctx);
-       if (dstk)
-               emit(ARM_STR_I(rt, ARM_SP, STACK_VAR(dst)), ctx);
-       else
-               emit(ARM_MOV_R(dst, rt), ctx);
+       rt = arm_bpf_get_reg32(src, tmp[0], ctx);
+       arm_bpf_put_reg32(dst, rt, ctx);
 }
 
 /* dst = src */
-static inline void emit_a32_mov_r64(const bool is64, const u8 dst[],
-                                 const u8 src[], bool dstk,
-                                 bool sstk, struct jit_ctx *ctx) {
-       emit_a32_mov_r(dst_lo, src_lo, dstk, sstk, ctx);
-       if (is64) {
+static inline void emit_a32_mov_r64(const bool is64, const s8 dst[],
+                                 const s8 src[],
+                                 struct jit_ctx *ctx) {
+       if (!is64) {
+               emit_a32_mov_r(dst_lo, src_lo, ctx);
+               /* Zero out high 4 bytes */
+               emit_a32_mov_i(dst_hi, 0, ctx);
+       } else if (__LINUX_ARM_ARCH__ < 6 &&
+                  ctx->cpu_architecture < CPU_ARCH_ARMv5TE) {
                /* complete 8 byte move */
-               emit_a32_mov_r(dst_hi, src_hi, dstk, sstk, ctx);
+               emit_a32_mov_r(dst_lo, src_lo, ctx);
+               emit_a32_mov_r(dst_hi, src_hi, ctx);
+       } else if (is_stacked(src_lo) && is_stacked(dst_lo)) {
+               const s8 *tmp = bpf2a32[TMP_REG_1];
+
+               emit(ARM_LDRD_I(tmp[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(src_lo)), ctx);
+               emit(ARM_STRD_I(tmp[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(dst_lo)), ctx);
+       } else if (is_stacked(src_lo)) {
+               emit(ARM_LDRD_I(dst[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(src_lo)), ctx);
+       } else if (is_stacked(dst_lo)) {
+               emit(ARM_STRD_I(src[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(dst_lo)), ctx);
        } else {
-               /* Zero out high 4 bytes */
-               emit_a32_mov_i(dst_hi, 0, dstk, ctx);
+               emit(ARM_MOV_R(dst[0], src[0]), ctx);
+               emit(ARM_MOV_R(dst[1], src[1]), ctx);
        }
 }
 
 /* Shift operations */
-static inline void emit_a32_alu_i(const u8 dst, const u32 val, bool dstk,
+static inline void emit_a32_alu_i(const s8 dst, const u32 val,
                                struct jit_ctx *ctx, const u8 op) {
-       const u8 *tmp = bpf2a32[TMP_REG_1];
-       u8 rd = dstk ? tmp[0] : dst;
+       const s8 *tmp = bpf2a32[TMP_REG_1];
+       s8 rd;
 
-       if (dstk)
-               emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst)), ctx);
+       rd = arm_bpf_get_reg32(dst, tmp[0], ctx);
 
        /* Do shift operation */
        switch (op) {
@@ -604,303 +801,245 @@ static inline void emit_a32_alu_i(const u8 dst, const u32 val, bool dstk,
                break;
        }
 
-       if (dstk)
-               emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst)), ctx);
+       arm_bpf_put_reg32(dst, rd, ctx);
 }
 
 /* dst = ~dst (64 bit) */
-static inline void emit_a32_neg64(const u8 dst[], bool dstk,
+static inline void emit_a32_neg64(const s8 dst[],
                                struct jit_ctx *ctx){
-       const u8 *tmp = bpf2a32[TMP_REG_1];
-       u8 rd = dstk ? tmp[1] : dst[1];
-       u8 rm = dstk ? tmp[0] : dst[0];
+       const s8 *tmp = bpf2a32[TMP_REG_1];
+       const s8 *rd;
 
        /* Setup Operand */
-       if (dstk) {
-               emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
-               emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
-       }
+       rd = arm_bpf_get_reg64(dst, tmp, ctx);
 
        /* Do Negate Operation */
-       emit(ARM_RSBS_I(rd, rd, 0), ctx);
-       emit(ARM_RSC_I(rm, rm, 0), ctx);
+       emit(ARM_RSBS_I(rd[1], rd[1], 0), ctx);
+       emit(ARM_RSC_I(rd[0], rd[0], 0), ctx);
 
-       if (dstk) {
-               emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
-               emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
-       }
+       arm_bpf_put_reg64(dst, rd, ctx);
 }
 
 /* dst = dst << src */
-static inline void emit_a32_lsh_r64(const u8 dst[], const u8 src[], bool dstk,
-                                   bool sstk, struct jit_ctx *ctx) {
-       const u8 *tmp = bpf2a32[TMP_REG_1];
-       const u8 *tmp2 = bpf2a32[TMP_REG_2];
+static inline void emit_a32_lsh_r64(const s8 dst[], const s8 src[],
+                                   struct jit_ctx *ctx) {
+       const s8 *tmp = bpf2a32[TMP_REG_1];
+       const s8 *tmp2 = bpf2a32[TMP_REG_2];
+       const s8 *rd;
+       s8 rt;
 
        /* Setup Operands */
-       u8 rt = sstk ? tmp2[1] : src_lo;
-       u8 rd = dstk ? tmp[1] : dst_lo;
-       u8 rm = dstk ? tmp[0] : dst_hi;
-
-       if (sstk)
-               emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx);
-       if (dstk) {
-               emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
-               emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
-       }
+       rt = arm_bpf_get_reg32(src_lo, tmp2[1], ctx);
+       rd = arm_bpf_get_reg64(dst, tmp, ctx);
 
        /* Do LSH operation */
        emit(ARM_SUB_I(ARM_IP, rt, 32), ctx);
        emit(ARM_RSB_I(tmp2[0], rt, 32), ctx);
-       emit(ARM_MOV_SR(ARM_LR, rm, SRTYPE_ASL, rt), ctx);
-       emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd, SRTYPE_ASL, ARM_IP), ctx);
-       emit(ARM_ORR_SR(ARM_IP, ARM_LR, rd, SRTYPE_LSR, tmp2[0]), ctx);
-       emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_ASL, rt), ctx);
-
-       if (dstk) {
-               emit(ARM_STR_I(ARM_LR, ARM_SP, STACK_VAR(dst_lo)), ctx);
-               emit(ARM_STR_I(ARM_IP, ARM_SP, STACK_VAR(dst_hi)), ctx);
-       } else {
-               emit(ARM_MOV_R(rd, ARM_LR), ctx);
-               emit(ARM_MOV_R(rm, ARM_IP), ctx);
-       }
+       emit(ARM_MOV_SR(ARM_LR, rd[0], SRTYPE_ASL, rt), ctx);
+       emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[1], SRTYPE_ASL, ARM_IP), ctx);
+       emit(ARM_ORR_SR(ARM_IP, ARM_LR, rd[1], SRTYPE_LSR, tmp2[0]), ctx);
+       emit(ARM_MOV_SR(ARM_LR, rd[1], SRTYPE_ASL, rt), ctx);
+
+       arm_bpf_put_reg32(dst_lo, ARM_LR, ctx);
+       arm_bpf_put_reg32(dst_hi, ARM_IP, ctx);
 }
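
The six-instruction sequence covers every shift amount from 0 to 63
without branching, exploiting the ARM rule that register-specified
LSL/LSR amounts of 32 or more produce zero. A C model of what it
computes (helper names are illustrative; the RSH and ARSH variants
below mirror the same trick):

#include <stdint.h>

/* ARM register-specified shifts use the bottom byte of the shift
 * register and yield 0 for amounts of 32 or more. */
static uint32_t lsl_arm(uint32_t v, uint32_t s)
{
        s &= 0xff;
        return s >= 32 ? 0 : v << s;
}

static uint32_t lsr_arm(uint32_t v, uint32_t s)
{
        s &= 0xff;
        return s >= 32 ? 0 : v >> s;
}

/* Model of the emitted 64-bit left shift by a variable amount rt (0..63). */
static uint64_t lsh64_model(uint32_t lo, uint32_t hi, uint32_t rt)
{
        uint32_t hi_out, lo_out;

        hi_out  = lsl_arm(hi, rt);              /* hi << rt */
        hi_out |= lsl_arm(lo, rt - 32);         /* covers rt >= 32 */
        hi_out |= lsr_arm(lo, 32 - rt);         /* covers rt < 32 */
        lo_out  = lsl_arm(lo, rt);

        return (uint64_t)hi_out << 32 | lo_out;
}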
 
 /* dst = dst >> src (signed)*/
-static inline void emit_a32_arsh_r64(const u8 dst[], const u8 src[], bool dstk,
-                                   bool sstk, struct jit_ctx *ctx) {
-       const u8 *tmp = bpf2a32[TMP_REG_1];
-       const u8 *tmp2 = bpf2a32[TMP_REG_2];
+static inline void emit_a32_arsh_r64(const s8 dst[], const s8 src[],
+                                    struct jit_ctx *ctx) {
+       const s8 *tmp = bpf2a32[TMP_REG_1];
+       const s8 *tmp2 = bpf2a32[TMP_REG_2];
+       const s8 *rd;
+       s8 rt;
+
        /* Setup Operands */
-       u8 rt = sstk ? tmp2[1] : src_lo;
-       u8 rd = dstk ? tmp[1] : dst_lo;
-       u8 rm = dstk ? tmp[0] : dst_hi;
-
-       if (sstk)
-               emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx);
-       if (dstk) {
-               emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
-               emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
-       }
+       rt = arm_bpf_get_reg32(src_lo, tmp2[1], ctx);
+       rd = arm_bpf_get_reg64(dst, tmp, ctx);
 
        /* Do the ARSH operation */
        emit(ARM_RSB_I(ARM_IP, rt, 32), ctx);
        emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx);
-       emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_LSR, rt), ctx);
-       emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASL, ARM_IP), ctx);
+       emit(ARM_MOV_SR(ARM_LR, rd[1], SRTYPE_LSR, rt), ctx);
+       emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[0], SRTYPE_ASL, ARM_IP), ctx);
        _emit(ARM_COND_MI, ARM_B(0), ctx);
-       emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASR, tmp2[0]), ctx);
-       emit(ARM_MOV_SR(ARM_IP, rm, SRTYPE_ASR, rt), ctx);
-       if (dstk) {
-               emit(ARM_STR_I(ARM_LR, ARM_SP, STACK_VAR(dst_lo)), ctx);
-               emit(ARM_STR_I(ARM_IP, ARM_SP, STACK_VAR(dst_hi)), ctx);
-       } else {
-               emit(ARM_MOV_R(rd, ARM_LR), ctx);
-               emit(ARM_MOV_R(rm, ARM_IP), ctx);
-       }
+       emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[0], SRTYPE_ASR, tmp2[0]), ctx);
+       emit(ARM_MOV_SR(ARM_IP, rd[0], SRTYPE_ASR, rt), ctx);
+
+       arm_bpf_put_reg32(dst_lo, ARM_LR, ctx);
+       arm_bpf_put_reg32(dst_hi, ARM_IP, ctx);
 }
 
 /* dst = dst >> src */
-static inline void emit_a32_rsh_r64(const u8 dst[], const u8 src[], bool dstk,
-                                    bool sstk, struct jit_ctx *ctx) {
-       const u8 *tmp = bpf2a32[TMP_REG_1];
-       const u8 *tmp2 = bpf2a32[TMP_REG_2];
+static inline void emit_a32_rsh_r64(const s8 dst[], const s8 src[],
+                                   struct jit_ctx *ctx) {
+       const s8 *tmp = bpf2a32[TMP_REG_1];
+       const s8 *tmp2 = bpf2a32[TMP_REG_2];
+       const s8 *rd;
+       s8 rt;
+
        /* Setup Operands */
-       u8 rt = sstk ? tmp2[1] : src_lo;
-       u8 rd = dstk ? tmp[1] : dst_lo;
-       u8 rm = dstk ? tmp[0] : dst_hi;
-
-       if (sstk)
-               emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx);
-       if (dstk) {
-               emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
-               emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
-       }
+       rt = arm_bpf_get_reg32(src_lo, tmp2[1], ctx);
+       rd = arm_bpf_get_reg64(dst, tmp, ctx);
 
        /* Do RSH operation */
        emit(ARM_RSB_I(ARM_IP, rt, 32), ctx);
        emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx);
-       emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_LSR, rt), ctx);
-       emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASL, ARM_IP), ctx);
-       emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_LSR, tmp2[0]), ctx);
-       emit(ARM_MOV_SR(ARM_IP, rm, SRTYPE_LSR, rt), ctx);
-       if (dstk) {
-               emit(ARM_STR_I(ARM_LR, ARM_SP, STACK_VAR(dst_lo)), ctx);
-               emit(ARM_STR_I(ARM_IP, ARM_SP, STACK_VAR(dst_hi)), ctx);
-       } else {
-               emit(ARM_MOV_R(rd, ARM_LR), ctx);
-               emit(ARM_MOV_R(rm, ARM_IP), ctx);
-       }
+       emit(ARM_MOV_SR(ARM_LR, rd[1], SRTYPE_LSR, rt), ctx);
+       emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[0], SRTYPE_ASL, ARM_IP), ctx);
+       emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[0], SRTYPE_LSR, tmp2[0]), ctx);
+       emit(ARM_MOV_SR(ARM_IP, rd[0], SRTYPE_LSR, rt), ctx);
+
+       arm_bpf_put_reg32(dst_lo, ARM_LR, ctx);
+       arm_bpf_put_reg32(dst_hi, ARM_IP, ctx);
 }
 
 /* dst = dst << val */
-static inline void emit_a32_lsh_i64(const u8 dst[], bool dstk,
-                                    const u32 val, struct jit_ctx *ctx){
-       const u8 *tmp = bpf2a32[TMP_REG_1];
-       const u8 *tmp2 = bpf2a32[TMP_REG_2];
-       /* Setup operands */
-       u8 rd = dstk ? tmp[1] : dst_lo;
-       u8 rm = dstk ? tmp[0] : dst_hi;
+static inline void emit_a32_lsh_i64(const s8 dst[],
+                                   const u32 val, struct jit_ctx *ctx){
+       const s8 *tmp = bpf2a32[TMP_REG_1];
+       const s8 *tmp2 = bpf2a32[TMP_REG_2];
+       const s8 *rd;
 
-       if (dstk) {
-               emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
-               emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
-       }
+       /* Setup operands */
+       rd = arm_bpf_get_reg64(dst, tmp, ctx);
 
        /* Do LSH operation */
        if (val < 32) {
-               emit(ARM_MOV_SI(tmp2[0], rm, SRTYPE_ASL, val), ctx);
-               emit(ARM_ORR_SI(rm, tmp2[0], rd, SRTYPE_LSR, 32 - val), ctx);
-               emit(ARM_MOV_SI(rd, rd, SRTYPE_ASL, val), ctx);
+               emit(ARM_MOV_SI(tmp2[0], rd[0], SRTYPE_ASL, val), ctx);
+               emit(ARM_ORR_SI(rd[0], tmp2[0], rd[1], SRTYPE_LSR, 32 - val), ctx);
+               emit(ARM_MOV_SI(rd[1], rd[1], SRTYPE_ASL, val), ctx);
        } else {
                if (val == 32)
-                       emit(ARM_MOV_R(rm, rd), ctx);
+                       emit(ARM_MOV_R(rd[0], rd[1]), ctx);
                else
-                       emit(ARM_MOV_SI(rm, rd, SRTYPE_ASL, val - 32), ctx);
-               emit(ARM_EOR_R(rd, rd, rd), ctx);
+                       emit(ARM_MOV_SI(rd[0], rd[1], SRTYPE_ASL, val - 32), ctx);
+               emit(ARM_EOR_R(rd[1], rd[1], rd[1]), ctx);
        }
 
-       if (dstk) {
-               emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
-               emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
-       }
+       arm_bpf_put_reg64(dst, rd, ctx);
 }
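
With a constant amount the JIT instead splits into three cases at
compile time; as a C model (val in 1..63, illustrative):

#include <stdint.h>

static uint64_t lsh64_imm_model(uint32_t lo, uint32_t hi, uint32_t val)
{
        if (val < 32)
                return (uint64_t)(hi << val | lo >> (32 - val)) << 32
                       | (uint32_t)(lo << val);
        if (val == 32)
                return (uint64_t)lo << 32;              /* MOV hi, lo */
        return (uint64_t)(lo << (val - 32)) << 32;      /* low word is zeroed */
}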
 
 /* dst = dst >> val */
-static inline void emit_a32_rsh_i64(const u8 dst[], bool dstk,
+static inline void emit_a32_rsh_i64(const s8 dst[],
                                    const u32 val, struct jit_ctx *ctx) {
-       const u8 *tmp = bpf2a32[TMP_REG_1];
-       const u8 *tmp2 = bpf2a32[TMP_REG_2];
-       /* Setup operands */
-       u8 rd = dstk ? tmp[1] : dst_lo;
-       u8 rm = dstk ? tmp[0] : dst_hi;
+       const s8 *tmp = bpf2a32[TMP_REG_1];
+       const s8 *tmp2 = bpf2a32[TMP_REG_2];
+       const s8 *rd;
 
-       if (dstk) {
-               emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
-               emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
-       }
+       /* Setup operands */
+       rd = arm_bpf_get_reg64(dst, tmp, ctx);
 
        /* Do LSR operation */
        if (val < 32) {
-               emit(ARM_MOV_SI(tmp2[1], rd, SRTYPE_LSR, val), ctx);
-               emit(ARM_ORR_SI(rd, tmp2[1], rm, SRTYPE_ASL, 32 - val), ctx);
-               emit(ARM_MOV_SI(rm, rm, SRTYPE_LSR, val), ctx);
+               emit(ARM_MOV_SI(tmp2[1], rd[1], SRTYPE_LSR, val), ctx);
+               emit(ARM_ORR_SI(rd[1], tmp2[1], rd[0], SRTYPE_ASL, 32 - val), ctx);
+               emit(ARM_MOV_SI(rd[0], rd[0], SRTYPE_LSR, val), ctx);
        } else if (val == 32) {
-               emit(ARM_MOV_R(rd, rm), ctx);
-               emit(ARM_MOV_I(rm, 0), ctx);
+               emit(ARM_MOV_R(rd[1], rd[0]), ctx);
+               emit(ARM_MOV_I(rd[0], 0), ctx);
        } else {
-               emit(ARM_MOV_SI(rd, rm, SRTYPE_LSR, val - 32), ctx);
-               emit(ARM_MOV_I(rm, 0), ctx);
+               emit(ARM_MOV_SI(rd[1], rd[0], SRTYPE_LSR, val - 32), ctx);
+               emit(ARM_MOV_I(rd[0], 0), ctx);
        }
 
-       if (dstk) {
-               emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
-               emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
-       }
+       arm_bpf_put_reg64(dst, rd, ctx);
 }
 
 /* dst = dst >> val (signed) */
-static inline void emit_a32_arsh_i64(const u8 dst[], bool dstk,
+static inline void emit_a32_arsh_i64(const s8 dst[],
                                     const u32 val, struct jit_ctx *ctx){
-       const u8 *tmp = bpf2a32[TMP_REG_1];
-       const u8 *tmp2 = bpf2a32[TMP_REG_2];
-        /* Setup operands */
-       u8 rd = dstk ? tmp[1] : dst_lo;
-       u8 rm = dstk ? tmp[0] : dst_hi;
-
-       if (dstk) {
-               emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
-               emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
-       }
+       const s8 *tmp = bpf2a32[TMP_REG_1];
+       const s8 *tmp2 = bpf2a32[TMP_REG_2];
+       const s8 *rd;
+
+       /* Setup operands */
+       rd = arm_bpf_get_reg64(dst, tmp, ctx);
 
        /* Do ARSH operation */
        if (val < 32) {
-               emit(ARM_MOV_SI(tmp2[1], rd, SRTYPE_LSR, val), ctx);
-               emit(ARM_ORR_SI(rd, tmp2[1], rm, SRTYPE_ASL, 32 - val), ctx);
-               emit(ARM_MOV_SI(rm, rm, SRTYPE_ASR, val), ctx);
+               emit(ARM_MOV_SI(tmp2[1], rd[1], SRTYPE_LSR, val), ctx);
+               emit(ARM_ORR_SI(rd[1], tmp2[1], rd[0], SRTYPE_ASL, 32 - val), ctx);
+               emit(ARM_MOV_SI(rd[0], rd[0], SRTYPE_ASR, val), ctx);
        } else if (val == 32) {
-               emit(ARM_MOV_R(rd, rm), ctx);
-               emit(ARM_MOV_SI(rm, rm, SRTYPE_ASR, 31), ctx);
+               emit(ARM_MOV_R(rd[1], rd[0]), ctx);
+               emit(ARM_MOV_SI(rd[0], rd[0], SRTYPE_ASR, 31), ctx);
        } else {
-               emit(ARM_MOV_SI(rd, rm, SRTYPE_ASR, val - 32), ctx);
-               emit(ARM_MOV_SI(rm, rm, SRTYPE_ASR, 31), ctx);
+               emit(ARM_MOV_SI(rd[1], rd[0], SRTYPE_ASR, val - 32), ctx);
+               emit(ARM_MOV_SI(rd[0], rd[0], SRTYPE_ASR, 31), ctx);
        }
 
-       if (dstk) {
-               emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
-               emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
-       }
+       arm_bpf_put_reg64(dst, rd, ctx);
 }
 
-static inline void emit_a32_mul_r64(const u8 dst[], const u8 src[], bool dstk,
-                                   bool sstk, struct jit_ctx *ctx) {
-       const u8 *tmp = bpf2a32[TMP_REG_1];
-       const u8 *tmp2 = bpf2a32[TMP_REG_2];
+static inline void emit_a32_mul_r64(const s8 dst[], const s8 src[],
+                                   struct jit_ctx *ctx) {
+       const s8 *tmp = bpf2a32[TMP_REG_1];
+       const s8 *tmp2 = bpf2a32[TMP_REG_2];
+       const s8 *rd, *rt;
+
        /* Setup operands for multiplication */
-       u8 rd = dstk ? tmp[1] : dst_lo;
-       u8 rm = dstk ? tmp[0] : dst_hi;
-       u8 rt = sstk ? tmp2[1] : src_lo;
-       u8 rn = sstk ? tmp2[0] : src_hi;
-
-       if (dstk) {
-               emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
-               emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
-       }
-       if (sstk) {
-               emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx);
-               emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_hi)), ctx);
-       }
+       rd = arm_bpf_get_reg64(dst, tmp, ctx);
+       rt = arm_bpf_get_reg64(src, tmp2, ctx);
 
        /* Do Multiplication */
-       emit(ARM_MUL(ARM_IP, rd, rn), ctx);
-       emit(ARM_MUL(ARM_LR, rm, rt), ctx);
+       emit(ARM_MUL(ARM_IP, rd[1], rt[0]), ctx);
+       emit(ARM_MUL(ARM_LR, rd[0], rt[1]), ctx);
        emit(ARM_ADD_R(ARM_LR, ARM_IP, ARM_LR), ctx);
 
-       emit(ARM_UMULL(ARM_IP, rm, rd, rt), ctx);
-       emit(ARM_ADD_R(rm, ARM_LR, rm), ctx);
-       if (dstk) {
-               emit(ARM_STR_I(ARM_IP, ARM_SP, STACK_VAR(dst_lo)), ctx);
-               emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
-       } else {
-               emit(ARM_MOV_R(rd, ARM_IP), ctx);
-       }
+       emit(ARM_UMULL(ARM_IP, rd[0], rd[1], rt[1]), ctx);
+       emit(ARM_ADD_R(rd[0], ARM_LR, rd[0]), ctx);
+
+       arm_bpf_put_reg32(dst_lo, ARM_IP, ctx);
+       arm_bpf_put_reg32(dst_hi, rd[0], ctx);
 }
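
Only the low 64 bits of the 128-bit product are kept, so the hi×hi
cross term can be dropped and a single UMULL covers the lo×lo partial
product. As a C model (illustrative):

#include <stdint.h>

static uint64_t mul64_model(uint64_t a, uint64_t b)
{
        uint32_t a_lo = (uint32_t)a, a_hi = (uint32_t)(a >> 32);
        uint32_t b_lo = (uint32_t)b, b_hi = (uint32_t)(b >> 32);
        uint32_t cross = a_lo * b_hi + a_hi * b_lo;     /* two MULs + ADD */
        uint64_t prod  = (uint64_t)a_lo * b_lo;         /* UMULL */

        return prod + ((uint64_t)cross << 32);          /* fold into high word */
}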
 
 /* *(size *)(dst + off) = src */
-static inline void emit_str_r(const u8 dst, const u8 src, bool dstk,
-                             const s32 off, struct jit_ctx *ctx, const u8 sz){
-       const u8 *tmp = bpf2a32[TMP_REG_1];
-       u8 rd = dstk ? tmp[1] : dst;
-
-       if (dstk)
-               emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst)), ctx);
-       if (off) {
-               emit_a32_mov_i(tmp[0], off, false, ctx);
-               emit(ARM_ADD_R(tmp[0], rd, tmp[0]), ctx);
+static inline void emit_str_r(const s8 dst, const s8 src[],
+                             s32 off, struct jit_ctx *ctx, const u8 sz){
+       const s8 *tmp = bpf2a32[TMP_REG_1];
+       s32 off_max;
+       s8 rd;
+
+       rd = arm_bpf_get_reg32(dst, tmp[1], ctx);
+
+       if (sz == BPF_H)
+               off_max = 0xff;
+       else
+               off_max = 0xfff;
+
+       if (off < 0 || off > off_max) {
+               emit_a32_mov_i(tmp[0], off, ctx);
+               emit(ARM_ADD_R(tmp[0], tmp[0], rd), ctx);
                rd = tmp[0];
+               off = 0;
        }
        switch (sz) {
-       case BPF_W:
-               /* Store a Word */
-               emit(ARM_STR_I(src, rd, 0), ctx);
+       case BPF_B:
+               /* Store a Byte */
+               emit(ARM_STRB_I(src_lo, rd, off), ctx);
                break;
        case BPF_H:
                /* Store a HalfWord */
-               emit(ARM_STRH_I(src, rd, 0), ctx);
+               emit(ARM_STRH_I(src_lo, rd, off), ctx);
                break;
-       case BPF_B:
-               /* Store a Byte */
-               emit(ARM_STRB_I(src, rd, 0), ctx);
+       case BPF_W:
+               /* Store a Word */
+               emit(ARM_STR_I(src_lo, rd, off), ctx);
+               break;
+       case BPF_DW:
+               /* Store a Double Word */
+               emit(ARM_STR_I(src_lo, rd, off), ctx);
+               emit(ARM_STR_I(src_hi, rd, off + 4), ctx);
                break;
        }
 }
 
 /* dst = *(size*)(src + off) */
-static inline void emit_ldx_r(const u8 dst[], const u8 src, bool dstk,
+static inline void emit_ldx_r(const s8 dst[], const s8 src,
                              s32 off, struct jit_ctx *ctx, const u8 sz){
-       const u8 *tmp = bpf2a32[TMP_REG_1];
-       const u8 *rd = dstk ? tmp : dst;
-       u8 rm = src;
+       const s8 *tmp = bpf2a32[TMP_REG_1];
+       const s8 *rd = is_stacked(dst_lo) ? tmp : dst;
+       s8 rm = src;
        s32 off_max;
 
        if (sz == BPF_H)
@@ -909,7 +1048,7 @@ static inline void emit_ldx_r(const u8 dst[], const u8 src, bool dstk,
                off_max = 0xfff;
 
        if (off < 0 || off > off_max) {
-               emit_a32_mov_i(tmp[0], off, false, ctx);
+               emit_a32_mov_i(tmp[0], off, ctx);
                emit(ARM_ADD_R(tmp[0], tmp[0], src), ctx);
                rm = tmp[0];
                off = 0;
@@ -921,17 +1060,17 @@ static inline void emit_ldx_r(const u8 dst[], const u8 src, bool dstk,
        case BPF_B:
                /* Load a Byte */
                emit(ARM_LDRB_I(rd[1], rm, off), ctx);
-               emit_a32_mov_i(dst[0], 0, dstk, ctx);
+               emit_a32_mov_i(rd[0], 0, ctx);
                break;
        case BPF_H:
                /* Load a HalfWord */
                emit(ARM_LDRH_I(rd[1], rm, off), ctx);
-               emit_a32_mov_i(dst[0], 0, dstk, ctx);
+               emit_a32_mov_i(rd[0], 0, ctx);
                break;
        case BPF_W:
                /* Load a Word */
                emit(ARM_LDR_I(rd[1], rm, off), ctx);
-               emit_a32_mov_i(dst[0], 0, dstk, ctx);
+               emit_a32_mov_i(rd[0], 0, ctx);
                break;
        case BPF_DW:
                /* Load a Double Word */
@@ -939,10 +1078,7 @@ static inline void emit_ldx_r(const u8 dst[], const u8 src, bool dstk,
                emit(ARM_LDR_I(rd[0], rm, off + 4), ctx);
                break;
        }
-       if (dstk)
-               emit(ARM_STR_I(rd[1], ARM_SP, STACK_VAR(dst[1])), ctx);
-       if (dstk && sz == BPF_DW)
-               emit(ARM_STR_I(rd[0], ARM_SP, STACK_VAR(dst[0])), ctx);
+       arm_bpf_put_reg64(dst, rd, ctx);
 }
 
 /* Arithmetic Operation */
@@ -981,64 +1117,66 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
 {
 
        /* bpf_tail_call(void *prog_ctx, struct bpf_array *array, u64 index) */
-       const u8 *r2 = bpf2a32[BPF_REG_2];
-       const u8 *r3 = bpf2a32[BPF_REG_3];
-       const u8 *tmp = bpf2a32[TMP_REG_1];
-       const u8 *tmp2 = bpf2a32[TMP_REG_2];
-       const u8 *tcc = bpf2a32[TCALL_CNT];
+       const s8 *r2 = bpf2a32[BPF_REG_2];
+       const s8 *r3 = bpf2a32[BPF_REG_3];
+       const s8 *tmp = bpf2a32[TMP_REG_1];
+       const s8 *tmp2 = bpf2a32[TMP_REG_2];
+       const s8 *tcc = bpf2a32[TCALL_CNT];
+       const s8 *tc;
        const int idx0 = ctx->idx;
 #define cur_offset (ctx->idx - idx0)
 #define jmp_offset (out_offset - (cur_offset) - 2)
-       u32 off, lo, hi;
+       u32 lo, hi;
+       s8 r_array, r_index;
+       int off;
 
        /* if (index >= array->map.max_entries)
         *      goto out;
         */
+       BUILD_BUG_ON(offsetof(struct bpf_array, map.max_entries) >
+                    ARM_INST_LDST__IMM12);
        off = offsetof(struct bpf_array, map.max_entries);
-       /* array->map.max_entries */
-       emit_a32_mov_i(tmp[1], off, false, ctx);
-       emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r2[1])), ctx);
-       emit(ARM_LDR_R(tmp[1], tmp2[1], tmp[1]), ctx);
+       r_array = arm_bpf_get_reg32(r2[1], tmp2[0], ctx);
        /* index is 32-bit for arrays */
-       emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r3[1])), ctx);
+       r_index = arm_bpf_get_reg32(r3[1], tmp2[1], ctx);
+       /* array->map.max_entries */
+       emit(ARM_LDR_I(tmp[1], r_array, off), ctx);
        /* index >= array->map.max_entries */
-       emit(ARM_CMP_R(tmp2[1], tmp[1]), ctx);
+       emit(ARM_CMP_R(r_index, tmp[1]), ctx);
        _emit(ARM_COND_CS, ARM_B(jmp_offset), ctx);
 
+       /* tmp2[0] = array, tmp2[1] = index */
+
        /* if (tail_call_cnt > MAX_TAIL_CALL_CNT)
         *      goto out;
         * tail_call_cnt++;
         */
        lo = (u32)MAX_TAIL_CALL_CNT;
        hi = (u32)((u64)MAX_TAIL_CALL_CNT >> 32);
-       emit(ARM_LDR_I(tmp[1], ARM_SP, STACK_VAR(tcc[1])), ctx);
-       emit(ARM_LDR_I(tmp[0], ARM_SP, STACK_VAR(tcc[0])), ctx);
-       emit(ARM_CMP_I(tmp[0], hi), ctx);
-       _emit(ARM_COND_EQ, ARM_CMP_I(tmp[1], lo), ctx);
+       tc = arm_bpf_get_reg64(tcc, tmp, ctx);
+       emit(ARM_CMP_I(tc[0], hi), ctx);
+       _emit(ARM_COND_EQ, ARM_CMP_I(tc[1], lo), ctx);
        _emit(ARM_COND_HI, ARM_B(jmp_offset), ctx);
-       emit(ARM_ADDS_I(tmp[1], tmp[1], 1), ctx);
-       emit(ARM_ADC_I(tmp[0], tmp[0], 0), ctx);
-       emit(ARM_STR_I(tmp[1], ARM_SP, STACK_VAR(tcc[1])), ctx);
-       emit(ARM_STR_I(tmp[0], ARM_SP, STACK_VAR(tcc[0])), ctx);
+       emit(ARM_ADDS_I(tc[1], tc[1], 1), ctx);
+       emit(ARM_ADC_I(tc[0], tc[0], 0), ctx);
+       arm_bpf_put_reg64(tcc, tmp, ctx);
 
        /* prog = array->ptrs[index]
         * if (prog == NULL)
         *      goto out;
         */
-       off = offsetof(struct bpf_array, ptrs);
-       emit_a32_mov_i(tmp[1], off, false, ctx);
-       emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r2[1])), ctx);
-       emit(ARM_ADD_R(tmp[1], tmp2[1], tmp[1]), ctx);
-       emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r3[1])), ctx);
-       emit(ARM_MOV_SI(tmp[0], tmp2[1], SRTYPE_ASL, 2), ctx);
-       emit(ARM_LDR_R(tmp[1], tmp[1], tmp[0]), ctx);
+       BUILD_BUG_ON(imm8m(offsetof(struct bpf_array, ptrs)) < 0);
+       off = imm8m(offsetof(struct bpf_array, ptrs));
+       emit(ARM_ADD_I(tmp[1], r_array, off), ctx);
+       emit(ARM_LDR_R_SI(tmp[1], tmp[1], r_index, SRTYPE_ASL, 2), ctx);
        emit(ARM_CMP_I(tmp[1], 0), ctx);
        _emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx);
 
        /* goto *(prog->bpf_func + prologue_size); */
+       BUILD_BUG_ON(offsetof(struct bpf_prog, bpf_func) >
+                    ARM_INST_LDST__IMM12);
        off = offsetof(struct bpf_prog, bpf_func);
-       emit_a32_mov_i(tmp2[1], off, false, ctx);
-       emit(ARM_LDR_R(tmp[1], tmp[1], tmp2[1]), ctx);
+       emit(ARM_LDR_I(tmp[1], tmp[1], off), ctx);
        emit(ARM_ADD_I(tmp[1], tmp[1], ctx->prologue_bytes), ctx);
        emit_bx_r(tmp[1], ctx);
 
@@ -1059,7 +1197,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
 static inline void emit_rev16(const u8 rd, const u8 rn, struct jit_ctx *ctx)
 {
 #if __LINUX_ARM_ARCH__ < 6
-       const u8 *tmp2 = bpf2a32[TMP_REG_2];
+       const s8 *tmp2 = bpf2a32[TMP_REG_2];
 
        emit(ARM_AND_I(tmp2[1], rn, 0xff), ctx);
        emit(ARM_MOV_SI(tmp2[0], rn, SRTYPE_LSR, 8), ctx);
@@ -1074,7 +1212,7 @@ static inline void emit_rev16(const u8 rd, const u8 rn, struct jit_ctx *ctx)
 static inline void emit_rev32(const u8 rd, const u8 rn, struct jit_ctx *ctx)
 {
 #if __LINUX_ARM_ARCH__ < 6
-       const u8 *tmp2 = bpf2a32[TMP_REG_2];
+       const s8 *tmp2 = bpf2a32[TMP_REG_2];
 
        emit(ARM_AND_I(tmp2[1], rn, 0xff), ctx);
        emit(ARM_MOV_SI(tmp2[0], rn, SRTYPE_LSR, 24), ctx);
@@ -1094,28 +1232,27 @@ static inline void emit_rev32(const u8 rd, const u8 rn, struct jit_ctx *ctx)
 }
 
 // push the given 64-bit BPF register pair onto the stack
-static inline void emit_push_r64(const u8 src[], const u8 shift,
-               struct jit_ctx *ctx)
+static inline void emit_push_r64(const s8 src[], struct jit_ctx *ctx)
 {
-       const u8 *tmp2 = bpf2a32[TMP_REG_2];
+       const s8 *tmp2 = bpf2a32[TMP_REG_2];
+       const s8 *rt;
        u16 reg_set = 0;
 
-       emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(src[1]+shift)), ctx);
-       emit(ARM_LDR_I(tmp2[0], ARM_SP, STACK_VAR(src[0]+shift)), ctx);
+       rt = arm_bpf_get_reg64(src, tmp2, ctx);
 
-       reg_set = (1 << tmp2[1]) | (1 << tmp2[0]);
+       reg_set = (1 << rt[1]) | (1 << rt[0]);
        emit(ARM_PUSH(reg_set), ctx);
 }
 
 static void build_prologue(struct jit_ctx *ctx)
 {
-       const u8 r0 = bpf2a32[BPF_REG_0][1];
-       const u8 r2 = bpf2a32[BPF_REG_1][1];
-       const u8 r3 = bpf2a32[BPF_REG_1][0];
-       const u8 r4 = bpf2a32[BPF_REG_6][1];
-       const u8 fplo = bpf2a32[BPF_REG_FP][1];
-       const u8 fphi = bpf2a32[BPF_REG_FP][0];
-       const u8 *tcc = bpf2a32[TCALL_CNT];
+       const s8 r0 = bpf2a32[BPF_REG_0][1];
+       const s8 r2 = bpf2a32[BPF_REG_1][1];
+       const s8 r3 = bpf2a32[BPF_REG_1][0];
+       const s8 r4 = bpf2a32[BPF_REG_6][1];
+       const s8 fplo = bpf2a32[BPF_REG_FP][1];
+       const s8 fphi = bpf2a32[BPF_REG_FP][0];
+       const s8 *tcc = bpf2a32[TCALL_CNT];
 
        /* Save callee saved registers. */
 #ifdef CONFIG_FRAME_POINTER
@@ -1136,8 +1273,8 @@ static void build_prologue(struct jit_ctx *ctx)
        emit(ARM_SUB_I(ARM_SP, ARM_SP, ctx->stack_size), ctx);
 
        /* Set up BPF prog stack base register */
-       emit_a32_mov_r(fplo, ARM_IP, true, false, ctx);
-       emit_a32_mov_i(fphi, 0, true, ctx);
+       emit_a32_mov_r(fplo, ARM_IP, ctx);
+       emit_a32_mov_i(fphi, 0, ctx);
 
        /* mov r4, 0 */
        emit(ARM_MOV_I(r4, 0), ctx);
@@ -1146,8 +1283,8 @@ static void build_prologue(struct jit_ctx *ctx)
        emit(ARM_MOV_R(r3, r4), ctx);
        emit(ARM_MOV_R(r2, r0), ctx);
        /* Initialize Tail Count */
-       emit(ARM_STR_I(r4, ARM_SP, STACK_VAR(tcc[0])), ctx);
-       emit(ARM_STR_I(r4, ARM_SP, STACK_VAR(tcc[1])), ctx);
+       emit(ARM_STR_I(r4, ARM_FP, EBPF_SCRATCH_TO_ARM_FP(tcc[0])), ctx);
+       emit(ARM_STR_I(r4, ARM_FP, EBPF_SCRATCH_TO_ARM_FP(tcc[1])), ctx);
        /* end of prologue */
 }
 
@@ -1178,17 +1315,16 @@ static void build_epilogue(struct jit_ctx *ctx)
 static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
 {
        const u8 code = insn->code;
-       const u8 *dst = bpf2a32[insn->dst_reg];
-       const u8 *src = bpf2a32[insn->src_reg];
-       const u8 *tmp = bpf2a32[TMP_REG_1];
-       const u8 *tmp2 = bpf2a32[TMP_REG_2];
+       const s8 *dst = bpf2a32[insn->dst_reg];
+       const s8 *src = bpf2a32[insn->src_reg];
+       const s8 *tmp = bpf2a32[TMP_REG_1];
+       const s8 *tmp2 = bpf2a32[TMP_REG_2];
        const s16 off = insn->off;
        const s32 imm = insn->imm;
        const int i = insn - ctx->prog->insnsi;
        const bool is64 = BPF_CLASS(code) == BPF_ALU64;
-       const bool dstk = is_on_stack(insn->dst_reg);
-       const bool sstk = is_on_stack(insn->src_reg);
-       u8 rd, rt, rm, rn;
+       const s8 *rd, *rs;
+       s8 rd_lo, rt, rm, rn;
        s32 jmp_offset;
 
 #define check_imm(bits, imm) do {                              \
@@ -1211,11 +1347,11 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
        case BPF_ALU64 | BPF_MOV | BPF_X:
                switch (BPF_SRC(code)) {
                case BPF_X:
-                       emit_a32_mov_r64(is64, dst, src, dstk, sstk, ctx);
+                       emit_a32_mov_r64(is64, dst, src, ctx);
                        break;
                case BPF_K:
                        /* Sign-extend immediate value to destination reg */
-                       emit_a32_mov_i64(is64, dst, imm, dstk, ctx);
+                       emit_a32_mov_se_i64(is64, dst, imm, ctx);
                        break;
                }
                break;
@@ -1255,8 +1391,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
        case BPF_ALU64 | BPF_XOR | BPF_X:
                switch (BPF_SRC(code)) {
                case BPF_X:
-                       emit_a32_alu_r64(is64, dst, src, dstk, sstk,
-                                        ctx, BPF_OP(code));
+                       emit_a32_alu_r64(is64, dst, src, ctx, BPF_OP(code));
                        break;
                case BPF_K:
                        /* Move immediate value to the temporary register
@@ -1265,9 +1400,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
                         * value into temporary reg and then it would be
                         * safe to do the operation on it.
                         */
-                       emit_a32_mov_i64(is64, tmp2, imm, false, ctx);
-                       emit_a32_alu_r64(is64, dst, tmp2, dstk, false,
-                                        ctx, BPF_OP(code));
+                       emit_a32_mov_se_i64(is64, tmp2, imm, ctx);
+                       emit_a32_alu_r64(is64, dst, tmp2, ctx, BPF_OP(code));
                        break;
                }
                break;
@@ -1277,26 +1411,22 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
        case BPF_ALU | BPF_DIV | BPF_X:
        case BPF_ALU | BPF_MOD | BPF_K:
        case BPF_ALU | BPF_MOD | BPF_X:
-               rt = src_lo;
-               rd = dstk ? tmp2[1] : dst_lo;
-               if (dstk)
-                       emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
+               rd_lo = arm_bpf_get_reg32(dst_lo, tmp2[1], ctx);
                switch (BPF_SRC(code)) {
                case BPF_X:
-                       rt = sstk ? tmp2[0] : rt;
-                       if (sstk)
-                               emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)),
-                                    ctx);
+                       rt = arm_bpf_get_reg32(src_lo, tmp2[0], ctx);
                        break;
                case BPF_K:
                        rt = tmp2[0];
-                       emit_a32_mov_i(rt, imm, false, ctx);
+                       emit_a32_mov_i(rt, imm, ctx);
+                       break;
+               default:
+                       rt = src_lo;
                        break;
                }
-               emit_udivmod(rd, rd, rt, ctx, BPF_OP(code));
-               if (dstk)
-                       emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
-               emit_a32_mov_i(dst_hi, 0, dstk, ctx);
+               emit_udivmod(rd_lo, rd_lo, rt, ctx, BPF_OP(code));
+               arm_bpf_put_reg32(dst_lo, rd_lo, ctx);
+               emit_a32_mov_i(dst_hi, 0, ctx);
                break;
        case BPF_ALU64 | BPF_DIV | BPF_K:
        case BPF_ALU64 | BPF_DIV | BPF_X:
@@ -1310,54 +1440,54 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
                if (unlikely(imm > 31))
                        return -EINVAL;
                if (imm)
-                       emit_a32_alu_i(dst_lo, imm, dstk, ctx, BPF_OP(code));
-               emit_a32_mov_i(dst_hi, 0, dstk, ctx);
+                       emit_a32_alu_i(dst_lo, imm, ctx, BPF_OP(code));
+               emit_a32_mov_i(dst_hi, 0, ctx);
                break;
        /* dst = dst << imm */
        case BPF_ALU64 | BPF_LSH | BPF_K:
                if (unlikely(imm > 63))
                        return -EINVAL;
-               emit_a32_lsh_i64(dst, dstk, imm, ctx);
+               emit_a32_lsh_i64(dst, imm, ctx);
                break;
        /* dst = dst >> imm */
        case BPF_ALU64 | BPF_RSH | BPF_K:
                if (unlikely(imm > 63))
                        return -EINVAL;
-               emit_a32_rsh_i64(dst, dstk, imm, ctx);
+               emit_a32_rsh_i64(dst, imm, ctx);
                break;
        /* dst = dst << src */
        case BPF_ALU64 | BPF_LSH | BPF_X:
-               emit_a32_lsh_r64(dst, src, dstk, sstk, ctx);
+               emit_a32_lsh_r64(dst, src, ctx);
                break;
        /* dst = dst >> src */
        case BPF_ALU64 | BPF_RSH | BPF_X:
-               emit_a32_rsh_r64(dst, src, dstk, sstk, ctx);
+               emit_a32_rsh_r64(dst, src, ctx);
                break;
        /* dst = dst >> src (signed) */
        case BPF_ALU64 | BPF_ARSH | BPF_X:
-               emit_a32_arsh_r64(dst, src, dstk, sstk, ctx);
+               emit_a32_arsh_r64(dst, src, ctx);
                break;
        /* dst = dst >> imm (signed) */
        case BPF_ALU64 | BPF_ARSH | BPF_K:
                if (unlikely(imm > 63))
                        return -EINVAL;
-               emit_a32_arsh_i64(dst, dstk, imm, ctx);
+               emit_a32_arsh_i64(dst, imm, ctx);
                break;
        /* dst = ~dst */
        case BPF_ALU | BPF_NEG:
-               emit_a32_alu_i(dst_lo, 0, dstk, ctx, BPF_OP(code));
-               emit_a32_mov_i(dst_hi, 0, dstk, ctx);
+               emit_a32_alu_i(dst_lo, 0, ctx, BPF_OP(code));
+               emit_a32_mov_i(dst_hi, 0, ctx);
                break;
        /* dst = ~dst (64 bit) */
        case BPF_ALU64 | BPF_NEG:
-               emit_a32_neg64(dst, dstk, ctx);
+               emit_a32_neg64(dst, ctx);
                break;
        /* dst = dst * src/imm */
        case BPF_ALU64 | BPF_MUL | BPF_X:
        case BPF_ALU64 | BPF_MUL | BPF_K:
                switch (BPF_SRC(code)) {
                case BPF_X:
-                       emit_a32_mul_r64(dst, src, dstk, sstk, ctx);
+                       emit_a32_mul_r64(dst, src, ctx);
                        break;
                case BPF_K:
                        /* Move immediate value to the temporary register
@@ -1366,8 +1496,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
                         * reg then it would be safe to do the operation
                         * on it.
                         */
-                       emit_a32_mov_i64(is64, tmp2, imm, false, ctx);
-                       emit_a32_mul_r64(dst, tmp2, dstk, false, ctx);
+                       emit_a32_mov_se_i64(is64, tmp2, imm, ctx);
+                       emit_a32_mul_r64(dst, tmp2, ctx);
                        break;
                }
                break;
@@ -1375,25 +1505,20 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
        /* dst = htobe(dst) */
        case BPF_ALU | BPF_END | BPF_FROM_LE:
        case BPF_ALU | BPF_END | BPF_FROM_BE:
-               rd = dstk ? tmp[0] : dst_hi;
-               rt = dstk ? tmp[1] : dst_lo;
-               if (dstk) {
-                       emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(dst_lo)), ctx);
-                       emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_hi)), ctx);
-               }
+               rd = arm_bpf_get_reg64(dst, tmp, ctx);
                if (BPF_SRC(code) == BPF_FROM_LE)
                        goto emit_bswap_uxt;
                switch (imm) {
                case 16:
-                       emit_rev16(rt, rt, ctx);
+                       emit_rev16(rd[1], rd[1], ctx);
                        goto emit_bswap_uxt;
                case 32:
-                       emit_rev32(rt, rt, ctx);
+                       emit_rev32(rd[1], rd[1], ctx);
                        goto emit_bswap_uxt;
                case 64:
-                       emit_rev32(ARM_LR, rt, ctx);
-                       emit_rev32(rt, rd, ctx);
-                       emit(ARM_MOV_R(rd, ARM_LR), ctx);
+                       emit_rev32(ARM_LR, rd[1], ctx);
+                       emit_rev32(rd[1], rd[0], ctx);
+                       emit(ARM_MOV_R(rd[0], ARM_LR), ctx);
                        break;
                }
                goto exit;
@@ -1402,36 +1527,30 @@ emit_bswap_uxt:
                case 16:
                        /* zero-extend 16 bits into 64 bits */
 #if __LINUX_ARM_ARCH__ < 6
-                       emit_a32_mov_i(tmp2[1], 0xffff, false, ctx);
-                       emit(ARM_AND_R(rt, rt, tmp2[1]), ctx);
+                       emit_a32_mov_i(tmp2[1], 0xffff, ctx);
+                       emit(ARM_AND_R(rd[1], rd[1], tmp2[1]), ctx);
 #else /* ARMv6+ */
-                       emit(ARM_UXTH(rt, rt), ctx);
+                       emit(ARM_UXTH(rd[1], rd[1]), ctx);
 #endif
-                       emit(ARM_EOR_R(rd, rd, rd), ctx);
+                       emit(ARM_EOR_R(rd[0], rd[0], rd[0]), ctx);
                        break;
                case 32:
                        /* zero-extend 32 bits into 64 bits */
-                       emit(ARM_EOR_R(rd, rd, rd), ctx);
+                       emit(ARM_EOR_R(rd[0], rd[0], rd[0]), ctx);
                        break;
                case 64:
                        /* nop */
                        break;
                }
 exit:
-               if (dstk) {
-                       emit(ARM_STR_I(rt, ARM_SP, STACK_VAR(dst_lo)), ctx);
-                       emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_hi)), ctx);
-               }
+               arm_bpf_put_reg64(dst, rd, ctx);
                break;
        /* dst = imm64 */
        case BPF_LD | BPF_IMM | BPF_DW:
        {
-               const struct bpf_insn insn1 = insn[1];
-               u32 hi, lo = imm;
+               u64 val = (u32)imm | (u64)insn[1].imm << 32;
 
-               hi = insn1.imm;
-               emit_a32_mov_i(dst_lo, lo, dstk, ctx);
-               emit_a32_mov_i(dst_hi, hi, dstk, ctx);
+               emit_a32_mov_i64(dst, val, ctx);
 
                return 1;
        }
@@ -1440,10 +1559,8 @@ exit:
        case BPF_LDX | BPF_MEM | BPF_H:
        case BPF_LDX | BPF_MEM | BPF_B:
        case BPF_LDX | BPF_MEM | BPF_DW:
-               rn = sstk ? tmp2[1] : src_lo;
-               if (sstk)
-                       emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx);
-               emit_ldx_r(dst, rn, dstk, off, ctx, BPF_SIZE(code));
+               rn = arm_bpf_get_reg32(src_lo, tmp2[1], ctx);
+               emit_ldx_r(dst, rn, off, ctx, BPF_SIZE(code));
                break;
        /* ST: *(size *)(dst + off) = imm */
        case BPF_ST | BPF_MEM | BPF_W:
@@ -1453,18 +1570,15 @@ exit:
                switch (BPF_SIZE(code)) {
                case BPF_DW:
                        /* Sign-extend immediate value into temp reg */
-                       emit_a32_mov_i64(true, tmp2, imm, false, ctx);
-                       emit_str_r(dst_lo, tmp2[1], dstk, off, ctx, BPF_W);
-                       emit_str_r(dst_lo, tmp2[0], dstk, off+4, ctx, BPF_W);
+                       emit_a32_mov_se_i64(true, tmp2, imm, ctx);
                        break;
                case BPF_W:
                case BPF_H:
                case BPF_B:
-                       emit_a32_mov_i(tmp2[1], imm, false, ctx);
-                       emit_str_r(dst_lo, tmp2[1], dstk, off, ctx,
-                                  BPF_SIZE(code));
+                       emit_a32_mov_i(tmp2[1], imm, ctx);
                        break;
                }
+               emit_str_r(dst_lo, tmp2, off, ctx, BPF_SIZE(code));
                break;
        /* STX XADD: lock *(u32 *)(dst + off) += src */
        case BPF_STX | BPF_XADD | BPF_W:
@@ -1476,25 +1590,9 @@ exit:
        case BPF_STX | BPF_MEM | BPF_H:
        case BPF_STX | BPF_MEM | BPF_B:
        case BPF_STX | BPF_MEM | BPF_DW:
-       {
-               u8 sz = BPF_SIZE(code);
-
-               rn = sstk ? tmp2[1] : src_lo;
-               rm = sstk ? tmp2[0] : src_hi;
-               if (sstk) {
-                       emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx);
-                       emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(src_hi)), ctx);
-               }
-
-               /* Store the value */
-               if (BPF_SIZE(code) == BPF_DW) {
-                       emit_str_r(dst_lo, rn, dstk, off, ctx, BPF_W);
-                       emit_str_r(dst_lo, rm, dstk, off+4, ctx, BPF_W);
-               } else {
-                       emit_str_r(dst_lo, rn, dstk, off, ctx, sz);
-               }
+               rs = arm_bpf_get_reg64(src, tmp2, ctx);
+               emit_str_r(dst_lo, rs, off, ctx, BPF_SIZE(code));
                break;
-       }
        /* PC += off if dst == src */
        /* PC += off if dst > src */
        /* PC += off if dst >= src */
@@ -1518,12 +1616,8 @@ exit:
        case BPF_JMP | BPF_JSLT | BPF_X:
        case BPF_JMP | BPF_JSLE | BPF_X:
                /* Setup source registers */
-               rm = sstk ? tmp2[0] : src_hi;
-               rn = sstk ? tmp2[1] : src_lo;
-               if (sstk) {
-                       emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx);
-                       emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(src_hi)), ctx);
-               }
+               rm = arm_bpf_get_reg32(src_hi, tmp2[0], ctx);
+               rn = arm_bpf_get_reg32(src_lo, tmp2[1], ctx);
                goto go_jmp;
        /* PC += off if dst == imm */
        /* PC += off if dst > imm */
@@ -1552,18 +1646,13 @@ exit:
                rm = tmp2[0];
                rn = tmp2[1];
                /* Sign-extend immediate value */
-               emit_a32_mov_i64(true, tmp2, imm, false, ctx);
+               emit_a32_mov_se_i64(true, tmp2, imm, ctx);
 go_jmp:
                /* Setup destination register */
-               rd = dstk ? tmp[0] : dst_hi;
-               rt = dstk ? tmp[1] : dst_lo;
-               if (dstk) {
-                       emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(dst_lo)), ctx);
-                       emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_hi)), ctx);
-               }
+               rd = arm_bpf_get_reg64(dst, tmp, ctx);
 
                /* Check for the condition */
-               emit_ar_r(rd, rt, rm, rn, ctx, BPF_OP(code));
+               emit_ar_r(rd[0], rd[1], rm, rn, ctx, BPF_OP(code));
 
                /* Setup JUMP instruction */
                jmp_offset = bpf2a32_offset(i+off, i, ctx);
@@ -1619,21 +1708,21 @@ go_jmp:
        /* function call */
        case BPF_JMP | BPF_CALL:
        {
-               const u8 *r0 = bpf2a32[BPF_REG_0];
-               const u8 *r1 = bpf2a32[BPF_REG_1];
-               const u8 *r2 = bpf2a32[BPF_REG_2];
-               const u8 *r3 = bpf2a32[BPF_REG_3];
-               const u8 *r4 = bpf2a32[BPF_REG_4];
-               const u8 *r5 = bpf2a32[BPF_REG_5];
+               const s8 *r0 = bpf2a32[BPF_REG_0];
+               const s8 *r1 = bpf2a32[BPF_REG_1];
+               const s8 *r2 = bpf2a32[BPF_REG_2];
+               const s8 *r3 = bpf2a32[BPF_REG_3];
+               const s8 *r4 = bpf2a32[BPF_REG_4];
+               const s8 *r5 = bpf2a32[BPF_REG_5];
                const u32 func = (u32)__bpf_call_base + (u32)imm;
 
-               emit_a32_mov_r64(true, r0, r1, false, false, ctx);
-               emit_a32_mov_r64(true, r1, r2, false, true, ctx);
-               emit_push_r64(r5, 0, ctx);
-               emit_push_r64(r4, 8, ctx);
-               emit_push_r64(r3, 16, ctx);
+               emit_a32_mov_r64(true, r0, r1, ctx);
+               emit_a32_mov_r64(true, r1, r2, ctx);
+               emit_push_r64(r5, ctx);
+               emit_push_r64(r4, ctx);
+               emit_push_r64(r3, ctx);
 
-               emit_a32_mov_i(tmp[1], func, false, ctx);
+               emit_a32_mov_i(tmp[1], func, ctx);
                emit_blx_r(tmp[1], ctx);
 
                emit(ARM_ADD_I(ARM_SP, ARM_SP, imm8m(24)), ctx); // callee clean
@@ -1745,6 +1834,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 
        memset(&ctx, 0, sizeof(ctx));
        ctx.prog = prog;
+       ctx.cpu_architecture = cpu_architecture();
 
        /* If we cannot allocate memory for offsets[], we must
         * fall back to the interpreter
index d5cf5f6208aa7c993a191564699c00216762c0bb..f4e58bcdaa43825f8b7f0c75dde8b360f6ff5a39 100644 (file)
 #define ARM_INST_EOR_R         0x00200000
 #define ARM_INST_EOR_I         0x02200000
 
-#define ARM_INST_LDRB_I                0x05d00000
+#define ARM_INST_LDST__U       0x00800000
+#define ARM_INST_LDST__IMM12   0x00000fff
+#define ARM_INST_LDRB_I                0x05500000
 #define ARM_INST_LDRB_R                0x07d00000
-#define ARM_INST_LDRH_I                0x01d000b0
+#define ARM_INST_LDRD_I                0x014000d0
+#define ARM_INST_LDRH_I                0x015000b0
 #define ARM_INST_LDRH_R                0x019000b0
-#define ARM_INST_LDR_I         0x05900000
+#define ARM_INST_LDR_I         0x05100000
 #define ARM_INST_LDR_R         0x07900000
 
 #define ARM_INST_LDM           0x08900000
 #define ARM_INST_SBC_R         0x00c00000
 #define ARM_INST_SBCS_R                0x00d00000
 
-#define ARM_INST_STR_I         0x05800000
-#define ARM_INST_STRB_I                0x05c00000
-#define ARM_INST_STRH_I                0x01c000b0
+#define ARM_INST_STR_I         0x05000000
+#define ARM_INST_STRB_I                0x05400000
+#define ARM_INST_STRD_I                0x014000f0
+#define ARM_INST_STRH_I                0x014000b0
 
 #define ARM_INST_TST_R         0x01100000
 #define ARM_INST_TST_I         0x03100000
 #define ARM_EOR_R(rd, rn, rm)  _AL3_R(ARM_INST_EOR, rd, rn, rm)
 #define ARM_EOR_I(rd, rn, imm) _AL3_I(ARM_INST_EOR, rd, rn, imm)
 
-#define ARM_LDR_I(rt, rn, off) (ARM_INST_LDR_I | (rt) << 12 | (rn) << 16 \
-                                | ((off) & 0xfff))
-#define ARM_LDR_R(rt, rn, rm)  (ARM_INST_LDR_R | (rt) << 12 | (rn) << 16 \
+#define ARM_LDR_R(rt, rn, rm)  (ARM_INST_LDR_R | ARM_INST_LDST__U \
+                                | (rt) << 12 | (rn) << 16 \
                                 | (rm))
-#define ARM_LDRB_I(rt, rn, off)        (ARM_INST_LDRB_I | (rt) << 12 | (rn) << 16 \
-                                | (off))
-#define ARM_LDRB_R(rt, rn, rm) (ARM_INST_LDRB_R | (rt) << 12 | (rn) << 16 \
+#define ARM_LDR_R_SI(rt, rn, rm, type, imm) \
+                               (ARM_INST_LDR_R | ARM_INST_LDST__U \
+                                | (rt) << 12 | (rn) << 16 \
+                                | (imm) << 7 | (type) << 5 | (rm))
+#define ARM_LDRB_R(rt, rn, rm) (ARM_INST_LDRB_R | ARM_INST_LDST__U \
+                                | (rt) << 12 | (rn) << 16 \
                                 | (rm))
-#define ARM_LDRH_I(rt, rn, off)        (ARM_INST_LDRH_I | (rt) << 12 | (rn) << 16 \
-                                | (((off) & 0xf0) << 4) | ((off) & 0xf))
-#define ARM_LDRH_R(rt, rn, rm) (ARM_INST_LDRH_R | (rt) << 12 | (rn) << 16 \
+#define ARM_LDRH_R(rt, rn, rm) (ARM_INST_LDRH_R | ARM_INST_LDST__U \
+                                | (rt) << 12 | (rn) << 16 \
                                 | (rm))
 
 #define ARM_LDM(rn, regs)      (ARM_INST_LDM | (rn) << 16 | (regs))
 #define ARM_SUBS_I(rd, rn, imm)        _AL3_I(ARM_INST_SUBS, rd, rn, imm)
 #define ARM_SBC_I(rd, rn, imm) _AL3_I(ARM_INST_SBC, rd, rn, imm)
 
-#define ARM_STR_I(rt, rn, off) (ARM_INST_STR_I | (rt) << 12 | (rn) << 16 \
-                                | ((off) & 0xfff))
-#define ARM_STRH_I(rt, rn, off)        (ARM_INST_STRH_I | (rt) << 12 | (rn) << 16 \
-                                | (((off) & 0xf0) << 4) | ((off) & 0xf))
-#define ARM_STRB_I(rt, rn, off)        (ARM_INST_STRB_I | (rt) << 12 | (rn) << 16 \
-                                | (((off) & 0xf0) << 4) | ((off) & 0xf))
-
 #define ARM_TST_R(rn, rm)      _AL3_R(ARM_INST_TST, 0, rn, rm)
 #define ARM_TST_I(rn, imm)     _AL3_I(ARM_INST_TST, 0, rn, imm)
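
Splitting the U (add/subtract offset) bit out as ARM_INST_LDST__U, and
redefining the base LDR*/STR* opcodes with U clear, makes room for
negative immediate offsets: an encoder can set U for non-negative
offsets and negate otherwise. A sketch of such a helper (an assumption
about the .c-side counterpart, which this excerpt does not show):

static inline u32 arm_ldst_imm12_sketch(u32 op, u8 rt, u8 rn, s16 off)
{
        op |= (rt) << 12 | (rn) << 16;
        if (off >= 0)
                op |= ARM_INST_LDST__U;         /* add offset to base */
        else
                off = -off;                     /* subtract it instead */
        return op | (off & ARM_INST_LDST__IMM12);
}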
 
index 4057197048dcbbacaee733c6067cc677fd1ad54d..1a406a76c86a2ae7ae465c192b08739009e9d891 100644 (file)
                        status = "disabled";
                };
 
-               mdio_mux_iproc: mdio-mux@6602023c {
+               mdio_mux_iproc: mdio-mux@66020000 {
                        compatible = "brcm,mdio-mux-iproc";
-                       reg = <0x6602023c 0x14>;
+                       reg = <0x66020000 0x250>;
                        #address-cells = <1>;
                        #size-cells = <0>;
 
index b203152ad67ca18b4421bb035b2d13d32d7f9be5..a70e8ddbd66f5b836fa5e51e369d7829b0ec0501 100644 (file)
 
                #include "stingray-pinctrl.dtsi"
 
-               mdio_mux_iproc: mdio-mux@2023c {
+               mdio_mux_iproc: mdio-mux@20000 {
                        compatible = "brcm,mdio-mux-iproc";
-                       reg = <0x0002023c 0x14>;
+                       reg = <0x00020000 0x250>;
                        #address-cells = <1>;
                        #size-cells = <0>;
 
index 4dd06767f839e07d588fbf34465f828b55949775..a56a408e9bf754802c2f0cfc26633d9f927dff52 100644 (file)
@@ -11,13 +11,14 @@ fman0: fman@1a00000 {
        #size-cells = <1>;
        cell-index = <0>;
        compatible = "fsl,fman";
-       ranges = <0x0 0x0 0x1a00000 0x100000>;
-       reg = <0x0 0x1a00000 0x0 0x100000>;
+       ranges = <0x0 0x0 0x1a00000 0xfe000>;
+       reg = <0x0 0x1a00000 0x0 0xfe000>;
        interrupts = <GIC_SPI 44 IRQ_TYPE_LEVEL_HIGH>,
                     <GIC_SPI 45 IRQ_TYPE_LEVEL_HIGH>;
        clocks = <&clockgen 3 0>;
        clock-names = "fmanclk";
        fsl,qman-channel-range = <0x800 0x10>;
+       ptimer-handle = <&ptp_timer0>;
 
        muram@0 {
                compatible = "fsl,fman-muram";
@@ -73,9 +74,10 @@ fman0: fman@1a00000 {
                compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
                reg = <0xfd000 0x1000>;
        };
+};
 
-       ptp_timer0: ptp-timer@fe000 {
-               compatible = "fsl,fman-ptp-timer";
-               reg = <0xfe000 0x1000>;
-       };
+ptp_timer0: ptp-timer@1afe000 {
+       compatible = "fsl,fman-ptp-timer";
+       reg = <0x0 0x1afe000 0x0 0x1000>;
+       interrupts = <GIC_SPI 44 IRQ_TYPE_LEVEL_HIGH>;
 };
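
In the DTS hunks above (and the matching PowerPC ones below), the PTP timer stops being a child of the FMan node -- whose reg/ranges now end at 0xfe000 -- and becomes a sibling device, tied back to the FMan through the new ptimer-handle phandle. Consumer code can then resolve the timer node explicitly; a sketch assuming a standard of_parse_phandle() lookup (the wrapper name is hypothetical):

#include <linux/of.h>

/* Hypothetical lookup: returns a node reference that the caller
 * must release with of_node_put() when done. */
static struct device_node *fman_get_ptp_node(struct device_node *fman_np)
{
	return of_parse_phandle(fman_np, "ptimer-handle", 0);
}
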
index 3efba40adc5411ce5981ce611390c084ce7eddf0..c872c4e6bafb64c4334810f73475eb21845481a6 100644 (file)
 
 #define SO_ZEROCOPY            60
 
+#define SO_TXTIME              61
+#define SCM_TXTIME             SO_TXTIME
+
 #endif /* _ASM_IA64_SOCKET_H */
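
SO_TXTIME/SCM_TXTIME gain per-arch values in each asm/socket.h touched by this merge (61 on most architectures, 0x4036 on parisc, 0x003f on sparc). The option enables scheduled transmission: the socket is configured once with a reference clock, and each sendmsg() may then carry an SCM_TXTIME control message holding a u64 transmit time in nanoseconds. A userspace sketch, assuming the uapi definitions from this series (struct sock_txtime is paraphrased from linux/net_tstamp.h):

#include <time.h>
#include <sys/socket.h>
#include <linux/types.h>

struct sock_txtime {		/* paraphrased uapi layout */
	clockid_t clockid;	/* reference clock, e.g. CLOCK_TAI */
	__u32     flags;
};

static int enable_txtime(int fd)
{
	struct sock_txtime st = { .clockid = CLOCK_TAI, .flags = 0 };

	/* per-packet times then travel in an SCM_TXTIME cmsg (u64 ns) */
	return setsockopt(fd, SOL_SOCKET, SO_TXTIME, &st, sizeof(st));
}
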
index 49c3d47959637a8653067bc903fbcb8b0bb16e1e..71370fb3ceef4ee4c235876bd1ac7056cff7f1e8 100644 (file)
 
 #define SO_ZEROCOPY            60
 
+#define SO_TXTIME              61
+#define SCM_TXTIME             SO_TXTIME
+
 #endif /* _UAPI_ASM_SOCKET_H */
index 1d0fdc3b5d228279b3863bd581f56fc8ed2cbe9c..061b9cf2a77988a6b82eb47f3d1fd1a53e3264a2 100644 (file)
 
 #define SO_ZEROCOPY            0x4035
 
+#define SO_TXTIME              0x4036
+#define SCM_TXTIME             SO_TXTIME
+
 #endif /* _UAPI_ASM_SOCKET_H */
index abd01d466de4211953865fc6398fb09aa26efc1e..6b124f73f67ab441488b4e0ca50bbb4a8c450ce0 100644 (file)
@@ -37,12 +37,13 @@ fman0: fman@400000 {
        #size-cells = <1>;
        cell-index = <0>;
        compatible = "fsl,fman";
-       ranges = <0 0x400000 0x100000>;
-       reg = <0x400000 0x100000>;
+       ranges = <0 0x400000 0xfe000>;
+       reg = <0x400000 0xfe000>;
        interrupts = <96 2 0 0>, <16 2 1 1>;
        clocks = <&clockgen 3 0>;
        clock-names = "fmanclk";
        fsl,qman-channel-range = <0x40 0xc>;
+       ptimer-handle = <&ptp_timer0>;
 
        muram@0 {
                compatible = "fsl,fman-muram";
@@ -93,9 +94,10 @@ fman0: fman@400000 {
                reg = <0x87000 0x1000>;
                status = "disabled";
        };
+};
 
-       ptp_timer0: ptp-timer@fe000 {
-               compatible = "fsl,fman-ptp-timer";
-               reg = <0xfe000 0x1000>;
-       };
+ptp_timer0: ptp-timer@4fe000 {
+       compatible = "fsl,fman-ptp-timer";
+       reg = <0x4fe000 0x1000>;
+       interrupts = <96 2 0 0>;
 };
index debea75fd3f0e54b3f8662d61fbe718decafb2ad..b80aaf5f00a1939467ffd4890d7ce5cfab473444 100644 (file)
@@ -37,12 +37,13 @@ fman1: fman@500000 {
        #size-cells = <1>;
        cell-index = <1>;
        compatible = "fsl,fman";
-       ranges = <0 0x500000 0x100000>;
-       reg = <0x500000 0x100000>;
+       ranges = <0 0x500000 0xfe000>;
+       reg = <0x500000 0xfe000>;
        interrupts = <97 2 0 0>, <16 2 1 0>;
        clocks = <&clockgen 3 1>;
        clock-names = "fmanclk";
        fsl,qman-channel-range = <0x60 0xc>;
+       ptimer-handle = <&ptp_timer1>;
 
        muram@0 {
                compatible = "fsl,fman-muram";
@@ -93,9 +94,10 @@ fman1: fman@500000 {
                reg = <0x87000 0x1000>;
                status = "disabled";
        };
+};
 
-       ptp_timer1: ptp-timer@fe000 {
-               compatible = "fsl,fman-ptp-timer";
-               reg = <0xfe000 0x1000>;
-       };
+ptp_timer1: ptp-timer@5fe000 {
+       compatible = "fsl,fman-ptp-timer";
+       reg = <0x5fe000 0x1000>;
+       interrupts = <97 2 0 0>;
 };
index 3a20e0d1a6d26cc20c2bd97afc5c5a220a72c6b6..d3720fdde26ca11ce11e4abdff9f76a0b5b3bc6c 100644 (file)
@@ -37,12 +37,13 @@ fman0: fman@400000 {
        #size-cells = <1>;
        cell-index = <0>;
        compatible = "fsl,fman";
-       ranges = <0 0x400000 0x100000>;
-       reg = <0x400000 0x100000>;
+       ranges = <0 0x400000 0xfe000>;
+       reg = <0x400000 0xfe000>;
        interrupts = <96 2 0 0>, <16 2 1 1>;
        clocks = <&clockgen 3 0>;
        clock-names = "fmanclk";
        fsl,qman-channel-range = <0x800 0x10>;
+       ptimer-handle = <&ptp_timer0>;
 
        muram@0 {
                compatible = "fsl,fman-muram";
@@ -98,9 +99,10 @@ fman0: fman@400000 {
                compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
                reg = <0xfd000 0x1000>;
        };
+};
 
-       ptp_timer0: ptp-timer@fe000 {
-               compatible = "fsl,fman-ptp-timer";
-               reg = <0xfe000 0x1000>;
-       };
+ptp_timer0: ptp-timer@4fe000 {
+       compatible = "fsl,fman-ptp-timer";
+       reg = <0x4fe000 0x1000>;
+       interrupts = <96 2 0 0>;
 };
index 82750ac944c7be227ff8c3801fffb1f2bdeb197a..ae34c204a5bccd2f39931b1982ca1c4428dbde10 100644 (file)
@@ -37,12 +37,13 @@ fman1: fman@500000 {
        #size-cells = <1>;
        cell-index = <1>;
        compatible = "fsl,fman";
-       ranges = <0 0x500000 0x100000>;
-       reg = <0x500000 0x100000>;
+       ranges = <0 0x500000 0xfe000>;
+       reg = <0x500000 0xfe000>;
        interrupts = <97 2 0 0>, <16 2 1 0>;
        clocks = <&clockgen 3 1>;
        clock-names = "fmanclk";
        fsl,qman-channel-range = <0x820 0x10>;
+       ptimer-handle = <&ptp_timer1>;
 
        muram@0 {
                compatible = "fsl,fman-muram";
@@ -98,9 +99,10 @@ fman1: fman@500000 {
                compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
                reg = <0xfd000 0x1000>;
        };
+};
 
-       ptp_timer1: ptp-timer@fe000 {
-               compatible = "fsl,fman-ptp-timer";
-               reg = <0xfe000 0x1000>;
-       };
+ptp_timer1: ptp-timer@5fe000 {
+       compatible = "fsl,fman-ptp-timer";
+       reg = <0x5fe000 0x1000>;
+       interrupts = <97 2 0 0>;
 };
index 7f60b60601764007ba7cccfad2394b3db91524af..02f2755842ccab93a7588714ea7272f7f67785ab 100644 (file)
@@ -37,12 +37,13 @@ fman0: fman@400000 {
        #size-cells = <1>;
        cell-index = <0>;
        compatible = "fsl,fman";
-       ranges = <0 0x400000 0x100000>;
-       reg = <0x400000 0x100000>;
+       ranges = <0 0x400000 0xfe000>;
+       reg = <0x400000 0xfe000>;
        interrupts = <96 2 0 0>, <16 2 1 1>;
        clocks = <&clockgen 3 0>;
        clock-names = "fmanclk";
        fsl,qman-channel-range = <0x800 0x10>;
+       ptimer-handle = <&ptp_timer0>;
 
        muram@0 {
                compatible = "fsl,fman-muram";
@@ -86,9 +87,10 @@ fman0: fman@400000 {
                compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
                reg = <0xfd000 0x1000>;
        };
+};
 
-       ptp_timer0: ptp-timer@fe000 {
-               compatible = "fsl,fman-ptp-timer";
-               reg = <0xfe000 0x1000>;
-       };
+ptp_timer0: ptp-timer@4fe000 {
+       compatible = "fsl,fman-ptp-timer";
+       reg = <0x4fe000 0x1000>;
+       interrupts = <96 2 0 0>;
 };
index 3510c0fd06f4004aeccc4d1e21dded8f5ca8a00a..39d901476ee5d351f1e34a66f0828ca9d9fe522b 100644 (file)
 
 #define SO_ZEROCOPY            60
 
+#define SO_TXTIME              61
+#define SCM_TXTIME             SO_TXTIME
+
 #endif /* _ASM_SOCKET_H */
index d58520c2e6ff2c77788d3546d17c960fd067714f..7ea35e5601b6bed654363456c4127f28beb7cada 100644 (file)
 
 #define SO_ZEROCOPY            0x003e
 
+#define SO_TXTIME              0x003f
+#define SCM_TXTIME             SO_TXTIME
+
 /* Security levels - as per NRL IPv6 - don't actually do anything */
 #define SO_SECURITY_AUTHENTICATION             0x5001
 #define SO_SECURITY_ENCRYPTION_TRANSPORT       0x5002
index 749856a2e736738feab416e3980054e5b5a699b3..9af1a21265d3b83995565fe3a2d2b4fe22a8a647 100644 (file)
@@ -146,6 +146,7 @@ static int rdtgroup_add_file(struct kernfs_node *parent_kn, struct rftype *rft)
        int ret;
 
        kn = __kernfs_create_file(parent_kn, rft->name, rft->mode,
+                                 GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
                                  0, rft->kf_ops, rft, NULL, NULL);
        if (IS_ERR(kn))
                return PTR_ERR(kn);
@@ -1503,7 +1504,8 @@ static int mon_addfile(struct kernfs_node *parent_kn, const char *name,
        struct kernfs_node *kn;
        int ret = 0;
 
-       kn = __kernfs_create_file(parent_kn, name, 0444, 0,
+       kn = __kernfs_create_file(parent_kn, name, 0444,
+                                 GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0,
                                  &kf_mondata_ops, priv, NULL, NULL);
        if (IS_ERR(kn))
                return PTR_ERR(kn);
index 75a07b8119a96b9eea2258c7d235972552265f46..1de07a7f76806984e504bc03638df7e0610a3455 100644 (file)
 
 #define SO_ZEROCOPY            60
 
+#define SO_TXTIME              61
+#define SCM_TXTIME             SO_TXTIME
+
 #endif /* _XTENSA_SOCKET_H */
index c166f424871c86a356b15eff8bdd3b2be6406a87..b053179e0bc532e019b249eb0b96ba76fc3bb103 100644 (file)
@@ -1071,7 +1071,7 @@ __poll_t af_alg_poll(struct file *file, struct socket *sock,
        struct af_alg_ctx *ctx = ask->private;
        __poll_t mask;
 
-       sock_poll_wait(file, sk_sleep(sk), wait);
+       sock_poll_wait(file, wait);
        mask = 0;
 
        if (!ctx->more || ctx->used)
index 2c288d1f42bba0fcdf31ccec72c069bfa60688b9..e89146ddede693a400b0d7019f14ca92aa4dff8e 100644 (file)
@@ -1385,14 +1385,12 @@ static void zatm_close(struct atm_vcc *vcc)
 
 static int zatm_open(struct atm_vcc *vcc)
 {
-       struct zatm_dev *zatm_dev;
        struct zatm_vcc *zatm_vcc;
        short vpi = vcc->vpi;
        int vci = vcc->vci;
        int error;
 
        DPRINTK(">zatm_open\n");
-       zatm_dev = ZATM_DEV(vcc->dev);
        if (!test_bit(ATM_VF_PARTIAL,&vcc->flags))
                vcc->dev_data = NULL;
        if (vci != ATM_VPI_UNSPEC && vpi != ATM_VCI_UNSPEC)
index df3e1a44707acc74010cf5ce6fab815c4f744896..276c7e3f754c8395fbf470e528742cd98d63c724 100644 (file)
@@ -866,10 +866,19 @@ static const void *device_namespace(struct kobject *kobj)
        return ns;
 }
 
+static void device_get_ownership(struct kobject *kobj, kuid_t *uid, kgid_t *gid)
+{
+       struct device *dev = kobj_to_dev(kobj);
+
+       if (dev->class && dev->class->get_ownership)
+               dev->class->get_ownership(dev, uid, gid);
+}
+
 static struct kobj_type device_ktype = {
        .release        = device_release,
        .sysfs_ops      = &dev_sysfs_ops,
        .namespace      = device_namespace,
+       .get_ownership  = device_get_ownership,
 };
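
The new kobj_type hook above forwards ownership queries to the device's class, so a class can report which kuid/kgid should own the sysfs files of its devices; in this series that is used to make network devices in a user namespace owned by that namespace's root. A sketch of the class side, assuming the class-level get_ownership member added by the same series (class name hypothetical):

#include <linux/device.h>

static void example_get_ownership(struct device *dev,
				  kuid_t *uid, kgid_t *gid)
{
	/* Leave *uid/*gid untouched for global root, or overwrite
	 * them based on which namespace owns this device. */
}

static struct class example_class = {
	.name          = "example",
	.get_ownership = example_get_ownership,
};
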
 
 
index 3fb95c8d9fd83567496d77e1e4ade83975658401..3863c00372bb9ed9b68fa41f3e692736d779b81c 100644 (file)
@@ -1633,7 +1633,7 @@ static int find_free_cb(int id, void *ptr, void *data)
 }
 
 /* Netlink interface. */
-static struct nla_policy nbd_attr_policy[NBD_ATTR_MAX + 1] = {
+static const struct nla_policy nbd_attr_policy[NBD_ATTR_MAX + 1] = {
        [NBD_ATTR_INDEX]                =       { .type = NLA_U32 },
        [NBD_ATTR_SIZE_BYTES]           =       { .type = NLA_U64 },
        [NBD_ATTR_BLOCK_SIZE_BYTES]     =       { .type = NLA_U64 },
@@ -1645,14 +1645,14 @@ static struct nla_policy nbd_attr_policy[NBD_ATTR_MAX + 1] = {
        [NBD_ATTR_DEVICE_LIST]          =       { .type = NLA_NESTED},
 };
 
-static struct nla_policy nbd_sock_policy[NBD_SOCK_MAX + 1] = {
+static const struct nla_policy nbd_sock_policy[NBD_SOCK_MAX + 1] = {
        [NBD_SOCK_FD]                   =       { .type = NLA_U32 },
 };
 
 /* We don't use this right now since we don't parse the incoming list, but we
  * still want it here so userspace knows what to expect.
  */
-static struct nla_policy __attribute__((unused))
+static const struct nla_policy __attribute__((unused))
 nbd_device_policy[NBD_DEVICE_ATTR_MAX + 1] = {
        [NBD_DEVICE_INDEX]              =       { .type = NLA_U32 },
        [NBD_DEVICE_CONNECTED]          =       { .type = NLA_U8 },
index e718b8c69a566713362f16c7d6795a86f4c79912..eeb7d31cbda5b2794afad4c9fd0ab65c8849240d 100644 (file)
@@ -19,6 +19,7 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
+#include <linux/compiler.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/list.h>
@@ -239,7 +240,7 @@ void cn_del_callback(struct cb_id *id)
 }
 EXPORT_SYMBOL_GPL(cn_del_callback);
 
-static int cn_proc_show(struct seq_file *m, void *v)
+static int __maybe_unused cn_proc_show(struct seq_file *m, void *v)
 {
        struct cn_queue_dev *dev = cdev.cbdev;
        struct cn_callback_entry *cbq;
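
cn_proc_show() is referenced only through proc_create_single(), which compiles to a stub that drops the reference when CONFIG_PROC_FS=n, leaving the function defined but unused; __maybe_unused silences that warning without #ifdefs. The capi show functions further down get the same treatment. The pattern in miniature (file name and contents hypothetical):

#include <linux/compiler.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static int __maybe_unused foo_proc_show(struct seq_file *m, void *v)
{
	seq_puts(m, "example stats\n");
	return 0;
}

static int __init foo_init(void)
{
	/* With CONFIG_PROC_FS=n this is a no-op macro, which is why
	 * the annotation on the show() callback is needed. */
	proc_create_single("driver/foo", 0444, NULL, foo_proc_show);
	return 0;
}
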
index 2bb6f0380758829077a789f8eb26932e108a360c..0997e166ea57755c4d0178702995cce489b06d7e 100644 (file)
@@ -1673,7 +1673,7 @@ static void chtls_timewait(struct sock *sk)
        struct tcp_sock *tp = tcp_sk(sk);
 
        tp->rcv_nxt++;
-       tp->rx_opt.ts_recent_stamp = get_seconds();
+       tp->rx_opt.ts_recent_stamp = ktime_get_seconds();
        tp->srtt_us = 0;
        tcp_time_wait(sk, TCP_TIME_WAIT, 0);
 }
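
get_seconds() returns wall-clock seconds, which can jump when the clock is stepped and overflows a 32-bit unsigned long in 2038; ktime_get_seconds() returns monotonic time64_t seconds, which is what an age check on ts_recent_stamp actually wants. A sketch of the kind of comparison this keeps correct (the window value is illustrative):

#include <linux/timekeeping.h>

static bool stamp_expired(time64_t stamp)
{
	/* Monotonic seconds: immune to settimeofday()/NTP steps
	 * and safe past year 2038. */
	return ktime_get_seconds() - stamp > 60;
}
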
index e88c019619481d16ab7690ecc8a862f94733e309..33d51281272bb066762d80b46161b4f7f44113b6 100644 (file)
@@ -394,12 +394,16 @@ static const char * const hwmon_power_attr_templates[] = {
        [hwmon_power_cap_hyst] = "power%d_cap_hyst",
        [hwmon_power_cap_max] = "power%d_cap_max",
        [hwmon_power_cap_min] = "power%d_cap_min",
+       [hwmon_power_min] = "power%d_min",
        [hwmon_power_max] = "power%d_max",
+       [hwmon_power_lcrit] = "power%d_lcrit",
        [hwmon_power_crit] = "power%d_crit",
        [hwmon_power_label] = "power%d_label",
        [hwmon_power_alarm] = "power%d_alarm",
        [hwmon_power_cap_alarm] = "power%d_cap_alarm",
+       [hwmon_power_min_alarm] = "power%d_min_alarm",
        [hwmon_power_max_alarm] = "power%d_max_alarm",
+       [hwmon_power_lcrit_alarm] = "power%d_lcrit_alarm",
        [hwmon_power_crit_alarm] = "power%d_crit_alarm",
 };
 
index 5d65582fe4d92f4840cdb84db8ae5d6f3dcf8652..616fc9b6fad8f41e28f5114f4d6afd4bd8e8788e 100644 (file)
@@ -423,7 +423,7 @@ tx_finish:
 
 static u16 hfi1_vnic_select_queue(struct net_device *netdev,
                                  struct sk_buff *skb,
-                                 void *accel_priv,
+                                 struct net_device *sb_dev,
                                  select_queue_fallback_t fallback)
 {
        struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
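
This hunk, and the matching ones in opa_vnic and bonding below, track an ndo_select_queue() signature change made elsewhere in this merge: the opaque accel_priv cookie becomes a typed struct net_device *sb_dev, the transmit "subordinate device". Drivers that merely pass the argument through, as here, change mechanically. The reworked hook, sketched for a trivial single-queue driver (names illustrative):

#include <linux/netdevice.h>

static u16 example_select_queue(struct net_device *dev,
				struct sk_buff *skb,
				struct net_device *sb_dev,
				select_queue_fallback_t fallback)
{
	return 0;	/* single TX queue: nothing to select */
}
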
index fb4d77be019b77797b5bf281dfdd63a447fd065f..0440966bc6ec31df4163d64f8530bcff1b8e0b2c 100644 (file)
@@ -1,5 +1,5 @@
 config MLX5_INFINIBAND
-       tristate "Mellanox Connect-IB HCA support"
+       tristate "Mellanox 5th generation network adapters (ConnectX series) support"
        depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE
        depends on INFINIBAND_USER_ACCESS || INFINIBAND_USER_ACCESS=n
        ---help---
index 188512bf46e63210e478a0527bc225c075dcf28a..ccc0b5d06a7df7bd4917ac17b95faab051621959 100644 (file)
 
 #include "cmd.h"
 
+int mlx5_cmd_dump_fill_mkey(struct mlx5_core_dev *dev, u32 *mkey)
+{
+       u32 out[MLX5_ST_SZ_DW(query_special_contexts_out)] = {0};
+       u32 in[MLX5_ST_SZ_DW(query_special_contexts_in)]   = {0};
+       int err;
+
+       MLX5_SET(query_special_contexts_in, in, opcode,
+                MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS);
+       err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+       if (!err)
+               *mkey = MLX5_GET(query_special_contexts_out, out,
+                                dump_fill_mkey);
+       return err;
+}
+
 int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey)
 {
        u32 out[MLX5_ST_SZ_DW(query_special_contexts_out)] = {};
index e7206c8a8011b273581d290f9d751a84ec8a94e6..98ea4648c655a62f6240803893efcdcb7fe20ac5 100644 (file)
@@ -37,6 +37,7 @@
 #include <linux/kernel.h>
 #include <linux/mlx5/driver.h>
 
+int mlx5_cmd_dump_fill_mkey(struct mlx5_core_dev *dev, u32 *mkey);
 int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey);
 int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point,
                               void *out, int out_size);
index 0c8aec62a42539fc90d67000361653fbf846ae3b..61558788b3fadb7546660f3b907820b264e3b1aa 100644 (file)
@@ -95,7 +95,7 @@ static netdev_tx_t opa_netdev_start_xmit(struct sk_buff *skb,
 }
 
 static u16 opa_vnic_select_queue(struct net_device *netdev, struct sk_buff *skb,
-                                void *accel_priv,
+                                struct net_device *sb_dev,
                                 select_queue_fallback_t fallback)
 {
        struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev);
@@ -107,7 +107,7 @@ static u16 opa_vnic_select_queue(struct net_device *netdev, struct sk_buff *skb,
        mdata->entropy = opa_vnic_calc_entropy(skb);
        mdata->vl = opa_vnic_get_vl(adapter, skb);
        rc = adapter->rn_ops->ndo_select_queue(netdev, skb,
-                                              accel_priv, fallback);
+                                              sb_dev, fallback);
        skb_pull(skb, sizeof(*mdata));
        return rc;
 }
index 6e0c2814d0329ace7b109915d58c9805ce2b1e6a..ef5560b848ab3a66adc29d119d5608bf726642e0 100644 (file)
@@ -9,6 +9,7 @@
  *
  */
 
+#include <linux/compiler.h>
 #include <linux/module.h>
 #include <linux/errno.h>
 #include <linux/kernel.h>
@@ -1321,7 +1322,7 @@ static inline void capinc_tty_exit(void) { }
  * /proc/capi/capi20:
  *  minor applid nrecvctlpkt nrecvdatapkt nsendctlpkt nsenddatapkt
  */
-static int capi20_proc_show(struct seq_file *m, void *v)
+static int __maybe_unused capi20_proc_show(struct seq_file *m, void *v)
 {
        struct capidev *cdev;
        struct list_head *l;
@@ -1344,7 +1345,7 @@ static int capi20_proc_show(struct seq_file *m, void *v)
  * /proc/capi/capi20ncci:
  *  applid ncci
  */
-static int capi20ncci_proc_show(struct seq_file *m, void *v)
+static int __maybe_unused capi20ncci_proc_show(struct seq_file *m, void *v)
 {
        struct capidev *cdev;
        struct capincci *np;
index ee510f901720d1686524f37bc598bd6e3ee254a0..e8949f3dcae17712cae36973804f90d8602ca3b4 100644 (file)
@@ -9,6 +9,7 @@
  *
  */
 
+#include <linux/compiler.h>
 #include <linux/module.h>
 #include <linux/errno.h>
 #include <linux/kernel.h>
@@ -2451,7 +2452,7 @@ lower_callback(struct notifier_block *nb, unsigned long val, void *v)
  * /proc/capi/capidrv:
  * nrecvctlpkt nrecvdatapkt nsendctlpkt nsenddatapkt
  */
-static int capidrv_proc_show(struct seq_file *m, void *v)
+static int __maybe_unused capidrv_proc_show(struct seq_file *m, void *v)
 {
        seq_printf(m, "%lu %lu %lu %lu\n",
                   global.ap.nrecvctlpkt,
index 20d0a080a2b0d521ace124c8b1206b0298b7d55e..ecdeb89645d00454381ef4020986fdf5e38f2eec 100644 (file)
@@ -739,6 +739,7 @@ static void read_int_callback(struct urb *urb)
 
        case HD_OPEN_B2CHANNEL_ACK:
                ++channel;
+               /* fall through */
        case HD_OPEN_B1CHANNEL_ACK:
                bcs = cs->bcs + channel;
                update_basstate(ucs, BS_B1OPEN << channel, 0);
@@ -752,6 +753,7 @@ static void read_int_callback(struct urb *urb)
 
        case HD_CLOSE_B2CHANNEL_ACK:
                ++channel;
+               /* fall through */
        case HD_CLOSE_B1CHANNEL_ACK:
                bcs = cs->bcs + channel;
                update_basstate(ucs, 0, BS_B1OPEN << channel);
@@ -765,6 +767,7 @@ static void read_int_callback(struct urb *urb)
 
        case HD_B2_FLOW_CONTROL:
                ++channel;
+               /* fall through */
        case HD_B1_FLOW_CONTROL:
                bcs = cs->bcs + channel;
                atomic_add((l - BAS_NORMFRAME) * BAS_CORRFRAMES,
@@ -972,16 +975,14 @@ static int starturbs(struct bc_state *bcs)
                        rc = -EFAULT;
                        goto error;
                }
+               usb_fill_int_urb(urb, bcs->cs->hw.bas->udev,
+                                usb_rcvisocpipe(urb->dev, 3 + 2 * bcs->channel),
+                                ubc->isoinbuf + k * BAS_INBUFSIZE,
+                                BAS_INBUFSIZE, read_iso_callback, bcs,
+                                BAS_FRAMETIME);
 
-               urb->dev = bcs->cs->hw.bas->udev;
-               urb->pipe = usb_rcvisocpipe(urb->dev, 3 + 2 * bcs->channel);
                urb->transfer_flags = URB_ISO_ASAP;
-               urb->transfer_buffer = ubc->isoinbuf + k * BAS_INBUFSIZE;
-               urb->transfer_buffer_length = BAS_INBUFSIZE;
                urb->number_of_packets = BAS_NUMFRAMES;
-               urb->interval = BAS_FRAMETIME;
-               urb->complete = read_iso_callback;
-               urb->context = bcs;
                for (j = 0; j < BAS_NUMFRAMES; j++) {
                        urb->iso_frame_desc[j].offset = j * BAS_MAXFRAME;
                        urb->iso_frame_desc[j].length = BAS_MAXFRAME;
@@ -1005,15 +1006,15 @@ static int starturbs(struct bc_state *bcs)
                        rc = -EFAULT;
                        goto error;
                }
-               urb->dev = bcs->cs->hw.bas->udev;
-               urb->pipe = usb_sndisocpipe(urb->dev, 4 + 2 * bcs->channel);
+               usb_fill_int_urb(urb, bcs->cs->hw.bas->udev,
+                                usb_sndisocpipe(urb->dev, 4 + 2 * bcs->channel),
+                                ubc->isooutbuf->data,
+                                sizeof(ubc->isooutbuf->data),
+                                write_iso_callback, &ubc->isoouturbs[k],
+                                BAS_FRAMETIME);
+
                urb->transfer_flags = URB_ISO_ASAP;
-               urb->transfer_buffer = ubc->isooutbuf->data;
-               urb->transfer_buffer_length = sizeof(ubc->isooutbuf->data);
                urb->number_of_packets = BAS_NUMFRAMES;
-               urb->interval = BAS_FRAMETIME;
-               urb->complete = write_iso_callback;
-               urb->context = &ubc->isoouturbs[k];
                for (j = 0; j < BAS_NUMFRAMES; ++j) {
                        urb->iso_frame_desc[j].offset = BAS_OUTBUFSIZE;
                        urb->iso_frame_desc[j].length = BAS_NORMFRAME;
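
The starturbs() hunks above replace seven open-coded URB field assignments with one usb_fill_int_urb() call, which sets dev, pipe, buffer, length, completion, context and interval together; only the iso-specific pieces (transfer_flags, number_of_packets, per-frame descriptors) still need filling by hand. The shape of the helper in a stand-alone sketch (endpoint and interval values are made up):

#include <linux/usb.h>
#include <linux/gfp.h>

static int example_submit(struct urb *urb, struct usb_device *udev,
			  void *buf, int len, usb_complete_t cb, void *ctx)
{
	/* one helper call instead of seven field writes */
	usb_fill_int_urb(urb, udev, usb_rcvintpipe(udev, 1),
			 buf, len, cb, ctx, 8);
	return usb_submit_urb(urb, GFP_ATOMIC);
}
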
index ae2b2669af1bc44667e49e60148c9591b7fa4e85..8eb28a83832eddc11f5fab235ad29f0252a89268 100644 (file)
@@ -361,6 +361,7 @@ modehdlc(struct bchannel *bch, int protocol)
        switch (protocol) {
        case -1: /* used for init */
                bch->state = -1;
+               /* fall through */
        case ISDN_P_NONE:
                if (bch->state == ISDN_P_NONE)
                        break;
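
The /* fall through */ comments added throughout the ISDN drivers in this merge mark intentional case fall-throughs for GCC's -Wimplicit-fallthrough; the comment must sit directly before the next label for the compiler to honor it. The skeleton of the pattern (values hypothetical):

#include <linux/printk.h>

static void example_handle(int event, int channel)
{
	switch (event) {
	case 2:
		++channel;	/* adjust, then share the common path */
		/* fall through */
	case 1:
		pr_debug("setting up channel %d\n", channel);
		break;
	}
}
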
index 34c93874af23bc43565119eab4b8b9b5d84a0815..ebb3fa2e1d00f1627d42363c3478999b1b0ee234 100644 (file)
@@ -1296,6 +1296,7 @@ mode_hfcpci(struct bchannel *bch, int bc, int protocol)
        case (-1): /* used for init */
                bch->state = -1;
                bch->nr = bc;
+               /* fall through */
        case (ISDN_P_NONE):
                if (bch->state == ISDN_P_NONE)
                        return 0;
@@ -2219,7 +2220,7 @@ hfc_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        struct hfc_pci  *card;
        struct _hfc_map *m = (struct _hfc_map *)ent->driver_data;
 
-       card = kzalloc(sizeof(struct hfc_pci), GFP_ATOMIC);
+       card = kzalloc(sizeof(struct hfc_pci), GFP_KERNEL);
        if (!card) {
                printk(KERN_ERR "No kmem for HFC card\n");
                return err;
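
hfc_probe() -- like netjet's nj_probe() and the hisax setup paths further down -- runs in process context where sleeping is allowed, so the allocation is relaxed from GFP_ATOMIC, which cannot block and taps emergency reserves, to GFP_KERNEL, which can reclaim and is far more likely to succeed. The rule of thumb, sketched (struct name hypothetical):

#include <linux/pci.h>
#include <linux/slab.h>

struct example_card { int dummy; };	/* hypothetical driver state */

static int example_probe(struct pci_dev *pdev,
			 const struct pci_device_id *id)
{
	/* probe may sleep: GFP_KERNEL is the right strength here;
	 * reserve GFP_ATOMIC for IRQ/softirq context */
	struct example_card *card = kzalloc(sizeof(*card), GFP_KERNEL);

	if (!card)
		return -ENOMEM;
	pci_set_drvdata(pdev, card);
	return 0;
}
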
index 17cc879ad2bbf784be196528d14a533bf0267b29..6d05946b445eb039aeb6c9c755e94dbe8b8f1dac 100644 (file)
@@ -819,6 +819,7 @@ hfcsusb_rx_frame(struct usb_fifo *fifo, __u8 *data, unsigned int len,
        int             fifon = fifo->fifonum;
        int             i;
        int             hdlc = 0;
+       unsigned long   flags;
 
        if (debug & DBG_HFC_CALL_TRACE)
                printk(KERN_DEBUG "%s: %s: fifo(%i) len(%i) "
@@ -835,7 +836,7 @@ hfcsusb_rx_frame(struct usb_fifo *fifo, __u8 *data, unsigned int len,
                return;
        }
 
-       spin_lock(&hw->lock);
+       spin_lock_irqsave(&hw->lock, flags);
        if (fifo->dch) {
                rx_skb = fifo->dch->rx_skb;
                maxlen = fifo->dch->maxlen;
@@ -844,7 +845,7 @@ hfcsusb_rx_frame(struct usb_fifo *fifo, __u8 *data, unsigned int len,
        if (fifo->bch) {
                if (test_bit(FLG_RX_OFF, &fifo->bch->Flags)) {
                        fifo->bch->dropcnt += len;
-                       spin_unlock(&hw->lock);
+                       spin_unlock_irqrestore(&hw->lock, flags);
                        return;
                }
                maxlen = bchannel_get_rxbuf(fifo->bch, len);
@@ -854,7 +855,7 @@ hfcsusb_rx_frame(struct usb_fifo *fifo, __u8 *data, unsigned int len,
                                skb_trim(rx_skb, 0);
                        pr_warning("%s.B%d: No bufferspace for %d bytes\n",
                                   hw->name, fifo->bch->nr, len);
-                       spin_unlock(&hw->lock);
+                       spin_unlock_irqrestore(&hw->lock, flags);
                        return;
                }
                maxlen = fifo->bch->maxlen;
@@ -878,7 +879,7 @@ hfcsusb_rx_frame(struct usb_fifo *fifo, __u8 *data, unsigned int len,
                        } else {
                                printk(KERN_DEBUG "%s: %s: No mem for rx_skb\n",
                                       hw->name, __func__);
-                               spin_unlock(&hw->lock);
+                               spin_unlock_irqrestore(&hw->lock, flags);
                                return;
                        }
                }
@@ -888,7 +889,7 @@ hfcsusb_rx_frame(struct usb_fifo *fifo, __u8 *data, unsigned int len,
                               "for fifo(%d) HFCUSB_D_RX\n",
                               hw->name, __func__, fifon);
                        skb_trim(rx_skb, 0);
-                       spin_unlock(&hw->lock);
+                       spin_unlock_irqrestore(&hw->lock, flags);
                        return;
                }
        }
@@ -942,7 +943,7 @@ hfcsusb_rx_frame(struct usb_fifo *fifo, __u8 *data, unsigned int len,
                /* deliver transparent data to layer2 */
                recv_Bchannel(fifo->bch, MISDN_ID_ANY, false);
        }
-       spin_unlock(&hw->lock);
+       spin_unlock_irqrestore(&hw->lock, flags);
 }
 
 static void
@@ -979,18 +980,19 @@ rx_iso_complete(struct urb *urb)
        __u8 *buf;
        static __u8 eof[8];
        __u8 s0_state;
+       unsigned long flags;
 
        fifon = fifo->fifonum;
        status = urb->status;
 
-       spin_lock(&hw->lock);
+       spin_lock_irqsave(&hw->lock, flags);
        if (fifo->stop_gracefull) {
                fifo->stop_gracefull = 0;
                fifo->active = 0;
-               spin_unlock(&hw->lock);
+               spin_unlock_irqrestore(&hw->lock, flags);
                return;
        }
-       spin_unlock(&hw->lock);
+       spin_unlock_irqrestore(&hw->lock, flags);
 
        /*
         * ISO transfer only partially completed,
@@ -1096,15 +1098,16 @@ rx_int_complete(struct urb *urb)
        struct usb_fifo *fifo = (struct usb_fifo *) urb->context;
        struct hfcsusb *hw = fifo->hw;
        static __u8 eof[8];
+       unsigned long flags;
 
-       spin_lock(&hw->lock);
+       spin_lock_irqsave(&hw->lock, flags);
        if (fifo->stop_gracefull) {
                fifo->stop_gracefull = 0;
                fifo->active = 0;
-               spin_unlock(&hw->lock);
+               spin_unlock_irqrestore(&hw->lock, flags);
                return;
        }
-       spin_unlock(&hw->lock);
+       spin_unlock_irqrestore(&hw->lock, flags);
 
        fifon = fifo->fifonum;
        if ((!fifo->active) || (urb->status)) {
@@ -1172,12 +1175,13 @@ tx_iso_complete(struct urb *urb)
        int *tx_idx;
        int frame_complete, fifon, status, fillempty = 0;
        __u8 threshbit, *p;
+       unsigned long flags;
 
-       spin_lock(&hw->lock);
+       spin_lock_irqsave(&hw->lock, flags);
        if (fifo->stop_gracefull) {
                fifo->stop_gracefull = 0;
                fifo->active = 0;
-               spin_unlock(&hw->lock);
+               spin_unlock_irqrestore(&hw->lock, flags);
                return;
        }
 
@@ -1195,7 +1199,7 @@ tx_iso_complete(struct urb *urb)
        } else {
                printk(KERN_DEBUG "%s: %s: neither BCH nor DCH\n",
                       hw->name, __func__);
-               spin_unlock(&hw->lock);
+               spin_unlock_irqrestore(&hw->lock, flags);
                return;
        }
 
@@ -1375,7 +1379,7 @@ tx_iso_complete(struct urb *urb)
                               hw->name, __func__,
                               symbolic(urb_errlist, status), status, fifon);
        }
-       spin_unlock(&hw->lock);
+       spin_unlock_irqrestore(&hw->lock, flags);
 }
 
 /*
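
The hfcsusb conversions above promote plain spin_lock() to spin_lock_irqsave() in the URB completion handlers: the lock is also taken from contexts where interrupts matter, so the IRQ-unsafe variant risks deadlock. Note the mechanics of the pattern -- flags is a local unsigned long, and every early return restores it with spin_unlock_irqrestore(). The skeleton (struct hypothetical):

#include <linux/spinlock.h>

struct example_hw {			/* hypothetical device state */
	spinlock_t lock;
	bool stopping;
};

static void example_complete(struct example_hw *hw)
{
	unsigned long flags;

	spin_lock_irqsave(&hw->lock, flags);
	if (hw->stopping) {
		/* every exit path must restore the saved flags */
		spin_unlock_irqrestore(&hw->lock, flags);
		return;
	}
	/* ... critical section ... */
	spin_unlock_irqrestore(&hw->lock, flags);
}
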
index 1fc290659e945a5ffdf2efcda8cf687c4bedd42d..3e01012be4abc3a8abe51d727ad1fe70b83badd7 100644 (file)
@@ -887,6 +887,7 @@ release_card(struct inf_hw *card) {
                                release_card(card->sc[i]);
                        card->sc[i] = NULL;
                }
+               /* fall through */
        default:
                pci_disable_device(card->pdev);
                pci_set_drvdata(card->pdev, NULL);
index b791688d0228ccb8921b57775bf6330adb8a96ea..386731ec248912f327afe1e327306274add1876e 100644 (file)
@@ -972,6 +972,7 @@ isar_pump_statev_fax(struct isar_ch *ch, u8 devt) {
                                break;
                        case PCTRL_CMD_FTM:
                                p1 = 2;
+                               /* fall through */
                        case PCTRL_CMD_FTH:
                                send_mbox(ch->is, dps | ISAR_HIS_PUMPCTRL,
                                          PCTRL_CMD_SILON, 1, &p1);
@@ -1177,6 +1178,7 @@ setup_pump(struct isar_ch *ch) {
                        send_mbox(ch->is, dps | ISAR_HIS_PUMPCFG,
                                  PMOD_DTMF, 1, param);
                }
+               /* fall through */
        case ISDN_P_B_MODEM_ASYNC:
                ctrl = PMOD_DATAMODEM;
                if (test_bit(FLG_ORIGIN, &ch->bch.Flags)) {
@@ -1268,6 +1270,7 @@ setup_iom2(struct isar_ch *ch) {
        case ISDN_P_B_MODEM_ASYNC:
        case ISDN_P_B_T30_FAX:
                cmsb |= IOM_CTRL_RCV;
+               /* fall through */
        case ISDN_P_B_L2DTMF:
                if (test_bit(FLG_DTMFSEND, &ch->bch.Flags))
                        cmsb |= IOM_CTRL_RCV;
@@ -1560,6 +1563,7 @@ isar_l2l1(struct mISDNchannel *ch, struct sk_buff *skb)
                                ich->is->name, hh->id);
                        ret = -EINVAL;
                }
+               /* fall through */
        default:
                pr_info("%s: %s unknown prim(%x,%x)\n",
                        ich->is->name, __func__, hh->prim, hh->id);
index 89d9ba8ed535e2bf25ad807add639b0240942e3a..2b317cb63d068c00938fefed7e22cc8cfb27156f 100644 (file)
@@ -1084,7 +1084,7 @@ nj_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
                return -ENODEV;
        }
 
-       card = kzalloc(sizeof(struct tiger_hw), GFP_ATOMIC);
+       card = kzalloc(sizeof(struct tiger_hw), GFP_KERNEL);
        if (!card) {
                pr_info("No kmem for Netjet\n");
                return err;
index a18b605fb4f23e97d863b6bc64db924ad58053eb..b161456c942e2ea94796bfceb4df71f8400a1bf1 100644 (file)
@@ -207,6 +207,7 @@ modehdlc(struct BCState *bcs, int mode, int bc)
                bcs->mode = 1;
                bcs->channel = bc;
                bc = 0;
+               /* fall through */
        case (L1_MODE_NULL):
                if (bcs->mode == L1_MODE_NULL)
                        return;
index ddec47a911a0df6ea74c1a9f5e926fbbacc54a2a..9ee06328784c59f8e6eec67b64873b22f9b46fbb 100644 (file)
@@ -1012,7 +1012,7 @@ dummy_pstack(struct PStack *st, int pr, void *arg) {
 
 static int
 init_PStack(struct PStack **stp) {
-       *stp = kmalloc(sizeof(struct PStack), GFP_ATOMIC);
+       *stp = kmalloc(sizeof(struct PStack), GFP_KERNEL);
        if (!*stp)
                return -ENOMEM;
        (*stp)->next = NULL;
@@ -1369,6 +1369,7 @@ leased_l1l2(struct PStack *st, int pr, void *arg)
        case (PH_ACTIVATE | INDICATION):
        case (PH_ACTIVATE | CONFIRM):
                event = EV_LEASED;
+               /* fall through */
        case (PH_DEACTIVATE | INDICATION):
        case (PH_DEACTIVATE | CONFIRM):
                if (test_bit(FLG_TWO_DCHAN, &chanp->cs->HW_Flags))
index 7108bdb8742e7a41b5aac61ada26ad212211fe67..b12e6cae26c28d64c927151a25444e185ce24242 100644 (file)
@@ -1029,7 +1029,7 @@ static int hisax_cs_new(int cardnr, char *id, struct IsdnCard *card,
 
        *cs_out = NULL;
 
-       cs = kzalloc(sizeof(struct IsdnCardState), GFP_ATOMIC);
+       cs = kzalloc(sizeof(struct IsdnCardState), GFP_KERNEL);
        if (!cs) {
                printk(KERN_WARNING
                       "HiSax: No memory for IsdnCardState(card %d)\n",
@@ -1059,12 +1059,12 @@ static int hisax_cs_new(int cardnr, char *id, struct IsdnCard *card,
                       "HiSax: Card Type %d out of range\n", card->typ);
                goto outf_cs;
        }
-       if (!(cs->dlog = kmalloc(MAX_DLOG_SPACE, GFP_ATOMIC))) {
+       if (!(cs->dlog = kmalloc(MAX_DLOG_SPACE, GFP_KERNEL))) {
                printk(KERN_WARNING
                       "HiSax: No memory for dlog(card %d)\n", cardnr + 1);
                goto outf_cs;
        }
-       if (!(cs->status_buf = kmalloc(HISAX_STATUS_BUFSIZE, GFP_ATOMIC))) {
+       if (!(cs->status_buf = kmalloc(HISAX_STATUS_BUFSIZE, GFP_KERNEL))) {
                printk(KERN_WARNING
                       "HiSax: No memory for status_buf(card %d)\n",
                       cardnr + 1);
@@ -1123,7 +1123,7 @@ static int hisax_cs_setup(int cardnr, struct IsdnCard *card,
 {
        int ret;
 
-       if (!(cs->rcvbuf = kmalloc(MAX_DFRAME_LEN_L1, GFP_ATOMIC))) {
+       if (!(cs->rcvbuf = kmalloc(MAX_DFRAME_LEN_L1, GFP_KERNEL))) {
                printk(KERN_WARNING "HiSax: No memory for isac rcvbuf\n");
                ll_unload(cs);
                goto outf_cs;
@@ -1843,6 +1843,7 @@ static void hisax_b_l2l1(struct PStack *st, int pr, void *arg)
        case PH_DEACTIVATE | REQUEST:
                test_and_clear_bit(BC_FLG_BUSY, &bcs->Flag);
                skb_queue_purge(&bcs->squeue);
+               /* fall through */
        default:
                B_L2L1(b_if, pr, arg);
                break;
index 35c6df6534ecde8507c417a61a1c93f41fa4a293..a6d8af02354a4eb7240a0393f65226f424f0c223 100644 (file)
@@ -108,6 +108,7 @@ ReadISAC(struct IsdnCardState *cs, u_char offset)
        switch (cs->subtyp) {
        case R647:
                off2 = ((off2 << 8 & 0xf000) | (off2 & 0xf));
+               /* fall through */
        case R685:
                return (readreg(cs->hw.gazel.isac, off2));
        case R753:
@@ -125,6 +126,7 @@ WriteISAC(struct IsdnCardState *cs, u_char offset, u_char value)
        switch (cs->subtyp) {
        case R647:
                off2 = ((off2 << 8 & 0xf000) | (off2 & 0xf));
+               /* fall through */
        case R685:
                writereg(cs->hw.gazel.isac, off2, value);
                break;
@@ -203,6 +205,7 @@ ReadHSCX(struct IsdnCardState *cs, int hscx, u_char offset)
        switch (cs->subtyp) {
        case R647:
                off2 = ((off2 << 8 & 0xf000) | (off2 & 0xf));
+               /* fall through */
        case R685:
                return (readreg(cs->hw.gazel.hscx[hscx], off2));
        case R753:
@@ -220,6 +223,7 @@ WriteHSCX(struct IsdnCardState *cs, int hscx, u_char offset, u_char value)
        switch (cs->subtyp) {
        case R647:
                off2 = ((off2 << 8 & 0xf000) | (off2 & 0xf));
+               /* fall through */
        case R685:
                writereg(cs->hw.gazel.hscx[hscx], off2, value);
                break;
index 97ecb3073045612f28bb80954c068726ee957504..1d4cd01d46851027776fac05f92c88f91dbb883d 100644 (file)
@@ -432,16 +432,12 @@ fill_isoc_urb(struct urb *urb, struct usb_device *dev, unsigned int pipe,
 {
        int k;
 
-       urb->dev = dev;
-       urb->pipe = pipe;
-       urb->complete = complete;
+       usb_fill_int_urb(urb, dev, pipe, buf, packet_size * num_packets,
+                        complete, context, interval);
+
        urb->number_of_packets = num_packets;
-       urb->transfer_buffer_length = packet_size * num_packets;
-       urb->context = context;
-       urb->transfer_buffer = buf;
        urb->transfer_flags = URB_ISO_ASAP;
        urb->actual_length = 0;
-       urb->interval = interval;
        for (k = 0; k < num_packets; k++) {
                urb->iso_frame_desc[k].offset = packet_size * k;
                urb->iso_frame_desc[k].length = packet_size;
index d01ff116797b937d02f04ce17abc8fe8802cd528..82c1879f56647e7bc39b789587c78dafa6367cfa 100644 (file)
@@ -1089,6 +1089,7 @@ isar_pump_statev_fax(struct BCState *bcs, u_char devt) {
                                break;
                        case PCTRL_CMD_FTM:
                                p1 = 2;
+                               /* fall through */
                        case PCTRL_CMD_FTH:
                                sendmsg(cs, dps | ISAR_HIS_PUMPCTRL,
                                        PCTRL_CMD_SILON, 1, &p1);
@@ -1097,6 +1098,7 @@ isar_pump_statev_fax(struct BCState *bcs, u_char devt) {
                        case PCTRL_CMD_FRM:
                                if (frm_extra_delay)
                                        mdelay(frm_extra_delay);
+                               /* fall through */
                        case PCTRL_CMD_FRH:
                                p1 = bcs->hw.isar.mod = bcs->hw.isar.newmod;
                                bcs->hw.isar.newmod = 0;
index da0a1c6aa32996e6cab700b19654b819cfa0afa4..98f60d1523f4ec06fb761014c3ebdcd592608cfe 100644 (file)
@@ -88,6 +88,7 @@ l3_1tr6_setup_req(struct l3_process *pc, u_char pr, void *arg)
                        break;
                case 'C':
                        channel = 0x08;
+                       /* fall through */
                case 'P':
                        channel |= 0x80;
                        teln++;
index 18a3484b1f7e96f33439875ede43704f91feaab8..368d152a8f1d7d00b6414f9963a7cad727beb77e 100644 (file)
@@ -1282,6 +1282,7 @@ l3dss1_setup_req(struct l3_process *pc, u_char pr,
                        switch (0x5f & *teln) {
                        case 'C':
                                channel = 0x08;
+                               /* fall through */
                        case 'P':
                                channel |= 0x80;
                                teln++;
index 1cb9930d5e24cc23df4ce777f3d5acdd21374985..f207fda691c71ae67b260d66485db48998346f80 100644 (file)
@@ -408,15 +408,10 @@ fill_isoc_urb(struct urb *urb, struct usb_device *dev,
 {
        int k;
 
-       urb->dev = dev;
-       urb->pipe = pipe;
-       urb->interval = 1;
-       urb->transfer_buffer = buf;
+       usb_fill_int_urb(urb, dev, pipe, buf, num_packets * packet_size,
+                        complete, context, 1);
+
        urb->number_of_packets = num_packets;
-       urb->transfer_buffer_length = num_packets * packet_size;
-       urb->actual_length = 0;
-       urb->complete = complete;
-       urb->context = context;
        urb->transfer_flags = URB_ISO_ASAP;
        for (k = 0; k < num_packets; k++) {
                urb->iso_frame_desc[k].offset = packet_size * k;
index 4a0425378f37864779135048c097ec1e73cc165a..ba177c3a621b1371e0c50ded7a3d0967c4309039 100644 (file)
@@ -99,6 +99,7 @@ pof_handle_data(hysdn_card *card, int datlen)
 
        case TAG_CBOOTDTA:
                DecryptBuf(boot, datlen);       /* we need to encrypt the buffer */
+               /* fall through */
        case TAG_BOOTDTA:
                if (card->debug_flags & LOG_POF_RECORD)
                        hysdn_addlog(card, "POF got %s len=%d offs=0x%lx",
@@ -137,6 +138,7 @@ pof_handle_data(hysdn_card *card, int datlen)
 
        case TAG_CABSDATA:
                DecryptBuf(boot, datlen);       /* we need to encrypt the buffer */
+               /* fall through */
        case TAG_ABSDATA:
                if (card->debug_flags & LOG_POF_RECORD)
                        hysdn_addlog(card, "POF got %s len=%d offs=0x%lx",
index 960f26348bb58e00f81166444a53d7544f8e7f68..b730037a0e2d383b2f6037561ccd396c558077a8 100644 (file)
@@ -787,7 +787,7 @@ isdn_tty_suspend(char *id, modem_info *info, atemu *m)
                cmd.parm.cmsg.para[3] = 4; /* 16 bit 0x0004 Suspend */
                cmd.parm.cmsg.para[4] = 0;
                cmd.parm.cmsg.para[5] = l;
-               strncpy(&cmd.parm.cmsg.para[6], id, l);
+               memcpy(&cmd.parm.cmsg.para[6], id, l);
                cmd.command = CAPI_PUT_MESSAGE;
                cmd.driver = info->isdn_driver;
                cmd.arg = info->isdn_channel;
@@ -877,7 +877,7 @@ isdn_tty_resume(char *id, modem_info *info, atemu *m)
                cmd.parm.cmsg.para[3] = 5; /* 16 bit 0x0005 Resume */
                cmd.parm.cmsg.para[4] = 0;
                cmd.parm.cmsg.para[5] = l;
-               strncpy(&cmd.parm.cmsg.para[6], id, l);
+               memcpy(&cmd.parm.cmsg.para[6], id, l);
                cmd.command = CAPI_PUT_MESSAGE;
                info->dialing = 1;
 //             strcpy(dev->num[i], n);
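
In both hunks the destination is a length-prefixed CAPI parameter field, not a NUL-terminated string: para[5] holds l and exactly l payload bytes follow. strncpy() with a length of exactly strlen(id) copies the same bytes but implies string semantics (and trips gcc-8's -Wstringop-truncation); memcpy() states the intent. In miniature:

#include <linux/string.h>

static void fill_param(unsigned char *para, const char *id)
{
	size_t l = strlen(id);	/* caller guarantees it fits */

	para[5] = l;		/* explicit length byte ... */
	memcpy(&para[6], id, l);/* ... then exactly l bytes, no NUL */
}
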
index 8b74ce412524827c14333e4444e275ac4fdcc723..2a5f6668756cd6ea79cc8e98e18e7b606fecd2e7 100644 (file)
@@ -354,6 +354,7 @@ EncodeMatrix(unsigned char *buf, int len, unsigned char *m, int mlen)
                                printk(KERN_WARNING "isdn_v110 (EncodeMatrix): buffer full!\n");
                                return line;
                        }
+                       /* else: fall through */
                case 128:
                        m[line] = 128;  /* leftmost -> set byte to 1000000 */
                        mbit = 64;      /* current bit in the matrix line */
@@ -386,20 +387,28 @@ EncodeMatrix(unsigned char *buf, int len, unsigned char *m, int mlen)
                switch (++line % 10) {
                case 1:
                        m[line++] = 0xfe;
+                       /* fall through */
                case 2:
                        m[line++] = 0xfe;
+                       /* fall through */
                case 3:
                        m[line++] = 0xfe;
+                       /* fall through */
                case 4:
                        m[line++] = 0xfe;
+                       /* fall through */
                case 5:
                        m[line++] = 0xbf;
+                       /* fall through */
                case 6:
                        m[line++] = 0xfe;
+                       /* fall through */
                case 7:
                        m[line++] = 0xfe;
+                       /* fall through */
                case 8:
                        m[line++] = 0xfe;
+                       /* fall through */
                case 9:
                        m[line++] = 0xfe;
                }
index 422dced7c90ac26dcf0d366fedb32ab9edf44207..d97c6dd52223c9519e2af28e150cbc9f2a1faa0c 100644 (file)
@@ -539,6 +539,7 @@ create_l2entity(struct mISDNdevice *dev, struct mISDNchannel *ch,
                rq.protocol = ISDN_P_NT_S0;
                if (dev->Dprotocols & (1 << ISDN_P_NT_E1))
                        rq.protocol = ISDN_P_NT_E1;
+               /* fall through */
        case ISDN_P_LAPD_TE:
                ch->recv = mISDN_queue_message;
                ch->peer = &dev->D.st->own;
index 217b790d22edc2fdd90d4f3c634cb2fa38bc095f..a764a83f99dabe54585dbad7dba40b6601177c03 100644 (file)
@@ -4102,7 +4102,8 @@ static inline int bond_slave_override(struct bonding *bond,
 
 
 static u16 bond_select_queue(struct net_device *dev, struct sk_buff *skb,
-                            void *accel_priv, select_queue_fallback_t fallback)
+                            struct net_device *sb_dev,
+                            select_queue_fallback_t fallback)
 {
        /* This helper function exists to help dev_pick_tx get the correct
         * destination queue.  Using a helper function skips a call to
index 6096440e96eaaa225cade7d90fa25471fb405bc0..35847250da5aa935a0286b1b28396813dfcc0ee2 100644 (file)
@@ -160,14 +160,19 @@ static ssize_t bonding_sysfs_store_option(struct device *d,
 {
        struct bonding *bond = to_bond(d);
        const struct bond_option *opt;
+       char *buffer_clone;
        int ret;
 
        opt = bond_opt_get_by_name(attr->attr.name);
        if (WARN_ON(!opt))
                return -ENOENT;
-       ret = bond_opt_tryset_rtnl(bond, opt->id, (char *)buffer);
+       buffer_clone = kstrndup(buffer, count, GFP_KERNEL);
+       if (!buffer_clone)
+               return -ENOMEM;
+       ret = bond_opt_tryset_rtnl(bond, opt->id, buffer_clone);
        if (!ret)
                ret = count;
+       kfree(buffer_clone);
 
        return ret;
 }
index d4dd4da23997db490c60ea85ced838c6da463c64..da636a22c54278c2f8d082945e2aedd7508b46d5 100644 (file)
@@ -73,7 +73,7 @@ MODULE_PARM_DESC(msgobj15_eff, "Extended 29-bit frames for message object 15 "
 
 static int i82527_compat;
 module_param(i82527_compat, int, 0444);
-MODULE_PARM_DESC(i82527_compat, "Strict Intel 82527 comptibility mode "
+MODULE_PARM_DESC(i82527_compat, "Strict Intel 82527 compatibility mode "
                 "without using additional functions");
 
 /*
index 3c71f1cb205faaa98617eeb2f0f0e3c94a2f36e8..49163570a63afad2e36777993a57319370d6c8b0 100644 (file)
@@ -649,8 +649,7 @@ struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf)
        can_skb_prv(skb)->ifindex = dev->ifindex;
        can_skb_prv(skb)->skbcnt = 0;
 
-       *cf = skb_put(skb, sizeof(struct can_frame));
-       memset(*cf, 0, sizeof(struct can_frame));
+       *cf = skb_put_zero(skb, sizeof(struct can_frame));
 
        return skb;
 }
@@ -678,8 +677,7 @@ struct sk_buff *alloc_canfd_skb(struct net_device *dev,
        can_skb_prv(skb)->ifindex = dev->ifindex;
        can_skb_prv(skb)->skbcnt = 0;
 
-       *cfd = skb_put(skb, sizeof(struct canfd_frame));
-       memset(*cfd, 0, sizeof(struct canfd_frame));
+       *cfd = skb_put_zero(skb, sizeof(struct canfd_frame));
 
        return skb;
 }
@@ -703,7 +701,8 @@ EXPORT_SYMBOL_GPL(alloc_can_err_skb);
 /*
  * Allocate and setup space for the CAN network device
  */
-struct net_device *alloc_candev(int sizeof_priv, unsigned int echo_skb_max)
+struct net_device *alloc_candev_mqs(int sizeof_priv, unsigned int echo_skb_max,
+                                   unsigned int txqs, unsigned int rxqs)
 {
        struct net_device *dev;
        struct can_priv *priv;
@@ -715,7 +714,8 @@ struct net_device *alloc_candev(int sizeof_priv, unsigned int echo_skb_max)
        else
                size = sizeof_priv;
 
-       dev = alloc_netdev(size, "can%d", NET_NAME_UNKNOWN, can_setup);
+       dev = alloc_netdev_mqs(size, "can%d", NET_NAME_UNKNOWN, can_setup,
+                              txqs, rxqs);
        if (!dev)
                return NULL;
 
@@ -734,7 +734,7 @@ struct net_device *alloc_candev(int sizeof_priv, unsigned int echo_skb_max)
 
        return dev;
 }
-EXPORT_SYMBOL_GPL(alloc_candev);
+EXPORT_SYMBOL_GPL(alloc_candev_mqs);
 
 /*
  * Free space of the CAN network device
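
alloc_candev() becomes alloc_candev_mqs() with explicit TX/RX queue counts so CAN drivers can go multi-queue. The header side is not part of this hunk, but the unchanged single-queue callers imply a compatibility wrapper along these lines (assumed, not quoted):

/* Assumed include/linux/can/dev.h companion: old entry point kept
 * as a single-queue wrapper so existing drivers need no changes. */
#define alloc_candev(sizeof_priv, echo_skb_max) \
	alloc_candev_mqs(sizeof_priv, echo_skb_max, 1, 1)
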
index d53a45bf2a72eb9e0d3cd16fe9f876bccbe5ade5..8e972ef0863769e88a2c9d6cec37408d66566292 100644 (file)
@@ -1,24 +1,13 @@
-/*
- * flexcan.c - FLEXCAN CAN controller driver
- *
- * Copyright (c) 2005-2006 Varma Electronics Oy
- * Copyright (c) 2009 Sascha Hauer, Pengutronix
- * Copyright (c) 2010-2017 Pengutronix, Marc Kleine-Budde <kernel@pengutronix.de>
- * Copyright (c) 2014 David Jander, Protonic Holland
- *
- * Based on code originally by Andrey Volkov <avolkov@varma-el.com>
- *
- * LICENCE:
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation version 2.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- */
+// SPDX-License-Identifier: GPL-2.0
+//
+// flexcan.c - FLEXCAN CAN controller driver
+//
+// Copyright (c) 2005-2006 Varma Electronics Oy
+// Copyright (c) 2009 Sascha Hauer, Pengutronix
+// Copyright (c) 2010-2017 Pengutronix, Marc Kleine-Budde <kernel@pengutronix.de>
+// Copyright (c) 2014 David Jander, Protonic Holland
+//
+// Based on code originally by Andrey Volkov <avolkov@varma-el.com>
 
 #include <linux/netdevice.h>
 #include <linux/can.h>
@@ -523,7 +512,7 @@ static int flexcan_get_berr_counter(const struct net_device *dev,
        return err;
 }
 
-static int flexcan_start_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t flexcan_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
        const struct flexcan_priv *priv = netdev_priv(dev);
        struct can_frame *cf = (struct can_frame *)skb->data;
index adfdb66a486e1be87f8bbfa3e10741b812616148..02042cb09bd29f2d680623d21be2a823f111b587 100644 (file)
@@ -1684,7 +1684,7 @@ static int ican3_stop(struct net_device *ndev)
        return 0;
 }
 
-static int ican3_xmit(struct sk_buff *skb, struct net_device *ndev)
+static netdev_tx_t ican3_xmit(struct sk_buff *skb, struct net_device *ndev)
 {
        struct ican3_dev *mod = netdev_priv(ndev);
        struct can_frame *cf = (struct can_frame *)skb->data;
index ed8561d4a90f4b5e25683a5483f0d98248d7dca2..5696d7e807513a529823ac218f8236bcf52aea8e 100644 (file)
@@ -486,7 +486,7 @@ int peak_canfd_handle_msgs_list(struct peak_canfd_priv *priv,
                if (msg_size <= 0)
                        break;
 
-               msg_ptr += msg_size;
+               msg_ptr += ALIGN(msg_size, 4);
        }
 
        if (msg_size < 0)
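
Messages in the shared RX area are padded to 32-bit boundaries, so stepping by the raw msg_size desynchronizes the parser on any message whose size is not a multiple of 4; ALIGN(msg_size, 4) rounds the stride up. For reference, the rounding ALIGN() performs (open-coded equivalent for a power-of-two boundary):

/* Open-coded equivalent of ALIGN(x, a) for power-of-two a:
 * example_align(5, 4) == 8, example_align(8, 4) == 8. */
#define example_align(x, a)	(((x) + (a) - 1) & ~((a) - 1))
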
index 455a3797a20065d264a837dcc89e91453a2a93ba..c458d5fdc8d3e55ed7ecb9de2772fc51a645edd9 100644 (file)
@@ -174,9 +174,6 @@ struct pciefd_page {
        u32 size;
 };
 
-#define CANFD_IRQ_SET          0x00000001
-#define CANFD_TX_PATH_SET      0x00000002
-
 /* CAN-FD channel object */
 struct pciefd_board;
 struct pciefd_can {
@@ -418,7 +415,7 @@ static int pciefd_pre_cmd(struct peak_canfd_priv *ucan)
                        break;
 
                /* going into operational mode: setup IRQ handler */
-               err = request_irq(priv->board->pci_dev->irq,
+               err = request_irq(priv->ucan.ndev->irq,
                                  pciefd_irq_handler,
                                  IRQF_SHARED,
                                  PCIEFD_DRV_NAME,
@@ -491,15 +488,18 @@ static int pciefd_post_cmd(struct peak_canfd_priv *ucan)
 
                /* controller now in reset mode: */
 
+               /* disable IRQ for this CAN */
+               pciefd_can_writereg(priv, CANFD_CTL_IEN_BIT,
+                                   PCIEFD_REG_CAN_RX_CTL_CLR);
+
                /* stop and reset DMA addresses in Tx/Rx engines */
                pciefd_can_clear_tx_dma(priv);
                pciefd_can_clear_rx_dma(priv);
 
-               /* disable IRQ for this CAN */
-               pciefd_can_writereg(priv, CANFD_CTL_IEN_BIT,
-                                   PCIEFD_REG_CAN_RX_CTL_CLR);
+               /* wait for above commands to complete (read cycle) */
+               (void)pciefd_sys_readreg(priv->board, PCIEFD_REG_SYS_VER1);
 
-               free_irq(priv->board->pci_dev->irq, priv);
+               free_irq(priv->ucan.ndev->irq, priv);
 
                ucan->can.state = CAN_STATE_STOPPED;
 
@@ -638,7 +638,7 @@ static int pciefd_can_probe(struct pciefd_board *pciefd)
                                                 GFP_KERNEL);
        if (!priv->tx_dma_vaddr) {
                dev_err(&pciefd->pci_dev->dev,
-                       "Tx dmaim_alloc_coherent(%u) failure\n",
+                       "Tx dmam_alloc_coherent(%u) failure\n",
                        PCIEFD_TX_DMA_SIZE);
                goto err_free_candev;
        }
@@ -691,7 +691,7 @@ static int pciefd_can_probe(struct pciefd_board *pciefd)
        pciefd->can[pciefd->can_count] = priv;
 
        dev_info(&pciefd->pci_dev->dev, "%s at reg_base=0x%p irq=%d\n",
-                ndev->name, priv->reg_base, pciefd->pci_dev->irq);
+                ndev->name, priv->reg_base, ndev->irq);
 
        return 0;
 
index 5adc95c922eef2d9f968a2dea3bac7c2dd3bfda2..a97b81d1d0da9b0b29fc622b34b6b54a06872ae8 100644 (file)
@@ -608,7 +608,7 @@ static int peak_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        writeb(0x00, cfg_base + PITA_GPIOICR);
        /* Toggle reset */
        writeb(0x05, cfg_base + PITA_MISC + 3);
-       mdelay(5);
+       usleep_range(5000, 6000);
        /* Leave parport mux mode */
        writeb(0x04, cfg_base + PITA_MISC + 3);
 
index 485b19c9ae47edb9e161a8e2c8b800f3a63b340e..b8c39ede7cd51445b6ed653585811066ab75d7d4 100644 (file)
@@ -530,7 +530,7 @@ static int pcan_add_channels(struct pcan_pccard *card)
        pcan_write_reg(card, PCC_CCR, ccr);
 
        /* wait 2ms before unresetting channels */
-       mdelay(2);
+       usleep_range(2000, 3000);
 
        ccr &= ~PCC_CCR_RST_ALL;
        pcan_write_reg(card, PCC_CCR, ccr);
index 1ac2090a17216cf47e74be35994fbd4a5e267f6f..093fc9a529f0816e78b929eb1c5bd635d4d7665c 100644 (file)
@@ -409,7 +409,7 @@ static int sun4ican_set_mode(struct net_device *dev, enum can_mode mode)
  * xx xx xx xx         ff         ll 00 11 22 33 44 55 66 77
  * [ can_id ] [flags] [len] [can data (up to 8 bytes]
  */
-static int sun4ican_start_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t sun4ican_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
        struct sun4ican_priv *priv = netdev_priv(dev);
        struct can_frame *cf = (struct can_frame *)skb->data;
index c36f4bdcbf4fba8a22e581b3617269a4e2d2ef61..750d04d9e2ae07bc320a90b5e2d495149c4de4a4 100644 (file)
@@ -1,6 +1,12 @@
 menu "CAN USB interfaces"
        depends on USB
 
+config CAN_8DEV_USB
+       tristate "8 devices USB2CAN interface"
+       ---help---
+         This driver supports the USB2CAN interface
+         from 8 devices (http://www.8devices.com).
+
 config CAN_EMS_USB
        tristate "EMS CPC-USB/ARM7 CAN/USB interface"
        ---help---
@@ -26,7 +32,7 @@ config CAN_KVASER_USB
        tristate "Kvaser CAN/USB interface"
        ---help---
          This driver adds support for Kvaser CAN/USB devices like Kvaser
-         Leaf Light and Kvaser USBcan II.
+         Leaf Light, Kvaser USBcan II and Kvaser Memorator Pro 5xHS.
 
          The driver provides support for the following devices:
            - Kvaser Leaf Light
@@ -55,12 +61,30 @@ config CAN_KVASER_USB
            - Kvaser Memorator HS/HS
            - Kvaser Memorator HS/LS
            - Scania VCI2 (if you have the Kvaser logo on top)
+           - Kvaser BlackBird v2
+           - Kvaser Leaf Pro HS v2
+           - Kvaser Hybrid 2xCAN/LIN
+           - Kvaser Hybrid Pro 2xCAN/LIN
+           - Kvaser Memorator 2xHS v2
+           - Kvaser Memorator Pro 2xHS v2
+           - Kvaser Memorator Pro 5xHS
+           - Kvaser USBcan Light 4xHS
+           - Kvaser USBcan Pro 2xHS v2
+           - Kvaser USBcan Pro 5xHS
+           - ATI Memorator Pro 2xHS v2
+           - ATI USBcan Pro 2xHS v2
 
          If unsure, say N.
 
          To compile this driver as a module, choose M here: the
          module will be called kvaser_usb.
 
+config CAN_MCBA_USB
+       tristate "Microchip CAN BUS Analyzer interface"
+       ---help---
+         This driver supports the CAN BUS Analyzer interface
+         from Microchip (http://www.microchip.com/development-tools/).
+
 config CAN_PEAK_USB
        tristate "PEAK PCAN-USB/USB Pro interfaces for CAN 2.0b/CAN-FD"
        ---help---
@@ -77,16 +101,26 @@ config CAN_PEAK_USB
 
          (see also http://www.peak-system.com).
 
-config CAN_8DEV_USB
-       tristate "8 devices USB2CAN interface"
-       ---help---
-         This driver supports the USB2CAN interface
-         from 8 devices (http://www.8devices.com).
-
 config CAN_MCBA_USB
        tristate "Microchip CAN BUS Analyzer interface"
        ---help---
          This driver supports the CAN BUS Analyzer interface
          from Microchip (http://www.microchip.com/development-tools/).
 
+config CAN_UCAN
+       tristate "Theobroma Systems UCAN interface"
+       ---help---
+         This driver supports the Theobroma Systems
+         UCAN USB-CAN interface.
+
+         The UCAN driver supports the microcontroller-based USB/CAN
+         adapters from Theobroma Systems. There are two form-factors
+         that run essentially the same firmware:
+
+         * Seal: standalone USB stick
+                 (https://www.theobroma-systems.com/seal)
+         * Mule: integrated on the PCB of various System-on-Modules
+                 from Theobroma Systems like the A31-µQ7 and the RK3399-Q7
+                 (https://www.theobroma-systems.com/rk3399-q7)
+
 endmenu
index 49ac7b99ba32ef8aecd47c2750a2f39df66851b5..aa0f17c0b2edc48f1034f0f68829e10666eda00d 100644
@@ -3,10 +3,11 @@
 #  Makefile for the Linux Controller Area Network USB drivers.
 #
 
+obj-$(CONFIG_CAN_8DEV_USB) += usb_8dev.o
 obj-$(CONFIG_CAN_EMS_USB) += ems_usb.o
 obj-$(CONFIG_CAN_ESD_USB2) += esd_usb2.o
 obj-$(CONFIG_CAN_GS_USB) += gs_usb.o
-obj-$(CONFIG_CAN_KVASER_USB) += kvaser_usb.o
-obj-$(CONFIG_CAN_PEAK_USB) += peak_usb/
-obj-$(CONFIG_CAN_8DEV_USB) += usb_8dev.o
+obj-$(CONFIG_CAN_KVASER_USB) += kvaser_usb/
 obj-$(CONFIG_CAN_MCBA_USB) += mcba_usb.o
+obj-$(CONFIG_CAN_PEAK_USB) += peak_usb/
+obj-$(CONFIG_CAN_UCAN) += ucan.o
diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c
deleted file mode 100644
index daed57d..0000000
+++ /dev/null
@@ -1,2085 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation version 2.
- *
- * Parts of this driver are based on the following:
- *  - Kvaser linux leaf driver (version 4.78)
- *  - CAN driver for esd CAN-USB/2
- *  - Kvaser linux usbcanII driver (version 5.3)
- *
- * Copyright (C) 2002-2006 KVASER AB, Sweden. All rights reserved.
- * Copyright (C) 2010 Matthias Fuchs <matthias.fuchs@esd.eu>, esd gmbh
- * Copyright (C) 2012 Olivier Sobrie <olivier@sobrie.be>
- * Copyright (C) 2015 Valeo S.A.
- */
-
-#include <linux/spinlock.h>
-#include <linux/kernel.h>
-#include <linux/completion.h>
-#include <linux/module.h>
-#include <linux/netdevice.h>
-#include <linux/usb.h>
-
-#include <linux/can.h>
-#include <linux/can/dev.h>
-#include <linux/can/error.h>
-
-#define MAX_RX_URBS                    4
-#define START_TIMEOUT                  1000 /* msecs */
-#define STOP_TIMEOUT                   1000 /* msecs */
-#define USB_SEND_TIMEOUT               1000 /* msecs */
-#define USB_RECV_TIMEOUT               1000 /* msecs */
-#define RX_BUFFER_SIZE                 3072
-#define CAN_USB_CLOCK                  8000000
-#define MAX_NET_DEVICES                        3
-#define MAX_USBCAN_NET_DEVICES         2
-
-/* Kvaser Leaf USB devices */
-#define KVASER_VENDOR_ID               0x0bfd
-#define USB_LEAF_DEVEL_PRODUCT_ID      10
-#define USB_LEAF_LITE_PRODUCT_ID       11
-#define USB_LEAF_PRO_PRODUCT_ID                12
-#define USB_LEAF_SPRO_PRODUCT_ID       14
-#define USB_LEAF_PRO_LS_PRODUCT_ID     15
-#define USB_LEAF_PRO_SWC_PRODUCT_ID    16
-#define USB_LEAF_PRO_LIN_PRODUCT_ID    17
-#define USB_LEAF_SPRO_LS_PRODUCT_ID    18
-#define USB_LEAF_SPRO_SWC_PRODUCT_ID   19
-#define USB_MEMO2_DEVEL_PRODUCT_ID     22
-#define USB_MEMO2_HSHS_PRODUCT_ID      23
-#define USB_UPRO_HSHS_PRODUCT_ID       24
-#define USB_LEAF_LITE_GI_PRODUCT_ID    25
-#define USB_LEAF_PRO_OBDII_PRODUCT_ID  26
-#define USB_MEMO2_HSLS_PRODUCT_ID      27
-#define USB_LEAF_LITE_CH_PRODUCT_ID    28
-#define USB_BLACKBIRD_SPRO_PRODUCT_ID  29
-#define USB_OEM_MERCURY_PRODUCT_ID     34
-#define USB_OEM_LEAF_PRODUCT_ID                35
-#define USB_CAN_R_PRODUCT_ID           39
-#define USB_LEAF_LITE_V2_PRODUCT_ID    288
-#define USB_MINI_PCIE_HS_PRODUCT_ID    289
-#define USB_LEAF_LIGHT_HS_V2_OEM_PRODUCT_ID 290
-#define USB_USBCAN_LIGHT_2HS_PRODUCT_ID        291
-#define USB_MINI_PCIE_2HS_PRODUCT_ID   292
-
-static inline bool kvaser_is_leaf(const struct usb_device_id *id)
-{
-       return id->idProduct >= USB_LEAF_DEVEL_PRODUCT_ID &&
-              id->idProduct <= USB_MINI_PCIE_2HS_PRODUCT_ID;
-}
-
-/* Kvaser USBCan-II devices */
-#define USB_USBCAN_REVB_PRODUCT_ID     2
-#define USB_VCI2_PRODUCT_ID            3
-#define USB_USBCAN2_PRODUCT_ID         4
-#define USB_MEMORATOR_PRODUCT_ID       5
-
-static inline bool kvaser_is_usbcan(const struct usb_device_id *id)
-{
-       return id->idProduct >= USB_USBCAN_REVB_PRODUCT_ID &&
-              id->idProduct <= USB_MEMORATOR_PRODUCT_ID;
-}
-
-/* USB devices features */
-#define KVASER_HAS_SILENT_MODE         BIT(0)
-#define KVASER_HAS_TXRX_ERRORS         BIT(1)
-
-/* Message header size */
-#define MSG_HEADER_LEN                 2
-
-/* CAN message flags */
-#define MSG_FLAG_ERROR_FRAME           BIT(0)
-#define MSG_FLAG_OVERRUN               BIT(1)
-#define MSG_FLAG_NERR                  BIT(2)
-#define MSG_FLAG_WAKEUP                        BIT(3)
-#define MSG_FLAG_REMOTE_FRAME          BIT(4)
-#define MSG_FLAG_RESERVED              BIT(5)
-#define MSG_FLAG_TX_ACK                        BIT(6)
-#define MSG_FLAG_TX_REQUEST            BIT(7)
-
-/* CAN states (M16C CxSTRH register) */
-#define M16C_STATE_BUS_RESET           BIT(0)
-#define M16C_STATE_BUS_ERROR           BIT(4)
-#define M16C_STATE_BUS_PASSIVE         BIT(5)
-#define M16C_STATE_BUS_OFF             BIT(6)
-
-/* CAN msg ids */
-#define CMD_RX_STD_MESSAGE             12
-#define CMD_TX_STD_MESSAGE             13
-#define CMD_RX_EXT_MESSAGE             14
-#define CMD_TX_EXT_MESSAGE             15
-#define CMD_SET_BUS_PARAMS             16
-#define CMD_GET_BUS_PARAMS             17
-#define CMD_GET_BUS_PARAMS_REPLY       18
-#define CMD_GET_CHIP_STATE             19
-#define CMD_CHIP_STATE_EVENT           20
-#define CMD_SET_CTRL_MODE              21
-#define CMD_GET_CTRL_MODE              22
-#define CMD_GET_CTRL_MODE_REPLY                23
-#define CMD_RESET_CHIP                 24
-#define CMD_RESET_CARD                 25
-#define CMD_START_CHIP                 26
-#define CMD_START_CHIP_REPLY           27
-#define CMD_STOP_CHIP                  28
-#define CMD_STOP_CHIP_REPLY            29
-
-#define CMD_LEAF_GET_CARD_INFO2                32
-#define CMD_USBCAN_RESET_CLOCK         32
-#define CMD_USBCAN_CLOCK_OVERFLOW_EVENT        33
-
-#define CMD_GET_CARD_INFO              34
-#define CMD_GET_CARD_INFO_REPLY                35
-#define CMD_GET_SOFTWARE_INFO          38
-#define CMD_GET_SOFTWARE_INFO_REPLY    39
-#define CMD_ERROR_EVENT                        45
-#define CMD_FLUSH_QUEUE                        48
-#define CMD_RESET_ERROR_COUNTER                49
-#define CMD_TX_ACKNOWLEDGE             50
-#define CMD_CAN_ERROR_EVENT            51
-#define CMD_FLUSH_QUEUE_REPLY          68
-
-#define CMD_LEAF_USB_THROTTLE          77
-#define CMD_LEAF_LOG_MESSAGE           106
-
-/* error factors */
-#define M16C_EF_ACKE                   BIT(0)
-#define M16C_EF_CRCE                   BIT(1)
-#define M16C_EF_FORME                  BIT(2)
-#define M16C_EF_STFE                   BIT(3)
-#define M16C_EF_BITE0                  BIT(4)
-#define M16C_EF_BITE1                  BIT(5)
-#define M16C_EF_RCVE                   BIT(6)
-#define M16C_EF_TRE                    BIT(7)
-
-/* Only Leaf-based devices can report M16C error factors,
- * thus define our own error status flags for USBCANII
- */
-#define USBCAN_ERROR_STATE_NONE                0
-#define USBCAN_ERROR_STATE_TX_ERROR    BIT(0)
-#define USBCAN_ERROR_STATE_RX_ERROR    BIT(1)
-#define USBCAN_ERROR_STATE_BUSERROR    BIT(2)
-
-/* bittiming parameters */
-#define KVASER_USB_TSEG1_MIN           1
-#define KVASER_USB_TSEG1_MAX           16
-#define KVASER_USB_TSEG2_MIN           1
-#define KVASER_USB_TSEG2_MAX           8
-#define KVASER_USB_SJW_MAX             4
-#define KVASER_USB_BRP_MIN             1
-#define KVASER_USB_BRP_MAX             64
-#define KVASER_USB_BRP_INC             1
-
-/* ctrl modes */
-#define KVASER_CTRL_MODE_NORMAL                1
-#define KVASER_CTRL_MODE_SILENT                2
-#define KVASER_CTRL_MODE_SELFRECEPTION 3
-#define KVASER_CTRL_MODE_OFF           4
-
-/* Extended CAN identifier flag */
-#define KVASER_EXTENDED_FRAME          BIT(31)
-
-/* Kvaser USB CAN dongles are divided into two major families:
- * - Leaf: Based on Renesas M32C, running firmware labeled as 'filo'
- * - UsbcanII: Based on Renesas M16C, running firmware labeled as 'helios'
- */
-enum kvaser_usb_family {
-       KVASER_LEAF,
-       KVASER_USBCAN,
-};
-
-struct kvaser_msg_simple {
-       u8 tid;
-       u8 channel;
-} __packed;
-
-struct kvaser_msg_cardinfo {
-       u8 tid;
-       u8 nchannels;
-       union {
-               struct {
-                       __le32 serial_number;
-                       __le32 padding;
-               } __packed leaf0;
-               struct {
-                       __le32 serial_number_low;
-                       __le32 serial_number_high;
-               } __packed usbcan0;
-       } __packed;
-       __le32 clock_resolution;
-       __le32 mfgdate;
-       u8 ean[8];
-       u8 hw_revision;
-       union {
-               struct {
-                       u8 usb_hs_mode;
-               } __packed leaf1;
-               struct {
-                       u8 padding;
-               } __packed usbcan1;
-       } __packed;
-       __le16 padding;
-} __packed;
-
-struct kvaser_msg_cardinfo2 {
-       u8 tid;
-       u8 reserved;
-       u8 pcb_id[24];
-       __le32 oem_unlock_code;
-} __packed;
-
-struct leaf_msg_softinfo {
-       u8 tid;
-       u8 padding0;
-       __le32 sw_options;
-       __le32 fw_version;
-       __le16 max_outstanding_tx;
-       __le16 padding1[9];
-} __packed;
-
-struct usbcan_msg_softinfo {
-       u8 tid;
-       u8 fw_name[5];
-       __le16 max_outstanding_tx;
-       u8 padding[6];
-       __le32 fw_version;
-       __le16 checksum;
-       __le16 sw_options;
-} __packed;
-
-struct kvaser_msg_busparams {
-       u8 tid;
-       u8 channel;
-       __le32 bitrate;
-       u8 tseg1;
-       u8 tseg2;
-       u8 sjw;
-       u8 no_samp;
-} __packed;
-
-struct kvaser_msg_tx_can {
-       u8 channel;
-       u8 tid;
-       u8 msg[14];
-       union {
-               struct {
-                       u8 padding;
-                       u8 flags;
-               } __packed leaf;
-               struct {
-                       u8 flags;
-                       u8 padding;
-               } __packed usbcan;
-       } __packed;
-} __packed;
-
-struct kvaser_msg_rx_can_header {
-       u8 channel;
-       u8 flag;
-} __packed;
-
-struct leaf_msg_rx_can {
-       u8 channel;
-       u8 flag;
-
-       __le16 time[3];
-       u8 msg[14];
-} __packed;
-
-struct usbcan_msg_rx_can {
-       u8 channel;
-       u8 flag;
-
-       u8 msg[14];
-       __le16 time;
-} __packed;
-
-struct leaf_msg_chip_state_event {
-       u8 tid;
-       u8 channel;
-
-       __le16 time[3];
-       u8 tx_errors_count;
-       u8 rx_errors_count;
-
-       u8 status;
-       u8 padding[3];
-} __packed;
-
-struct usbcan_msg_chip_state_event {
-       u8 tid;
-       u8 channel;
-
-       u8 tx_errors_count;
-       u8 rx_errors_count;
-       __le16 time;
-
-       u8 status;
-       u8 padding[3];
-} __packed;
-
-struct kvaser_msg_tx_acknowledge_header {
-       u8 channel;
-       u8 tid;
-} __packed;
-
-struct leaf_msg_tx_acknowledge {
-       u8 channel;
-       u8 tid;
-
-       __le16 time[3];
-       u8 flags;
-       u8 time_offset;
-} __packed;
-
-struct usbcan_msg_tx_acknowledge {
-       u8 channel;
-       u8 tid;
-
-       __le16 time;
-       __le16 padding;
-} __packed;
-
-struct leaf_msg_error_event {
-       u8 tid;
-       u8 flags;
-       __le16 time[3];
-       u8 channel;
-       u8 padding;
-       u8 tx_errors_count;
-       u8 rx_errors_count;
-       u8 status;
-       u8 error_factor;
-} __packed;
-
-struct usbcan_msg_error_event {
-       u8 tid;
-       u8 padding;
-       u8 tx_errors_count_ch0;
-       u8 rx_errors_count_ch0;
-       u8 tx_errors_count_ch1;
-       u8 rx_errors_count_ch1;
-       u8 status_ch0;
-       u8 status_ch1;
-       __le16 time;
-} __packed;
-
-struct kvaser_msg_ctrl_mode {
-       u8 tid;
-       u8 channel;
-       u8 ctrl_mode;
-       u8 padding[3];
-} __packed;
-
-struct kvaser_msg_flush_queue {
-       u8 tid;
-       u8 channel;
-       u8 flags;
-       u8 padding[3];
-} __packed;
-
-struct leaf_msg_log_message {
-       u8 channel;
-       u8 flags;
-       __le16 time[3];
-       u8 dlc;
-       u8 time_offset;
-       __le32 id;
-       u8 data[8];
-} __packed;
-
-struct kvaser_msg {
-       u8 len;
-       u8 id;
-       union   {
-               struct kvaser_msg_simple simple;
-               struct kvaser_msg_cardinfo cardinfo;
-               struct kvaser_msg_cardinfo2 cardinfo2;
-               struct kvaser_msg_busparams busparams;
-
-               struct kvaser_msg_rx_can_header rx_can_header;
-               struct kvaser_msg_tx_acknowledge_header tx_acknowledge_header;
-
-               union {
-                       struct leaf_msg_softinfo softinfo;
-                       struct leaf_msg_rx_can rx_can;
-                       struct leaf_msg_chip_state_event chip_state_event;
-                       struct leaf_msg_tx_acknowledge tx_acknowledge;
-                       struct leaf_msg_error_event error_event;
-                       struct leaf_msg_log_message log_message;
-               } __packed leaf;
-
-               union {
-                       struct usbcan_msg_softinfo softinfo;
-                       struct usbcan_msg_rx_can rx_can;
-                       struct usbcan_msg_chip_state_event chip_state_event;
-                       struct usbcan_msg_tx_acknowledge tx_acknowledge;
-                       struct usbcan_msg_error_event error_event;
-               } __packed usbcan;
-
-               struct kvaser_msg_tx_can tx_can;
-               struct kvaser_msg_ctrl_mode ctrl_mode;
-               struct kvaser_msg_flush_queue flush_queue;
-       } u;
-} __packed;
-
-/* Summary of a kvaser error event, for unified Leaf/Usbcan error
- * handling. Some discrepancies between the two families exist:
- *
- * - USBCAN firmware does not report M16C "error factors"
- * - USBCAN controllers have difficulty reporting whether the raised error
- *   event is for ch0 or ch1. They leave such arbitration to the OS
- *   driver by letting it compare error counters with previous values
- *   and decide the error event's channel. Thus for USBCAN, the channel
- *   field is only advisory.
- */
-struct kvaser_usb_error_summary {
-       u8 channel, status, txerr, rxerr;
-       union {
-               struct {
-                       u8 error_factor;
-               } leaf;
-               struct {
-                       u8 other_ch_status;
-                       u8 error_state;
-               } usbcan;
-       };
-};
-
-/* Context for an outstanding, not yet ACKed, transmission */
-struct kvaser_usb_tx_urb_context {
-       struct kvaser_usb_net_priv *priv;
-       u32 echo_index;
-       int dlc;
-};
-
-struct kvaser_usb {
-       struct usb_device *udev;
-       struct kvaser_usb_net_priv *nets[MAX_NET_DEVICES];
-
-       struct usb_endpoint_descriptor *bulk_in, *bulk_out;
-       struct usb_anchor rx_submitted;
-
-       /* @max_tx_urbs: Firmware-reported maximum number of outstanding,
-        * not yet ACKed, transmissions on this device. This value is
-        * also used as a sentinel for marking free tx contexts.
-        */
-       u32 fw_version;
-       unsigned int nchannels;
-       unsigned int max_tx_urbs;
-       enum kvaser_usb_family family;
-
-       bool rxinitdone;
-       void *rxbuf[MAX_RX_URBS];
-       dma_addr_t rxbuf_dma[MAX_RX_URBS];
-};
-
-struct kvaser_usb_net_priv {
-       struct can_priv can;
-       struct can_berr_counter bec;
-
-       struct kvaser_usb *dev;
-       struct net_device *netdev;
-       int channel;
-
-       struct completion start_comp, stop_comp;
-       struct usb_anchor tx_submitted;
-
-       spinlock_t tx_contexts_lock;
-       int active_tx_contexts;
-       struct kvaser_usb_tx_urb_context tx_contexts[];
-};
-
-static const struct usb_device_id kvaser_usb_table[] = {
-       /* Leaf family IDs */
-       { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_DEVEL_PRODUCT_ID) },
-       { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_PRODUCT_ID) },
-       { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_PRO_PRODUCT_ID),
-               .driver_info = KVASER_HAS_TXRX_ERRORS |
-                              KVASER_HAS_SILENT_MODE },
-       { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_SPRO_PRODUCT_ID),
-               .driver_info = KVASER_HAS_TXRX_ERRORS |
-                              KVASER_HAS_SILENT_MODE },
-       { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_PRO_LS_PRODUCT_ID),
-               .driver_info = KVASER_HAS_TXRX_ERRORS |
-                              KVASER_HAS_SILENT_MODE },
-       { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_PRO_SWC_PRODUCT_ID),
-               .driver_info = KVASER_HAS_TXRX_ERRORS |
-                              KVASER_HAS_SILENT_MODE },
-       { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_PRO_LIN_PRODUCT_ID),
-               .driver_info = KVASER_HAS_TXRX_ERRORS |
-                              KVASER_HAS_SILENT_MODE },
-       { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_SPRO_LS_PRODUCT_ID),
-               .driver_info = KVASER_HAS_TXRX_ERRORS |
-                              KVASER_HAS_SILENT_MODE },
-       { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_SPRO_SWC_PRODUCT_ID),
-               .driver_info = KVASER_HAS_TXRX_ERRORS |
-                              KVASER_HAS_SILENT_MODE },
-       { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO2_DEVEL_PRODUCT_ID),
-               .driver_info = KVASER_HAS_TXRX_ERRORS |
-                              KVASER_HAS_SILENT_MODE },
-       { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO2_HSHS_PRODUCT_ID),
-               .driver_info = KVASER_HAS_TXRX_ERRORS |
-                              KVASER_HAS_SILENT_MODE },
-       { USB_DEVICE(KVASER_VENDOR_ID, USB_UPRO_HSHS_PRODUCT_ID),
-               .driver_info = KVASER_HAS_TXRX_ERRORS },
-       { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_GI_PRODUCT_ID) },
-       { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_PRO_OBDII_PRODUCT_ID),
-               .driver_info = KVASER_HAS_TXRX_ERRORS |
-                              KVASER_HAS_SILENT_MODE },
-       { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO2_HSLS_PRODUCT_ID),
-               .driver_info = KVASER_HAS_TXRX_ERRORS },
-       { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_CH_PRODUCT_ID),
-               .driver_info = KVASER_HAS_TXRX_ERRORS },
-       { USB_DEVICE(KVASER_VENDOR_ID, USB_BLACKBIRD_SPRO_PRODUCT_ID),
-               .driver_info = KVASER_HAS_TXRX_ERRORS },
-       { USB_DEVICE(KVASER_VENDOR_ID, USB_OEM_MERCURY_PRODUCT_ID),
-               .driver_info = KVASER_HAS_TXRX_ERRORS },
-       { USB_DEVICE(KVASER_VENDOR_ID, USB_OEM_LEAF_PRODUCT_ID),
-               .driver_info = KVASER_HAS_TXRX_ERRORS },
-       { USB_DEVICE(KVASER_VENDOR_ID, USB_CAN_R_PRODUCT_ID),
-               .driver_info = KVASER_HAS_TXRX_ERRORS },
-       { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_V2_PRODUCT_ID) },
-       { USB_DEVICE(KVASER_VENDOR_ID, USB_MINI_PCIE_HS_PRODUCT_ID) },
-       { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LIGHT_HS_V2_OEM_PRODUCT_ID) },
-       { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_LIGHT_2HS_PRODUCT_ID) },
-       { USB_DEVICE(KVASER_VENDOR_ID, USB_MINI_PCIE_2HS_PRODUCT_ID) },
-
-       /* USBCANII family IDs */
-       { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN2_PRODUCT_ID),
-               .driver_info = KVASER_HAS_TXRX_ERRORS },
-       { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_REVB_PRODUCT_ID),
-               .driver_info = KVASER_HAS_TXRX_ERRORS },
-       { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMORATOR_PRODUCT_ID),
-               .driver_info = KVASER_HAS_TXRX_ERRORS },
-       { USB_DEVICE(KVASER_VENDOR_ID, USB_VCI2_PRODUCT_ID),
-               .driver_info = KVASER_HAS_TXRX_ERRORS },
-
-       { }
-};
-MODULE_DEVICE_TABLE(usb, kvaser_usb_table);
-
-static inline int kvaser_usb_send_msg(const struct kvaser_usb *dev,
-                                     struct kvaser_msg *msg)
-{
-       int actual_len;
-
-       return usb_bulk_msg(dev->udev,
-                           usb_sndbulkpipe(dev->udev,
-                                       dev->bulk_out->bEndpointAddress),
-                           msg, msg->len, &actual_len,
-                           USB_SEND_TIMEOUT);
-}
-
-static int kvaser_usb_wait_msg(const struct kvaser_usb *dev, u8 id,
-                              struct kvaser_msg *msg)
-{
-       struct kvaser_msg *tmp;
-       void *buf;
-       int actual_len;
-       int err;
-       int pos;
-       unsigned long to = jiffies + msecs_to_jiffies(USB_RECV_TIMEOUT);
-
-       buf = kzalloc(RX_BUFFER_SIZE, GFP_KERNEL);
-       if (!buf)
-               return -ENOMEM;
-
-       do {
-               err = usb_bulk_msg(dev->udev,
-                                  usb_rcvbulkpipe(dev->udev,
-                                       dev->bulk_in->bEndpointAddress),
-                                  buf, RX_BUFFER_SIZE, &actual_len,
-                                  USB_RECV_TIMEOUT);
-               if (err < 0)
-                       goto end;
-
-               pos = 0;
-               while (pos <= actual_len - MSG_HEADER_LEN) {
-                       tmp = buf + pos;
-
-                       /* Handle messages crossing the USB endpoint max packet
-                        * size boundary. Check kvaser_usb_read_bulk_callback()
-                        * for further details.
-                        */
-                       if (tmp->len == 0) {
-                               pos = round_up(pos, le16_to_cpu(dev->bulk_in->
-                                                               wMaxPacketSize));
-                               continue;
-                       }
-
-                       if (pos + tmp->len > actual_len) {
-                               dev_err_ratelimited(dev->udev->dev.parent,
-                                                   "Format error\n");
-                               break;
-                       }
-
-                       if (tmp->id == id) {
-                               memcpy(msg, tmp, tmp->len);
-                               goto end;
-                       }
-
-                       pos += tmp->len;
-               }
-       } while (time_before(jiffies, to));
-
-       err = -EINVAL;
-
-end:
-       kfree(buf);
-
-       return err;
-}
-
-static int kvaser_usb_send_simple_msg(const struct kvaser_usb *dev,
-                                     u8 msg_id, int channel)
-{
-       struct kvaser_msg *msg;
-       int rc;
-
-       msg = kmalloc(sizeof(*msg), GFP_KERNEL);
-       if (!msg)
-               return -ENOMEM;
-
-       msg->id = msg_id;
-       msg->len = MSG_HEADER_LEN + sizeof(struct kvaser_msg_simple);
-       msg->u.simple.channel = channel;
-       msg->u.simple.tid = 0xff;
-
-       rc = kvaser_usb_send_msg(dev, msg);
-
-       kfree(msg);
-       return rc;
-}
-
-static int kvaser_usb_get_software_info(struct kvaser_usb *dev)
-{
-       struct kvaser_msg msg;
-       int err;
-
-       err = kvaser_usb_send_simple_msg(dev, CMD_GET_SOFTWARE_INFO, 0);
-       if (err)
-               return err;
-
-       err = kvaser_usb_wait_msg(dev, CMD_GET_SOFTWARE_INFO_REPLY, &msg);
-       if (err)
-               return err;
-
-       switch (dev->family) {
-       case KVASER_LEAF:
-               dev->fw_version = le32_to_cpu(msg.u.leaf.softinfo.fw_version);
-               dev->max_tx_urbs =
-                       le16_to_cpu(msg.u.leaf.softinfo.max_outstanding_tx);
-               break;
-       case KVASER_USBCAN:
-               dev->fw_version = le32_to_cpu(msg.u.usbcan.softinfo.fw_version);
-               dev->max_tx_urbs =
-                       le16_to_cpu(msg.u.usbcan.softinfo.max_outstanding_tx);
-               break;
-       }
-
-       return 0;
-}
-
-static int kvaser_usb_get_card_info(struct kvaser_usb *dev)
-{
-       struct kvaser_msg msg;
-       int err;
-
-       err = kvaser_usb_send_simple_msg(dev, CMD_GET_CARD_INFO, 0);
-       if (err)
-               return err;
-
-       err = kvaser_usb_wait_msg(dev, CMD_GET_CARD_INFO_REPLY, &msg);
-       if (err)
-               return err;
-
-       dev->nchannels = msg.u.cardinfo.nchannels;
-       if ((dev->nchannels > MAX_NET_DEVICES) ||
-           (dev->family == KVASER_USBCAN &&
-            dev->nchannels > MAX_USBCAN_NET_DEVICES))
-               return -EINVAL;
-
-       return 0;
-}
-
-static void kvaser_usb_tx_acknowledge(const struct kvaser_usb *dev,
-                                     const struct kvaser_msg *msg)
-{
-       struct net_device_stats *stats;
-       struct kvaser_usb_tx_urb_context *context;
-       struct kvaser_usb_net_priv *priv;
-       struct sk_buff *skb;
-       struct can_frame *cf;
-       unsigned long flags;
-       u8 channel, tid;
-
-       channel = msg->u.tx_acknowledge_header.channel;
-       tid = msg->u.tx_acknowledge_header.tid;
-
-       if (channel >= dev->nchannels) {
-               dev_err(dev->udev->dev.parent,
-                       "Invalid channel number (%d)\n", channel);
-               return;
-       }
-
-       priv = dev->nets[channel];
-
-       if (!netif_device_present(priv->netdev))
-               return;
-
-       stats = &priv->netdev->stats;
-
-       context = &priv->tx_contexts[tid % dev->max_tx_urbs];
-
-       /* Sometimes the state change doesn't come after a bus-off event */
-       if (priv->can.restart_ms &&
-           (priv->can.state >= CAN_STATE_BUS_OFF)) {
-               skb = alloc_can_err_skb(priv->netdev, &cf);
-               if (skb) {
-                       cf->can_id |= CAN_ERR_RESTARTED;
-
-                       stats->rx_packets++;
-                       stats->rx_bytes += cf->can_dlc;
-                       netif_rx(skb);
-               } else {
-                       netdev_err(priv->netdev,
-                                  "No memory left for err_skb\n");
-               }
-
-               priv->can.can_stats.restarts++;
-               netif_carrier_on(priv->netdev);
-
-               priv->can.state = CAN_STATE_ERROR_ACTIVE;
-       }
-
-       stats->tx_packets++;
-       stats->tx_bytes += context->dlc;
-
-       spin_lock_irqsave(&priv->tx_contexts_lock, flags);
-
-       can_get_echo_skb(priv->netdev, context->echo_index);
-       context->echo_index = dev->max_tx_urbs;
-       --priv->active_tx_contexts;
-       netif_wake_queue(priv->netdev);
-
-       spin_unlock_irqrestore(&priv->tx_contexts_lock, flags);
-}
-
-static void kvaser_usb_simple_msg_callback(struct urb *urb)
-{
-       struct net_device *netdev = urb->context;
-
-       kfree(urb->transfer_buffer);
-
-       if (urb->status)
-               netdev_warn(netdev, "urb status received: %d\n",
-                           urb->status);
-}
-
-static int kvaser_usb_simple_msg_async(struct kvaser_usb_net_priv *priv,
-                                      u8 msg_id)
-{
-       struct kvaser_usb *dev = priv->dev;
-       struct net_device *netdev = priv->netdev;
-       struct kvaser_msg *msg;
-       struct urb *urb;
-       void *buf;
-       int err;
-
-       urb = usb_alloc_urb(0, GFP_ATOMIC);
-       if (!urb)
-               return -ENOMEM;
-
-       buf = kmalloc(sizeof(struct kvaser_msg), GFP_ATOMIC);
-       if (!buf) {
-               usb_free_urb(urb);
-               return -ENOMEM;
-       }
-
-       msg = (struct kvaser_msg *)buf;
-       msg->len = MSG_HEADER_LEN + sizeof(struct kvaser_msg_simple);
-       msg->id = msg_id;
-       msg->u.simple.channel = priv->channel;
-
-       usb_fill_bulk_urb(urb, dev->udev,
-                         usb_sndbulkpipe(dev->udev,
-                                         dev->bulk_out->bEndpointAddress),
-                         buf, msg->len,
-                         kvaser_usb_simple_msg_callback, netdev);
-       usb_anchor_urb(urb, &priv->tx_submitted);
-
-       err = usb_submit_urb(urb, GFP_ATOMIC);
-       if (err) {
-               netdev_err(netdev, "Error transmitting URB\n");
-               usb_unanchor_urb(urb);
-               kfree(buf);
-               usb_free_urb(urb);
-               return err;
-       }
-
-       usb_free_urb(urb);
-
-       return 0;
-}
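The usb_free_urb() after a successful usb_submit_urb() above is deliberate: submission takes its own reference to the URB, so the caller can drop its reference immediately and the core destroys the URB once the completion handler has run. A sketch of the fire-and-forget idiom (simplified from the function above):

/* Simplified from kvaser_usb_simple_msg_async() above: drop our URB
 * reference right after submission; the USB core holds its own until
 * the completion callback has run.
 */
static void example_fire_and_forget(struct urb *urb, struct usb_anchor *anchor)
{
	int err;

	usb_anchor_urb(urb, anchor);
	err = usb_submit_urb(urb, GFP_ATOMIC);
	if (err)
		usb_unanchor_urb(urb);	/* submission failed: undo the anchor */

	usb_free_urb(urb);		/* drop our reference either way */
}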
-
-static void kvaser_usb_rx_error_update_can_state(struct kvaser_usb_net_priv *priv,
-                                                const struct kvaser_usb_error_summary *es,
-                                                struct can_frame *cf)
-{
-       struct kvaser_usb *dev = priv->dev;
-       struct net_device_stats *stats = &priv->netdev->stats;
-       enum can_state cur_state, new_state, tx_state, rx_state;
-
-       netdev_dbg(priv->netdev, "Error status: 0x%02x\n", es->status);
-
-       new_state = cur_state = priv->can.state;
-
-       if (es->status & (M16C_STATE_BUS_OFF | M16C_STATE_BUS_RESET))
-               new_state = CAN_STATE_BUS_OFF;
-       else if (es->status & M16C_STATE_BUS_PASSIVE)
-               new_state = CAN_STATE_ERROR_PASSIVE;
-       else if (es->status & M16C_STATE_BUS_ERROR) {
-               /* Guard against spurious error events after a busoff */
-               if (cur_state < CAN_STATE_BUS_OFF) {
-                       if ((es->txerr >= 128) || (es->rxerr >= 128))
-                               new_state = CAN_STATE_ERROR_PASSIVE;
-                       else if ((es->txerr >= 96) || (es->rxerr >= 96))
-                               new_state = CAN_STATE_ERROR_WARNING;
-                       else if (cur_state > CAN_STATE_ERROR_ACTIVE)
-                               new_state = CAN_STATE_ERROR_ACTIVE;
-               }
-       }
-
-       if (!es->status)
-               new_state = CAN_STATE_ERROR_ACTIVE;
-
-       if (new_state != cur_state) {
-               tx_state = (es->txerr >= es->rxerr) ? new_state : 0;
-               rx_state = (es->txerr <= es->rxerr) ? new_state : 0;
-
-               can_change_state(priv->netdev, cf, tx_state, rx_state);
-       }
-
-       if (priv->can.restart_ms &&
-           (cur_state >= CAN_STATE_BUS_OFF) &&
-           (new_state < CAN_STATE_BUS_OFF)) {
-               priv->can.can_stats.restarts++;
-       }
-
-       switch (dev->family) {
-       case KVASER_LEAF:
-               if (es->leaf.error_factor) {
-                       priv->can.can_stats.bus_error++;
-                       stats->rx_errors++;
-               }
-               break;
-       case KVASER_USBCAN:
-               if (es->usbcan.error_state & USBCAN_ERROR_STATE_TX_ERROR)
-                       stats->tx_errors++;
-               if (es->usbcan.error_state & USBCAN_ERROR_STATE_RX_ERROR)
-                       stats->rx_errors++;
-               if (es->usbcan.error_state & USBCAN_ERROR_STATE_BUSERROR) {
-                       priv->can.can_stats.bus_error++;
-               }
-               break;
-       }
-
-       priv->bec.txerr = es->txerr;
-       priv->bec.rxerr = es->rxerr;
-}
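The error-counter thresholds above implement CAN fault confinement: a counter of 96 or more means error-warning, 128 or more error-passive, while bus-off is signalled separately through the controller status bits. The mapping, distilled into a sketch:

#include <linux/can/dev.h>	/* enum can_state */

/* Sketch of the counter-to-state mapping used above (bus-off comes
 * from the status bits, not from the counters).
 */
static enum can_state example_state_from_counters(u8 txerr, u8 rxerr)
{
	if (txerr >= 128 || rxerr >= 128)
		return CAN_STATE_ERROR_PASSIVE;
	if (txerr >= 96 || rxerr >= 96)
		return CAN_STATE_ERROR_WARNING;
	return CAN_STATE_ERROR_ACTIVE;
}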
-
-static void kvaser_usb_rx_error(const struct kvaser_usb *dev,
-                               const struct kvaser_usb_error_summary *es)
-{
-       struct can_frame *cf, tmp_cf = { .can_id = CAN_ERR_FLAG, .can_dlc = CAN_ERR_DLC };
-       struct sk_buff *skb;
-       struct net_device_stats *stats;
-       struct kvaser_usb_net_priv *priv;
-       enum can_state old_state, new_state;
-
-       if (es->channel >= dev->nchannels) {
-               dev_err(dev->udev->dev.parent,
-                       "Invalid channel number (%d)\n", es->channel);
-               return;
-       }
-
-       priv = dev->nets[es->channel];
-       stats = &priv->netdev->stats;
-
-       /* Update all of the can interface's state and error counters before
-        * trying any memory allocation that can actually fail with -ENOMEM.
-        *
-        * We send a temporary stack-allocated error can frame to
-        * can_change_state() for the very same reason.
-        *
-        * TODO: Split can_change_state() responsibility between updating the
-        * can interface's state and counters, and the setting up of can error
-        * frame ID and data to userspace. Remove stack allocation afterwards.
-        */
-       old_state = priv->can.state;
-       kvaser_usb_rx_error_update_can_state(priv, es, &tmp_cf);
-       new_state = priv->can.state;
-
-       skb = alloc_can_err_skb(priv->netdev, &cf);
-       if (!skb) {
-               stats->rx_dropped++;
-               return;
-       }
-       memcpy(cf, &tmp_cf, sizeof(*cf));
-
-       if (new_state != old_state) {
-               if (es->status &
-                   (M16C_STATE_BUS_OFF | M16C_STATE_BUS_RESET)) {
-                       if (!priv->can.restart_ms)
-                               kvaser_usb_simple_msg_async(priv, CMD_STOP_CHIP);
-                       netif_carrier_off(priv->netdev);
-               }
-
-               if (priv->can.restart_ms &&
-                   (old_state >= CAN_STATE_BUS_OFF) &&
-                   (new_state < CAN_STATE_BUS_OFF)) {
-                       cf->can_id |= CAN_ERR_RESTARTED;
-                       netif_carrier_on(priv->netdev);
-               }
-       }
-
-       switch (dev->family) {
-       case KVASER_LEAF:
-               if (es->leaf.error_factor) {
-                       cf->can_id |= CAN_ERR_BUSERROR | CAN_ERR_PROT;
-
-                       if (es->leaf.error_factor & M16C_EF_ACKE)
-                               cf->data[3] = CAN_ERR_PROT_LOC_ACK;
-                       if (es->leaf.error_factor & M16C_EF_CRCE)
-                               cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ;
-                       if (es->leaf.error_factor & M16C_EF_FORME)
-                               cf->data[2] |= CAN_ERR_PROT_FORM;
-                       if (es->leaf.error_factor & M16C_EF_STFE)
-                               cf->data[2] |= CAN_ERR_PROT_STUFF;
-                       if (es->leaf.error_factor & M16C_EF_BITE0)
-                               cf->data[2] |= CAN_ERR_PROT_BIT0;
-                       if (es->leaf.error_factor & M16C_EF_BITE1)
-                               cf->data[2] |= CAN_ERR_PROT_BIT1;
-                       if (es->leaf.error_factor & M16C_EF_TRE)
-                               cf->data[2] |= CAN_ERR_PROT_TX;
-               }
-               break;
-       case KVASER_USBCAN:
-               if (es->usbcan.error_state & USBCAN_ERROR_STATE_BUSERROR) {
-                       cf->can_id |= CAN_ERR_BUSERROR;
-               }
-               break;
-       }
-
-       cf->data[6] = es->txerr;
-       cf->data[7] = es->rxerr;
-
-       stats->rx_packets++;
-       stats->rx_bytes += cf->can_dlc;
-       netif_rx(skb);
-}
-
-/* For USBCAN, report an error to userspace iff the channel's error counters
- * have changed, or we're the only channel seeing a bus error state.
- */
-static void kvaser_usbcan_conditionally_rx_error(const struct kvaser_usb *dev,
-                                                struct kvaser_usb_error_summary *es)
-{
-       struct kvaser_usb_net_priv *priv;
-       int channel;
-       bool report_error;
-
-       channel = es->channel;
-       if (channel >= dev->nchannels) {
-               dev_err(dev->udev->dev.parent,
-                       "Invalid channel number (%d)\n", channel);
-               return;
-       }
-
-       priv = dev->nets[channel];
-       report_error = false;
-
-       if (es->txerr != priv->bec.txerr) {
-               es->usbcan.error_state |= USBCAN_ERROR_STATE_TX_ERROR;
-               report_error = true;
-       }
-       if (es->rxerr != priv->bec.rxerr) {
-               es->usbcan.error_state |= USBCAN_ERROR_STATE_RX_ERROR;
-               report_error = true;
-       }
-       if ((es->status & M16C_STATE_BUS_ERROR) &&
-           !(es->usbcan.other_ch_status & M16C_STATE_BUS_ERROR)) {
-               es->usbcan.error_state |= USBCAN_ERROR_STATE_BUSERROR;
-               report_error = true;
-       }
-
-       if (report_error)
-               kvaser_usb_rx_error(dev, es);
-}
-
-static void kvaser_usbcan_rx_error(const struct kvaser_usb *dev,
-                                  const struct kvaser_msg *msg)
-{
-       struct kvaser_usb_error_summary es = { };
-
-       switch (msg->id) {
-       /* Sometimes errors are sent as unsolicited chip state events */
-       case CMD_CHIP_STATE_EVENT:
-               es.channel = msg->u.usbcan.chip_state_event.channel;
-               es.status =  msg->u.usbcan.chip_state_event.status;
-               es.txerr = msg->u.usbcan.chip_state_event.tx_errors_count;
-               es.rxerr = msg->u.usbcan.chip_state_event.rx_errors_count;
-               kvaser_usbcan_conditionally_rx_error(dev, &es);
-               break;
-
-       case CMD_CAN_ERROR_EVENT:
-               es.channel = 0;
-               es.status = msg->u.usbcan.error_event.status_ch0;
-               es.txerr = msg->u.usbcan.error_event.tx_errors_count_ch0;
-               es.rxerr = msg->u.usbcan.error_event.rx_errors_count_ch0;
-               es.usbcan.other_ch_status =
-                       msg->u.usbcan.error_event.status_ch1;
-               kvaser_usbcan_conditionally_rx_error(dev, &es);
-
-               /* The USBCAN firmware supports up to 2 channels.
-                * Now that ch0 was checked, check if ch1 has any errors.
-                */
-               if (dev->nchannels == MAX_USBCAN_NET_DEVICES) {
-                       es.channel = 1;
-                       es.status = msg->u.usbcan.error_event.status_ch1;
-                       es.txerr = msg->u.usbcan.error_event.tx_errors_count_ch1;
-                       es.rxerr = msg->u.usbcan.error_event.rx_errors_count_ch1;
-                       es.usbcan.other_ch_status =
-                               msg->u.usbcan.error_event.status_ch0;
-                       kvaser_usbcan_conditionally_rx_error(dev, &es);
-               }
-               break;
-
-       default:
-               dev_err(dev->udev->dev.parent, "Invalid msg id (%d)\n",
-                       msg->id);
-       }
-}
-
-static void kvaser_leaf_rx_error(const struct kvaser_usb *dev,
-                                const struct kvaser_msg *msg)
-{
-       struct kvaser_usb_error_summary es = { };
-
-       switch (msg->id) {
-       case CMD_CAN_ERROR_EVENT:
-               es.channel = msg->u.leaf.error_event.channel;
-               es.status =  msg->u.leaf.error_event.status;
-               es.txerr = msg->u.leaf.error_event.tx_errors_count;
-               es.rxerr = msg->u.leaf.error_event.rx_errors_count;
-               es.leaf.error_factor = msg->u.leaf.error_event.error_factor;
-               break;
-       case CMD_LEAF_LOG_MESSAGE:
-               es.channel = msg->u.leaf.log_message.channel;
-               es.status = msg->u.leaf.log_message.data[0];
-               es.txerr = msg->u.leaf.log_message.data[2];
-               es.rxerr = msg->u.leaf.log_message.data[3];
-               es.leaf.error_factor = msg->u.leaf.log_message.data[1];
-               break;
-       case CMD_CHIP_STATE_EVENT:
-               es.channel = msg->u.leaf.chip_state_event.channel;
-               es.status =  msg->u.leaf.chip_state_event.status;
-               es.txerr = msg->u.leaf.chip_state_event.tx_errors_count;
-               es.rxerr = msg->u.leaf.chip_state_event.rx_errors_count;
-               es.leaf.error_factor = 0;
-               break;
-       default:
-               dev_err(dev->udev->dev.parent, "Invalid msg id (%d)\n",
-                       msg->id);
-               return;
-       }
-
-       kvaser_usb_rx_error(dev, &es);
-}
-
-static void kvaser_usb_rx_can_err(const struct kvaser_usb_net_priv *priv,
-                                 const struct kvaser_msg *msg)
-{
-       struct can_frame *cf;
-       struct sk_buff *skb;
-       struct net_device_stats *stats = &priv->netdev->stats;
-
-       if (msg->u.rx_can_header.flag & (MSG_FLAG_ERROR_FRAME |
-                                        MSG_FLAG_NERR)) {
-               netdev_err(priv->netdev, "Unknown error (flags: 0x%02x)\n",
-                          msg->u.rx_can_header.flag);
-
-               stats->rx_errors++;
-               return;
-       }
-
-       if (msg->u.rx_can_header.flag & MSG_FLAG_OVERRUN) {
-               stats->rx_over_errors++;
-               stats->rx_errors++;
-
-               skb = alloc_can_err_skb(priv->netdev, &cf);
-               if (!skb) {
-                       stats->rx_dropped++;
-                       return;
-               }
-
-               cf->can_id |= CAN_ERR_CRTL;
-               cf->data[1] = CAN_ERR_CRTL_RX_OVERFLOW;
-
-               stats->rx_packets++;
-               stats->rx_bytes += cf->can_dlc;
-               netif_rx(skb);
-       }
-}
-
-static void kvaser_usb_rx_can_msg(const struct kvaser_usb *dev,
-                                 const struct kvaser_msg *msg)
-{
-       struct kvaser_usb_net_priv *priv;
-       struct can_frame *cf;
-       struct sk_buff *skb;
-       struct net_device_stats *stats;
-       u8 channel = msg->u.rx_can_header.channel;
-       const u8 *rx_msg = NULL;        /* GCC */
-
-       if (channel >= dev->nchannels) {
-               dev_err(dev->udev->dev.parent,
-                       "Invalid channel number (%d)\n", channel);
-               return;
-       }
-
-       priv = dev->nets[channel];
-       stats = &priv->netdev->stats;
-
-       if ((msg->u.rx_can_header.flag & MSG_FLAG_ERROR_FRAME) &&
-           (dev->family == KVASER_LEAF && msg->id == CMD_LEAF_LOG_MESSAGE)) {
-               kvaser_leaf_rx_error(dev, msg);
-               return;
-       } else if (msg->u.rx_can_header.flag & (MSG_FLAG_ERROR_FRAME |
-                                               MSG_FLAG_NERR |
-                                               MSG_FLAG_OVERRUN)) {
-               kvaser_usb_rx_can_err(priv, msg);
-               return;
-       } else if (msg->u.rx_can_header.flag & ~MSG_FLAG_REMOTE_FRAME) {
-               netdev_warn(priv->netdev,
-                           "Unhandled frame (flags: 0x%02x)",
-                           msg->u.rx_can_header.flag);
-               return;
-       }
-
-       switch (dev->family) {
-       case KVASER_LEAF:
-               rx_msg = msg->u.leaf.rx_can.msg;
-               break;
-       case KVASER_USBCAN:
-               rx_msg = msg->u.usbcan.rx_can.msg;
-               break;
-       }
-
-       skb = alloc_can_skb(priv->netdev, &cf);
-       if (!skb) {
-               stats->rx_dropped++;
-               return;
-       }
-
-       if (dev->family == KVASER_LEAF && msg->id == CMD_LEAF_LOG_MESSAGE) {
-               cf->can_id = le32_to_cpu(msg->u.leaf.log_message.id);
-               if (cf->can_id & KVASER_EXTENDED_FRAME)
-                       cf->can_id &= CAN_EFF_MASK | CAN_EFF_FLAG;
-               else
-                       cf->can_id &= CAN_SFF_MASK;
-
-               cf->can_dlc = get_can_dlc(msg->u.leaf.log_message.dlc);
-
-               if (msg->u.leaf.log_message.flags & MSG_FLAG_REMOTE_FRAME)
-                       cf->can_id |= CAN_RTR_FLAG;
-               else
-                       memcpy(cf->data, &msg->u.leaf.log_message.data,
-                              cf->can_dlc);
-       } else {
-               cf->can_id = ((rx_msg[0] & 0x1f) << 6) | (rx_msg[1] & 0x3f);
-
-               if (msg->id == CMD_RX_EXT_MESSAGE) {
-                       cf->can_id <<= 18;
-                       cf->can_id |= ((rx_msg[2] & 0x0f) << 14) |
-                                     ((rx_msg[3] & 0xff) << 6) |
-                                     (rx_msg[4] & 0x3f);
-                       cf->can_id |= CAN_EFF_FLAG;
-               }
-
-               cf->can_dlc = get_can_dlc(rx_msg[5]);
-
-               if (msg->u.rx_can_header.flag & MSG_FLAG_REMOTE_FRAME)
-                       cf->can_id |= CAN_RTR_FLAG;
-               else
-                       memcpy(cf->data, &rx_msg[6],
-                              cf->can_dlc);
-       }
-
-       stats->rx_packets++;
-       stats->rx_bytes += cf->can_dlc;
-       netif_rx(skb);
-}
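In the non-log-message path above, an 11-bit standard identifier arrives split across two bytes (5 high bits, then 6 low bits). A worked example of the reassembly, with illustrative values:

#include <linux/types.h>	/* u8, u32 */

/* Worked example (illustrative values): ID 0x123 is carried as
 * rx_msg[0] = 0x04 and rx_msg[1] = 0x23, since
 * (0x04 << 6) | 0x23 == 0x123.
 */
static u32 example_unpack_std_id(const u8 *rx_msg)
{
	return ((rx_msg[0] & 0x1f) << 6) | (rx_msg[1] & 0x3f);
}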
-
-static void kvaser_usb_start_chip_reply(const struct kvaser_usb *dev,
-                                       const struct kvaser_msg *msg)
-{
-       struct kvaser_usb_net_priv *priv;
-       u8 channel = msg->u.simple.channel;
-
-       if (channel >= dev->nchannels) {
-               dev_err(dev->udev->dev.parent,
-                       "Invalid channel number (%d)\n", channel);
-               return;
-       }
-
-       priv = dev->nets[channel];
-
-       if (completion_done(&priv->start_comp) &&
-           netif_queue_stopped(priv->netdev)) {
-               netif_wake_queue(priv->netdev);
-       } else {
-               netif_start_queue(priv->netdev);
-               complete(&priv->start_comp);
-       }
-}
-
-static void kvaser_usb_stop_chip_reply(const struct kvaser_usb *dev,
-                                      const struct kvaser_msg *msg)
-{
-       struct kvaser_usb_net_priv *priv;
-       u8 channel = msg->u.simple.channel;
-
-       if (channel >= dev->nchannels) {
-               dev_err(dev->udev->dev.parent,
-                       "Invalid channel number (%d)\n", channel);
-               return;
-       }
-
-       priv = dev->nets[channel];
-
-       complete(&priv->stop_comp);
-}
-
-static void kvaser_usb_handle_message(const struct kvaser_usb *dev,
-                                     const struct kvaser_msg *msg)
-{
-       switch (msg->id) {
-       case CMD_START_CHIP_REPLY:
-               kvaser_usb_start_chip_reply(dev, msg);
-               break;
-
-       case CMD_STOP_CHIP_REPLY:
-               kvaser_usb_stop_chip_reply(dev, msg);
-               break;
-
-       case CMD_RX_STD_MESSAGE:
-       case CMD_RX_EXT_MESSAGE:
-               kvaser_usb_rx_can_msg(dev, msg);
-               break;
-
-       case CMD_LEAF_LOG_MESSAGE:
-               if (dev->family != KVASER_LEAF)
-                       goto warn;
-               kvaser_usb_rx_can_msg(dev, msg);
-               break;
-
-       case CMD_CHIP_STATE_EVENT:
-       case CMD_CAN_ERROR_EVENT:
-               if (dev->family == KVASER_LEAF)
-                       kvaser_leaf_rx_error(dev, msg);
-               else
-                       kvaser_usbcan_rx_error(dev, msg);
-               break;
-
-       case CMD_TX_ACKNOWLEDGE:
-               kvaser_usb_tx_acknowledge(dev, msg);
-               break;
-
-       /* Ignored messages */
-       case CMD_USBCAN_CLOCK_OVERFLOW_EVENT:
-               if (dev->family != KVASER_USBCAN)
-                       goto warn;
-               break;
-
-       case CMD_FLUSH_QUEUE_REPLY:
-               if (dev->family != KVASER_LEAF)
-                       goto warn;
-               break;
-
-       default:
-warn:          dev_warn(dev->udev->dev.parent,
-                        "Unhandled message (%d)\n", msg->id);
-               break;
-       }
-}
-
-static void kvaser_usb_read_bulk_callback(struct urb *urb)
-{
-       struct kvaser_usb *dev = urb->context;
-       struct kvaser_msg *msg;
-       int pos = 0;
-       int err, i;
-
-       switch (urb->status) {
-       case 0:
-               break;
-       case -ENOENT:
-       case -EPIPE:
-       case -EPROTO:
-       case -ESHUTDOWN:
-               return;
-       default:
-               dev_info(dev->udev->dev.parent, "Rx URB aborted (%d)\n",
-                        urb->status);
-               goto resubmit_urb;
-       }
-
-       while (pos <= (int)(urb->actual_length - MSG_HEADER_LEN)) {
-               msg = urb->transfer_buffer + pos;
-
-               /* The Kvaser firmware can only read and write messages that
-                * do not cross the USB endpoint's wMaxPacketSize boundary.
-                * If a follow-up command crosses such boundary, firmware puts
-                * a placeholder zero-length command in its place then aligns
-                * the real command to the next max packet size.
-                *
-                * Handle such cases or we're going to miss a significant
-                * number of events in case of a heavy rx load on the bus.
-                */
-               if (msg->len == 0) {
-                       pos = round_up(pos, le16_to_cpu(dev->bulk_in->
-                                                       wMaxPacketSize));
-                       continue;
-               }
-
-               if (pos + msg->len > urb->actual_length) {
-                       dev_err_ratelimited(dev->udev->dev.parent,
-                                           "Format error\n");
-                       break;
-               }
-
-               kvaser_usb_handle_message(dev, msg);
-               pos += msg->len;
-       }
-
-resubmit_urb:
-       usb_fill_bulk_urb(urb, dev->udev,
-                         usb_rcvbulkpipe(dev->udev,
-                                         dev->bulk_in->bEndpointAddress),
-                         urb->transfer_buffer, RX_BUFFER_SIZE,
-                         kvaser_usb_read_bulk_callback, dev);
-
-       err = usb_submit_urb(urb, GFP_ATOMIC);
-       if (err == -ENODEV) {
-               for (i = 0; i < dev->nchannels; i++) {
-                       if (!dev->nets[i])
-                               continue;
-
-                       netif_device_detach(dev->nets[i]->netdev);
-               }
-       } else if (err) {
-               dev_err(dev->udev->dev.parent,
-                       "Failed resubmitting read bulk urb: %d\n", err);
-       }
-
-       return;
-}
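The zero-length placeholder handling above realigns the read cursor to the endpoint's wMaxPacketSize, so the next real command is picked up at the boundary the firmware padded to. A worked example (a packet size of 64 is assumed purely for illustration):

#include <linux/kernel.h>	/* round_up() */

/* Illustrative: with wMaxPacketSize == 64, a zero-length placeholder
 * seen at pos == 60 moves the cursor to the next 64-byte boundary,
 * where the firmware aligned the real command.
 */
static int example_skip_placeholder(int pos)
{
	return round_up(pos, 64);	/* round_up(60, 64) == 64 */
}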
-
-static int kvaser_usb_setup_rx_urbs(struct kvaser_usb *dev)
-{
-       int i, err = 0;
-
-       if (dev->rxinitdone)
-               return 0;
-
-       for (i = 0; i < MAX_RX_URBS; i++) {
-               struct urb *urb = NULL;
-               u8 *buf = NULL;
-               dma_addr_t buf_dma;
-
-               urb = usb_alloc_urb(0, GFP_KERNEL);
-               if (!urb) {
-                       err = -ENOMEM;
-                       break;
-               }
-
-               buf = usb_alloc_coherent(dev->udev, RX_BUFFER_SIZE,
-                                        GFP_KERNEL, &buf_dma);
-               if (!buf) {
-                       dev_warn(dev->udev->dev.parent,
-                                "No memory left for USB buffer\n");
-                       usb_free_urb(urb);
-                       err = -ENOMEM;
-                       break;
-               }
-
-               usb_fill_bulk_urb(urb, dev->udev,
-                                 usb_rcvbulkpipe(dev->udev,
-                                         dev->bulk_in->bEndpointAddress),
-                                 buf, RX_BUFFER_SIZE,
-                                 kvaser_usb_read_bulk_callback,
-                                 dev);
-               urb->transfer_dma = buf_dma;
-               urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
-               usb_anchor_urb(urb, &dev->rx_submitted);
-
-               err = usb_submit_urb(urb, GFP_KERNEL);
-               if (err) {
-                       usb_unanchor_urb(urb);
-                       usb_free_coherent(dev->udev, RX_BUFFER_SIZE, buf,
-                                         buf_dma);
-                       usb_free_urb(urb);
-                       break;
-               }
-
-               dev->rxbuf[i] = buf;
-               dev->rxbuf_dma[i] = buf_dma;
-
-               usb_free_urb(urb);
-       }
-
-       if (i == 0) {
-               dev_warn(dev->udev->dev.parent,
-                        "Cannot set up read URBs, error %d\n", err);
-               return err;
-       } else if (i < MAX_RX_URBS) {
-               dev_warn(dev->udev->dev.parent,
-                        "RX performance may be slow\n");
-       }
-
-       dev->rxinitdone = true;
-
-       return 0;
-}
-
-static int kvaser_usb_set_opt_mode(const struct kvaser_usb_net_priv *priv)
-{
-       struct kvaser_msg *msg;
-       int rc;
-
-       msg = kmalloc(sizeof(*msg), GFP_KERNEL);
-       if (!msg)
-               return -ENOMEM;
-
-       msg->id = CMD_SET_CTRL_MODE;
-       msg->len = MSG_HEADER_LEN + sizeof(struct kvaser_msg_ctrl_mode);
-       msg->u.ctrl_mode.tid = 0xff;
-       msg->u.ctrl_mode.channel = priv->channel;
-
-       if (priv->can.ctrlmode & CAN_CTRLMODE_LISTENONLY)
-               msg->u.ctrl_mode.ctrl_mode = KVASER_CTRL_MODE_SILENT;
-       else
-               msg->u.ctrl_mode.ctrl_mode = KVASER_CTRL_MODE_NORMAL;
-
-       rc = kvaser_usb_send_msg(priv->dev, msg);
-
-       kfree(msg);
-       return rc;
-}
-
-static int kvaser_usb_start_chip(struct kvaser_usb_net_priv *priv)
-{
-       int err;
-
-       init_completion(&priv->start_comp);
-
-       err = kvaser_usb_send_simple_msg(priv->dev, CMD_START_CHIP,
-                                        priv->channel);
-       if (err)
-               return err;
-
-       if (!wait_for_completion_timeout(&priv->start_comp,
-                                        msecs_to_jiffies(START_TIMEOUT)))
-               return -ETIMEDOUT;
-
-       return 0;
-}
-
-static int kvaser_usb_open(struct net_device *netdev)
-{
-       struct kvaser_usb_net_priv *priv = netdev_priv(netdev);
-       struct kvaser_usb *dev = priv->dev;
-       int err;
-
-       err = open_candev(netdev);
-       if (err)
-               return err;
-
-       err = kvaser_usb_setup_rx_urbs(dev);
-       if (err)
-               goto error;
-
-       err = kvaser_usb_set_opt_mode(priv);
-       if (err)
-               goto error;
-
-       err = kvaser_usb_start_chip(priv);
-       if (err) {
-               netdev_warn(netdev, "Cannot start device, error %d\n", err);
-               goto error;
-       }
-
-       priv->can.state = CAN_STATE_ERROR_ACTIVE;
-
-       return 0;
-
-error:
-       close_candev(netdev);
-       return err;
-}
-
-static void kvaser_usb_reset_tx_urb_contexts(struct kvaser_usb_net_priv *priv)
-{
-       int i, max_tx_urbs;
-
-       max_tx_urbs = priv->dev->max_tx_urbs;
-
-       priv->active_tx_contexts = 0;
-       for (i = 0; i < max_tx_urbs; i++)
-               priv->tx_contexts[i].echo_index = max_tx_urbs;
-}
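As the comment in struct kvaser_usb notes, max_tx_urbs doubles as a sentinel for free tx contexts, and the reset above marks every slot free. A sketch of how the xmit path below then claims a slot (simplified; the real code holds tx_contexts_lock):

/* Simplified from the xmit path below: a context whose echo_index
 * equals max_tx_urbs is free; claiming slot i stores i in echo_index.
 */
static struct kvaser_usb_tx_urb_context *
example_claim_context(struct kvaser_usb_net_priv *priv, unsigned int max_tx_urbs)
{
	unsigned int i;

	for (i = 0; i < max_tx_urbs; i++) {
		if (priv->tx_contexts[i].echo_index == max_tx_urbs) {
			priv->tx_contexts[i].echo_index = i;
			return &priv->tx_contexts[i];
		}
	}
	return NULL;	/* every context is in flight */
}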
-
-/* This method might sleep. Do not call it in the atomic context
- * of URB completions.
- */
-static void kvaser_usb_unlink_tx_urbs(struct kvaser_usb_net_priv *priv)
-{
-       usb_kill_anchored_urbs(&priv->tx_submitted);
-       kvaser_usb_reset_tx_urb_contexts(priv);
-}
-
-static void kvaser_usb_unlink_all_urbs(struct kvaser_usb *dev)
-{
-       int i;
-
-       usb_kill_anchored_urbs(&dev->rx_submitted);
-
-       for (i = 0; i < MAX_RX_URBS; i++)
-               usb_free_coherent(dev->udev, RX_BUFFER_SIZE,
-                                 dev->rxbuf[i],
-                                 dev->rxbuf_dma[i]);
-
-       for (i = 0; i < dev->nchannels; i++) {
-               struct kvaser_usb_net_priv *priv = dev->nets[i];
-
-               if (priv)
-                       kvaser_usb_unlink_tx_urbs(priv);
-       }
-}
-
-static int kvaser_usb_stop_chip(struct kvaser_usb_net_priv *priv)
-{
-       int err;
-
-       init_completion(&priv->stop_comp);
-
-       err = kvaser_usb_send_simple_msg(priv->dev, CMD_STOP_CHIP,
-                                        priv->channel);
-       if (err)
-               return err;
-
-       if (!wait_for_completion_timeout(&priv->stop_comp,
-                                        msecs_to_jiffies(STOP_TIMEOUT)))
-               return -ETIMEDOUT;
-
-       return 0;
-}
-
-static int kvaser_usb_flush_queue(struct kvaser_usb_net_priv *priv)
-{
-       struct kvaser_msg *msg;
-       int rc;
-
-       msg = kmalloc(sizeof(*msg), GFP_KERNEL);
-       if (!msg)
-               return -ENOMEM;
-
-       msg->id = CMD_FLUSH_QUEUE;
-       msg->len = MSG_HEADER_LEN + sizeof(struct kvaser_msg_flush_queue);
-       msg->u.flush_queue.channel = priv->channel;
-       msg->u.flush_queue.flags = 0x00;
-
-       rc = kvaser_usb_send_msg(priv->dev, msg);
-
-       kfree(msg);
-       return rc;
-}
-
-static int kvaser_usb_close(struct net_device *netdev)
-{
-       struct kvaser_usb_net_priv *priv = netdev_priv(netdev);
-       struct kvaser_usb *dev = priv->dev;
-       int err;
-
-       netif_stop_queue(netdev);
-
-       err = kvaser_usb_flush_queue(priv);
-       if (err)
-               netdev_warn(netdev, "Cannot flush queue, error %d\n", err);
-
-       err = kvaser_usb_send_simple_msg(dev, CMD_RESET_CHIP, priv->channel);
-       if (err)
-               netdev_warn(netdev, "Cannot reset card, error %d\n", err);
-
-       err = kvaser_usb_stop_chip(priv);
-       if (err)
-               netdev_warn(netdev, "Cannot stop device, error %d\n", err);
-
-       /* reset tx contexts */
-       kvaser_usb_unlink_tx_urbs(priv);
-
-       priv->can.state = CAN_STATE_STOPPED;
-       close_candev(priv->netdev);
-
-       return 0;
-}
-
-static void kvaser_usb_write_bulk_callback(struct urb *urb)
-{
-       struct kvaser_usb_tx_urb_context *context = urb->context;
-       struct kvaser_usb_net_priv *priv;
-       struct net_device *netdev;
-
-       if (WARN_ON(!context))
-               return;
-
-       priv = context->priv;
-       netdev = priv->netdev;
-
-       kfree(urb->transfer_buffer);
-
-       if (!netif_device_present(netdev))
-               return;
-
-       if (urb->status)
-               netdev_info(netdev, "Tx URB aborted (%d)\n", urb->status);
-}
-
-static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb,
-                                        struct net_device *netdev)
-{
-       struct kvaser_usb_net_priv *priv = netdev_priv(netdev);
-       struct kvaser_usb *dev = priv->dev;
-       struct net_device_stats *stats = &netdev->stats;
-       struct can_frame *cf = (struct can_frame *)skb->data;
-       struct kvaser_usb_tx_urb_context *context = NULL;
-       struct urb *urb;
-       void *buf;
-       struct kvaser_msg *msg;
-       int i, err, ret = NETDEV_TX_OK;
-       u8 *msg_tx_can_flags = NULL;            /* silence GCC uninitialized warning */
-       unsigned long flags;
-
-       if (can_dropped_invalid_skb(netdev, skb))
-               return NETDEV_TX_OK;
-
-       urb = usb_alloc_urb(0, GFP_ATOMIC);
-       if (!urb) {
-               stats->tx_dropped++;
-               dev_kfree_skb(skb);
-               return NETDEV_TX_OK;
-       }
-
-       buf = kmalloc(sizeof(struct kvaser_msg), GFP_ATOMIC);
-       if (!buf) {
-               stats->tx_dropped++;
-               dev_kfree_skb(skb);
-               goto freeurb;
-       }
-
-       msg = buf;
-       msg->len = MSG_HEADER_LEN + sizeof(struct kvaser_msg_tx_can);
-       msg->u.tx_can.channel = priv->channel;
-
-       switch (dev->family) {
-       case KVASER_LEAF:
-               msg_tx_can_flags = &msg->u.tx_can.leaf.flags;
-               break;
-       case KVASER_USBCAN:
-               msg_tx_can_flags = &msg->u.tx_can.usbcan.flags;
-               break;
-       }
-
-       *msg_tx_can_flags = 0;
-
-       if (cf->can_id & CAN_EFF_FLAG) {
-               msg->id = CMD_TX_EXT_MESSAGE;
-               msg->u.tx_can.msg[0] = (cf->can_id >> 24) & 0x1f;
-               msg->u.tx_can.msg[1] = (cf->can_id >> 18) & 0x3f;
-               msg->u.tx_can.msg[2] = (cf->can_id >> 14) & 0x0f;
-               msg->u.tx_can.msg[3] = (cf->can_id >> 6) & 0xff;
-               msg->u.tx_can.msg[4] = cf->can_id & 0x3f;
-       } else {
-               msg->id = CMD_TX_STD_MESSAGE;
-               msg->u.tx_can.msg[0] = (cf->can_id >> 6) & 0x1f;
-               msg->u.tx_can.msg[1] = cf->can_id & 0x3f;
-       }
-
-       msg->u.tx_can.msg[5] = cf->can_dlc;
-       memcpy(&msg->u.tx_can.msg[6], cf->data, cf->can_dlc);
-
-       if (cf->can_id & CAN_RTR_FLAG)
-               *msg_tx_can_flags |= MSG_FLAG_REMOTE_FRAME;
-
-       spin_lock_irqsave(&priv->tx_contexts_lock, flags);
-       for (i = 0; i < dev->max_tx_urbs; i++) {
-               if (priv->tx_contexts[i].echo_index == dev->max_tx_urbs) {
-                       context = &priv->tx_contexts[i];
-
-                       context->echo_index = i;
-                       can_put_echo_skb(skb, netdev, context->echo_index);
-                       ++priv->active_tx_contexts;
-                       if (priv->active_tx_contexts >= dev->max_tx_urbs)
-                               netif_stop_queue(netdev);
-
-                       break;
-               }
-       }
-       spin_unlock_irqrestore(&priv->tx_contexts_lock, flags);
-
-       /* This should never happen; it implies a flow control bug */
-       if (!context) {
-               netdev_warn(netdev, "cannot find free context\n");
-
-               kfree(buf);
-               ret = NETDEV_TX_BUSY;
-               goto freeurb;
-       }
-
-       context->priv = priv;
-       context->dlc = cf->can_dlc;
-
-       msg->u.tx_can.tid = context->echo_index;
-
-       usb_fill_bulk_urb(urb, dev->udev,
-                         usb_sndbulkpipe(dev->udev,
-                                         dev->bulk_out->bEndpointAddress),
-                         buf, msg->len,
-                         kvaser_usb_write_bulk_callback, context);
-       usb_anchor_urb(urb, &priv->tx_submitted);
-
-       err = usb_submit_urb(urb, GFP_ATOMIC);
-       if (unlikely(err)) {
-               spin_lock_irqsave(&priv->tx_contexts_lock, flags);
-
-               can_free_echo_skb(netdev, context->echo_index);
-               context->echo_index = dev->max_tx_urbs;
-               --priv->active_tx_contexts;
-               netif_wake_queue(netdev);
-
-               spin_unlock_irqrestore(&priv->tx_contexts_lock, flags);
-
-               usb_unanchor_urb(urb);
-               kfree(buf);
-
-               stats->tx_dropped++;
-
-               if (err == -ENODEV)
-                       netif_device_detach(netdev);
-               else
-                       netdev_warn(netdev, "Failed tx_urb %d\n", err);
-
-               goto freeurb;
-       }
-
-       ret = NETDEV_TX_OK;
-
-freeurb:
-       usb_free_urb(urb);
-       return ret;
-}
-
-static const struct net_device_ops kvaser_usb_netdev_ops = {
-       .ndo_open = kvaser_usb_open,
-       .ndo_stop = kvaser_usb_close,
-       .ndo_start_xmit = kvaser_usb_start_xmit,
-       .ndo_change_mtu = can_change_mtu,
-};
-
-static const struct can_bittiming_const kvaser_usb_bittiming_const = {
-       .name = "kvaser_usb",
-       .tseg1_min = KVASER_USB_TSEG1_MIN,
-       .tseg1_max = KVASER_USB_TSEG1_MAX,
-       .tseg2_min = KVASER_USB_TSEG2_MIN,
-       .tseg2_max = KVASER_USB_TSEG2_MAX,
-       .sjw_max = KVASER_USB_SJW_MAX,
-       .brp_min = KVASER_USB_BRP_MIN,
-       .brp_max = KVASER_USB_BRP_MAX,
-       .brp_inc = KVASER_USB_BRP_INC,
-};
-
-static int kvaser_usb_set_bittiming(struct net_device *netdev)
-{
-       struct kvaser_usb_net_priv *priv = netdev_priv(netdev);
-       struct can_bittiming *bt = &priv->can.bittiming;
-       struct kvaser_usb *dev = priv->dev;
-       struct kvaser_msg *msg;
-       int rc;
-
-       msg = kmalloc(sizeof(*msg), GFP_KERNEL);
-       if (!msg)
-               return -ENOMEM;
-
-       msg->id = CMD_SET_BUS_PARAMS;
-       msg->len = MSG_HEADER_LEN + sizeof(struct kvaser_msg_busparams);
-       msg->u.busparams.channel = priv->channel;
-       msg->u.busparams.tid = 0xff;
-       msg->u.busparams.bitrate = cpu_to_le32(bt->bitrate);
-       msg->u.busparams.sjw = bt->sjw;
-       msg->u.busparams.tseg1 = bt->prop_seg + bt->phase_seg1;
-       msg->u.busparams.tseg2 = bt->phase_seg2;
-
-       if (priv->can.ctrlmode & CAN_CTRLMODE_3_SAMPLES)
-               msg->u.busparams.no_samp = 3;
-       else
-               msg->u.busparams.no_samp = 1;
-
-       rc = kvaser_usb_send_msg(dev, msg);
-
-       kfree(msg);
-       return rc;
-}
-
-static int kvaser_usb_set_mode(struct net_device *netdev,
-                              enum can_mode mode)
-{
-       struct kvaser_usb_net_priv *priv = netdev_priv(netdev);
-       int err;
-
-       switch (mode) {
-       case CAN_MODE_START:
-               err = kvaser_usb_simple_msg_async(priv, CMD_START_CHIP);
-               if (err)
-                       return err;
-               break;
-       default:
-               return -EOPNOTSUPP;
-       }
-
-       return 0;
-}
-
-static int kvaser_usb_get_berr_counter(const struct net_device *netdev,
-                                      struct can_berr_counter *bec)
-{
-       struct kvaser_usb_net_priv *priv = netdev_priv(netdev);
-
-       *bec = priv->bec;
-
-       return 0;
-}
-
-static void kvaser_usb_remove_interfaces(struct kvaser_usb *dev)
-{
-       int i;
-
-       for (i = 0; i < dev->nchannels; i++) {
-               if (!dev->nets[i])
-                       continue;
-
-               unregister_candev(dev->nets[i]->netdev);
-       }
-
-       kvaser_usb_unlink_all_urbs(dev);
-
-       for (i = 0; i < dev->nchannels; i++) {
-               if (!dev->nets[i])
-                       continue;
-
-               free_candev(dev->nets[i]->netdev);
-       }
-}
-
-static int kvaser_usb_init_one(struct usb_interface *intf,
-                              const struct usb_device_id *id, int channel)
-{
-       struct kvaser_usb *dev = usb_get_intfdata(intf);
-       struct net_device *netdev;
-       struct kvaser_usb_net_priv *priv;
-       int err;
-
-       err = kvaser_usb_send_simple_msg(dev, CMD_RESET_CHIP, channel);
-       if (err)
-               return err;
-
-       netdev = alloc_candev(sizeof(*priv) +
-                             dev->max_tx_urbs * sizeof(*priv->tx_contexts),
-                             dev->max_tx_urbs);
-       if (!netdev) {
-               dev_err(&intf->dev, "Cannot alloc candev\n");
-               return -ENOMEM;
-       }
-
-       priv = netdev_priv(netdev);
-
-       init_usb_anchor(&priv->tx_submitted);
-       init_completion(&priv->start_comp);
-       init_completion(&priv->stop_comp);
-
-       priv->dev = dev;
-       priv->netdev = netdev;
-       priv->channel = channel;
-
-       spin_lock_init(&priv->tx_contexts_lock);
-       kvaser_usb_reset_tx_urb_contexts(priv);
-
-       priv->can.state = CAN_STATE_STOPPED;
-       priv->can.clock.freq = CAN_USB_CLOCK;
-       priv->can.bittiming_const = &kvaser_usb_bittiming_const;
-       priv->can.do_set_bittiming = kvaser_usb_set_bittiming;
-       priv->can.do_set_mode = kvaser_usb_set_mode;
-       if (id->driver_info & KVASER_HAS_TXRX_ERRORS)
-               priv->can.do_get_berr_counter = kvaser_usb_get_berr_counter;
-       priv->can.ctrlmode_supported = CAN_CTRLMODE_3_SAMPLES;
-       if (id->driver_info & KVASER_HAS_SILENT_MODE)
-               priv->can.ctrlmode_supported |= CAN_CTRLMODE_LISTENONLY;
-
-       netdev->flags |= IFF_ECHO;
-
-       netdev->netdev_ops = &kvaser_usb_netdev_ops;
-
-       SET_NETDEV_DEV(netdev, &intf->dev);
-       netdev->dev_id = channel;
-
-       dev->nets[channel] = priv;
-
-       err = register_candev(netdev);
-       if (err) {
-               dev_err(&intf->dev, "Failed to register can device\n");
-               free_candev(netdev);
-               dev->nets[channel] = NULL;
-               return err;
-       }
-
-       netdev_dbg(netdev, "device registered\n");
-
-       return 0;
-}
-
-static int kvaser_usb_get_endpoints(const struct usb_interface *intf,
-                                   struct usb_endpoint_descriptor **in,
-                                   struct usb_endpoint_descriptor **out)
-{
-       const struct usb_host_interface *iface_desc;
-       struct usb_endpoint_descriptor *endpoint;
-       int i;
-
-       iface_desc = &intf->altsetting[0];
-
-       for (i = 0; i < iface_desc->desc.bNumEndpoints; ++i) {
-               endpoint = &iface_desc->endpoint[i].desc;
-
-               if (!*in && usb_endpoint_is_bulk_in(endpoint))
-                       *in = endpoint;
-
-               if (!*out && usb_endpoint_is_bulk_out(endpoint))
-                       *out = endpoint;
-
-               /* use first bulk endpoint for in and out */
-               if (*in && *out)
-                       return 0;
-       }
-
-       return -ENODEV;
-}
-
-static int kvaser_usb_probe(struct usb_interface *intf,
-                           const struct usb_device_id *id)
-{
-       struct kvaser_usb *dev;
-       int err = -ENOMEM;
-       int i, retry = 3;
-
-       dev = devm_kzalloc(&intf->dev, sizeof(*dev), GFP_KERNEL);
-       if (!dev)
-               return -ENOMEM;
-
-       if (kvaser_is_leaf(id)) {
-               dev->family = KVASER_LEAF;
-       } else if (kvaser_is_usbcan(id)) {
-               dev->family = KVASER_USBCAN;
-       } else {
-               dev_err(&intf->dev,
-                       "Product ID (%d) does not belong to any known Kvaser USB family",
-                       id->idProduct);
-               return -ENODEV;
-       }
-
-       err = kvaser_usb_get_endpoints(intf, &dev->bulk_in, &dev->bulk_out);
-       if (err) {
-               dev_err(&intf->dev, "Cannot get usb endpoint(s)");
-               return err;
-       }
-
-       dev->udev = interface_to_usbdev(intf);
-
-       init_usb_anchor(&dev->rx_submitted);
-
-       usb_set_intfdata(intf, dev);
-
-       /* On some x86 laptops, plugging a Kvaser device again after
-        * an unplug makes the firmware always ignore the very first
-        * command. For such a case, provide some room for retries
-        * instead of completely exiting the driver.
-        */
-       do {
-               err = kvaser_usb_get_software_info(dev);
-       } while (--retry && err == -ETIMEDOUT);
-
-       if (err) {
-               dev_err(&intf->dev,
-                       "Cannot get software info, error %d\n", err);
-               return err;
-       }
-
-       dev_dbg(&intf->dev, "Firmware version: %d.%d.%d\n",
-               ((dev->fw_version >> 24) & 0xff),
-               ((dev->fw_version >> 16) & 0xff),
-               (dev->fw_version & 0xffff));
-
-       dev_dbg(&intf->dev, "Max outstanding tx = %d URBs\n", dev->max_tx_urbs);
-
-       err = kvaser_usb_get_card_info(dev);
-       if (err) {
-               dev_err(&intf->dev,
-                       "Cannot get card info, error %d\n", err);
-               return err;
-       }
-
-       for (i = 0; i < dev->nchannels; i++) {
-               err = kvaser_usb_init_one(intf, id, i);
-               if (err) {
-                       kvaser_usb_remove_interfaces(dev);
-                       return err;
-               }
-       }
-
-       return 0;
-}
-
-static void kvaser_usb_disconnect(struct usb_interface *intf)
-{
-       struct kvaser_usb *dev = usb_get_intfdata(intf);
-
-       usb_set_intfdata(intf, NULL);
-
-       if (!dev)
-               return;
-
-       kvaser_usb_remove_interfaces(dev);
-}
-
-static struct usb_driver kvaser_usb_driver = {
-       .name = "kvaser_usb",
-       .probe = kvaser_usb_probe,
-       .disconnect = kvaser_usb_disconnect,
-       .id_table = kvaser_usb_table,
-};
-
-module_usb_driver(kvaser_usb_driver);
-
-MODULE_AUTHOR("Olivier Sobrie <olivier@sobrie.be>");
-MODULE_DESCRIPTION("CAN driver for Kvaser CAN/USB devices");
-MODULE_LICENSE("GPL v2");
diff --git a/drivers/net/can/usb/kvaser_usb/Makefile b/drivers/net/can/usb/kvaser_usb/Makefile
new file mode 100644 (file)
index 0000000..9f41dda
--- /dev/null
@@ -0,0 +1,2 @@
+obj-$(CONFIG_CAN_KVASER_USB) += kvaser_usb.o
+kvaser_usb-y = kvaser_usb_core.o kvaser_usb_leaf.o kvaser_usb_hydra.o
diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb.h b/drivers/net/can/usb/kvaser_usb/kvaser_usb.h
new file mode 100644 (file)
index 0000000..390b6bd
--- /dev/null
@@ -0,0 +1,188 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Parts of this driver are based on the following:
+ *  - Kvaser linux leaf driver (version 4.78)
+ *  - CAN driver for esd CAN-USB/2
+ *  - Kvaser linux usbcanII driver (version 5.3)
+ *  - Kvaser linux mhydra driver (version 5.24)
+ *
+ * Copyright (C) 2002-2018 KVASER AB, Sweden. All rights reserved.
+ * Copyright (C) 2010 Matthias Fuchs <matthias.fuchs@esd.eu>, esd gmbh
+ * Copyright (C) 2012 Olivier Sobrie <olivier@sobrie.be>
+ * Copyright (C) 2015 Valeo S.A.
+ */
+
+#ifndef KVASER_USB_H
+#define KVASER_USB_H
+
+/* Kvaser USB CAN dongles are divided into three major platforms:
+ * - Hydra: Running firmware labeled as 'mhydra'
+ * - Leaf: Based on Renesas M32C or Freescale i.MX28, running firmware labeled
+ *         as 'filo'
+ * - UsbcanII: Based on Renesas M16C, running firmware labeled as 'helios'
+ */
+
+#include <linux/completion.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <linux/usb.h>
+
+#include <linux/can.h>
+#include <linux/can/dev.h>
+
+#define KVASER_USB_MAX_RX_URBS                 4
+#define KVASER_USB_MAX_TX_URBS                 128
+#define KVASER_USB_TIMEOUT                     1000 /* msecs */
+#define KVASER_USB_RX_BUFFER_SIZE              3072
+#define KVASER_USB_MAX_NET_DEVICES             5
+
+/* USB devices features */
+#define KVASER_USB_HAS_SILENT_MODE             BIT(0)
+#define KVASER_USB_HAS_TXRX_ERRORS             BIT(1)
+
+/* Device capabilities */
+#define KVASER_USB_CAP_BERR_CAP                        0x01
+#define KVASER_USB_CAP_EXT_CAP                 0x02
+#define KVASER_USB_HYDRA_CAP_EXT_CMD           0x04
+
+struct kvaser_usb_dev_cfg;
+
+enum kvaser_usb_leaf_family {
+       KVASER_LEAF,
+       KVASER_USBCAN,
+};
+
+#define KVASER_USB_HYDRA_MAX_CMD_LEN           128
+struct kvaser_usb_dev_card_data_hydra {
+       u8 channel_to_he[KVASER_USB_MAX_NET_DEVICES];
+       u8 sysdbg_he;
+       spinlock_t transid_lock; /* lock for transid */
+       u16 transid;
+       /* lock for usb_rx_leftover and usb_rx_leftover_len */
+       spinlock_t usb_rx_leftover_lock;
+       u8 usb_rx_leftover[KVASER_USB_HYDRA_MAX_CMD_LEN];
+       u8 usb_rx_leftover_len;
+};
+struct kvaser_usb_dev_card_data {
+       u32 ctrlmode_supported;
+       u32 capabilities;
+       union {
+               struct {
+                       enum kvaser_usb_leaf_family family;
+               } leaf;
+               struct kvaser_usb_dev_card_data_hydra hydra;
+       };
+};
+
+/* Context for an outstanding, not yet ACKed, transmission */
+struct kvaser_usb_tx_urb_context {
+       struct kvaser_usb_net_priv *priv;
+       u32 echo_index;
+       int dlc;
+};
+
+struct kvaser_usb {
+       struct usb_device *udev;
+       struct usb_interface *intf;
+       struct kvaser_usb_net_priv *nets[KVASER_USB_MAX_NET_DEVICES];
+       const struct kvaser_usb_dev_ops *ops;
+       const struct kvaser_usb_dev_cfg *cfg;
+
+       struct usb_endpoint_descriptor *bulk_in, *bulk_out;
+       struct usb_anchor rx_submitted;
+
+       /* @max_tx_urbs: Firmware-reported maximum number of outstanding,
+        * not yet ACKed, transmissions on this device. This value is
+        * also used as a sentinel for marking free tx contexts.
+        */
+       u32 fw_version;
+       unsigned int nchannels;
+       unsigned int max_tx_urbs;
+       struct kvaser_usb_dev_card_data card_data;
+
+       bool rxinitdone;
+       void *rxbuf[KVASER_USB_MAX_RX_URBS];
+       dma_addr_t rxbuf_dma[KVASER_USB_MAX_RX_URBS];
+};
+
+struct kvaser_usb_net_priv {
+       struct can_priv can;
+       struct can_berr_counter bec;
+
+       struct kvaser_usb *dev;
+       struct net_device *netdev;
+       int channel;
+
+       struct completion start_comp, stop_comp, flush_comp;
+       struct usb_anchor tx_submitted;
+
+       spinlock_t tx_contexts_lock; /* lock for active_tx_contexts */
+       int active_tx_contexts;
+       struct kvaser_usb_tx_urb_context tx_contexts[];
+};
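
To make the echo_index sentinel concrete: a tx context whose echo_index equals max_tx_urbs is free, and claiming a slot stores the slot's own index there. A minimal sketch, with an invented helper name (the real loop lives in kvaser_usb_start_xmit() in kvaser_usb_core.c and runs under priv->tx_contexts_lock):

/* Sketch only: claim a free tx context via the echo_index sentinel.
 * Caller must hold priv->tx_contexts_lock. Returns NULL when every
 * slot is in flight (the queue should then be stopped).
 */
static struct kvaser_usb_tx_urb_context *
example_claim_tx_context(struct kvaser_usb_net_priv *priv)
{
	unsigned int max = priv->dev->max_tx_urbs;
	unsigned int i;

	for (i = 0; i < max; i++) {
		if (priv->tx_contexts[i].echo_index == max) {
			priv->tx_contexts[i].echo_index = i;
			priv->active_tx_contexts++;
			return &priv->tx_contexts[i];
		}
	}

	return NULL;
}
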
+
+/**
+ * struct kvaser_usb_dev_ops - Device specific functions
+ * @dev_set_mode:              used for can.do_set_mode
+ * @dev_set_bittiming:         used for can.do_set_bittiming
+ * @dev_set_data_bittiming:    used for can.do_set_data_bittiming
+ * @dev_get_berr_counter:      used for can.do_get_berr_counter
+ *
+ * @dev_setup_endpoints:       set up the USB in and out endpoints
+ * @dev_init_card:             initialize card
+ * @dev_get_software_info:     get software info
+ * @dev_get_software_details:  get software details
+ * @dev_get_card_info:         get card info
+ * @dev_get_capabilities:      discover device capabilities
+ *
+ * @dev_set_opt_mode:          set ctrlmode
+ * @dev_start_chip:            start the CAN controller
+ * @dev_stop_chip:             stop the CAN controller
+ * @dev_reset_chip:            reset the CAN controller
+ * @dev_flush_queue:           flush outstanding CAN messages
+ * @dev_read_bulk_callback:    handle incoming commands
+ * @dev_frame_to_cmd:          translate struct can_frame into device command
+ */
+struct kvaser_usb_dev_ops {
+       int (*dev_set_mode)(struct net_device *netdev, enum can_mode mode);
+       int (*dev_set_bittiming)(struct net_device *netdev);
+       int (*dev_set_data_bittiming)(struct net_device *netdev);
+       int (*dev_get_berr_counter)(const struct net_device *netdev,
+                                   struct can_berr_counter *bec);
+       int (*dev_setup_endpoints)(struct kvaser_usb *dev);
+       int (*dev_init_card)(struct kvaser_usb *dev);
+       int (*dev_get_software_info)(struct kvaser_usb *dev);
+       int (*dev_get_software_details)(struct kvaser_usb *dev);
+       int (*dev_get_card_info)(struct kvaser_usb *dev);
+       int (*dev_get_capabilities)(struct kvaser_usb *dev);
+       int (*dev_set_opt_mode)(const struct kvaser_usb_net_priv *priv);
+       int (*dev_start_chip)(struct kvaser_usb_net_priv *priv);
+       int (*dev_stop_chip)(struct kvaser_usb_net_priv *priv);
+       int (*dev_reset_chip)(struct kvaser_usb *dev, int channel);
+       int (*dev_flush_queue)(struct kvaser_usb_net_priv *priv);
+       void (*dev_read_bulk_callback)(struct kvaser_usb *dev, void *buf,
+                                      int len);
+       void *(*dev_frame_to_cmd)(const struct kvaser_usb_net_priv *priv,
+                                 const struct sk_buff *skb, int *frame_len,
+                                 int *cmd_len, u16 transid);
+};
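
This ops table is the seam between the generic core and the device-specific backends; the core never touches the hardware except through dev->ops. A hedged sketch of the shape of a backend (every name below is invented for illustration; the real tables are kvaser_usb_leaf_dev_ops and kvaser_usb_hydra_dev_ops, declared further down):

/* Sketch only: skeleton of a hypothetical backend. A real
 * dev_start_chip builds a device command, sends it with
 * kvaser_usb_send_cmd() or kvaser_usb_send_cmd_async(), and
 * waits on priv->start_comp.
 */
static int example_dev_start_chip(struct kvaser_usb_net_priv *priv)
{
	return 0;	/* pretend the CAN controller started */
}

static const struct kvaser_usb_dev_ops example_dev_ops = {
	.dev_start_chip = example_dev_start_chip,
	/* ...the remaining handlers are filled in the same way... */
};

The core then dispatches blindly through the table, e.g. dev->ops->dev_start_chip(priv) in kvaser_usb_open(), without knowing which device family it is driving.
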
+
+struct kvaser_usb_dev_cfg {
+       const struct can_clock clock;
+       const unsigned int timestamp_freq;
+       const struct can_bittiming_const * const bittiming_const;
+       const struct can_bittiming_const * const data_bittiming_const;
+};
+
+extern const struct kvaser_usb_dev_ops kvaser_usb_hydra_dev_ops;
+extern const struct kvaser_usb_dev_ops kvaser_usb_leaf_dev_ops;
+
+int kvaser_usb_recv_cmd(const struct kvaser_usb *dev, void *cmd, int len,
+                       int *actual_len);
+
+int kvaser_usb_send_cmd(const struct kvaser_usb *dev, void *cmd, int len);
+
+int kvaser_usb_send_cmd_async(struct kvaser_usb_net_priv *priv, void *cmd,
+                             int len);
+
+int kvaser_usb_can_rx_over_error(struct net_device *netdev);
+#endif /* KVASER_USB_H */
diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c
new file mode 100644 (file)
index 0000000..b939a4c
--- /dev/null
@@ -0,0 +1,835 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Parts of this driver are based on the following:
+ *  - Kvaser linux leaf driver (version 4.78)
+ *  - CAN driver for esd CAN-USB/2
+ *  - Kvaser linux usbcanII driver (version 5.3)
+ *  - Kvaser linux mhydra driver (version 5.24)
+ *
+ * Copyright (C) 2002-2018 KVASER AB, Sweden. All rights reserved.
+ * Copyright (C) 2010 Matthias Fuchs <matthias.fuchs@esd.eu>, esd gmbh
+ * Copyright (C) 2012 Olivier Sobrie <olivier@sobrie.be>
+ * Copyright (C) 2015 Valeo S.A.
+ */
+
+#include <linux/completion.h>
+#include <linux/device.h>
+#include <linux/gfp.h>
+#include <linux/if.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <linux/usb.h>
+
+#include <linux/can.h>
+#include <linux/can/dev.h>
+#include <linux/can/error.h>
+#include <linux/can/netlink.h>
+
+#include "kvaser_usb.h"
+
+/* Kvaser USB vendor id. */
+#define KVASER_VENDOR_ID                       0x0bfd
+
+/* Kvaser Leaf USB devices product ids */
+#define USB_LEAF_DEVEL_PRODUCT_ID              10
+#define USB_LEAF_LITE_PRODUCT_ID               11
+#define USB_LEAF_PRO_PRODUCT_ID                        12
+#define USB_LEAF_SPRO_PRODUCT_ID               14
+#define USB_LEAF_PRO_LS_PRODUCT_ID             15
+#define USB_LEAF_PRO_SWC_PRODUCT_ID            16
+#define USB_LEAF_PRO_LIN_PRODUCT_ID            17
+#define USB_LEAF_SPRO_LS_PRODUCT_ID            18
+#define USB_LEAF_SPRO_SWC_PRODUCT_ID           19
+#define USB_MEMO2_DEVEL_PRODUCT_ID             22
+#define USB_MEMO2_HSHS_PRODUCT_ID              23
+#define USB_UPRO_HSHS_PRODUCT_ID               24
+#define USB_LEAF_LITE_GI_PRODUCT_ID            25
+#define USB_LEAF_PRO_OBDII_PRODUCT_ID          26
+#define USB_MEMO2_HSLS_PRODUCT_ID              27
+#define USB_LEAF_LITE_CH_PRODUCT_ID            28
+#define USB_BLACKBIRD_SPRO_PRODUCT_ID          29
+#define USB_OEM_MERCURY_PRODUCT_ID             34
+#define USB_OEM_LEAF_PRODUCT_ID                        35
+#define USB_CAN_R_PRODUCT_ID                   39
+#define USB_LEAF_LITE_V2_PRODUCT_ID            288
+#define USB_MINI_PCIE_HS_PRODUCT_ID            289
+#define USB_LEAF_LIGHT_HS_V2_OEM_PRODUCT_ID    290
+#define USB_USBCAN_LIGHT_2HS_PRODUCT_ID                291
+#define USB_MINI_PCIE_2HS_PRODUCT_ID           292
+
+/* Kvaser USBCan-II devices product ids */
+#define USB_USBCAN_REVB_PRODUCT_ID             2
+#define USB_VCI2_PRODUCT_ID                    3
+#define USB_USBCAN2_PRODUCT_ID                 4
+#define USB_MEMORATOR_PRODUCT_ID               5
+
+/* Kvaser Minihydra USB devices product ids */
+#define USB_BLACKBIRD_V2_PRODUCT_ID            258
+#define USB_MEMO_PRO_5HS_PRODUCT_ID            260
+#define USB_USBCAN_PRO_5HS_PRODUCT_ID          261
+#define USB_USBCAN_LIGHT_4HS_PRODUCT_ID                262
+#define USB_LEAF_PRO_HS_V2_PRODUCT_ID          263
+#define USB_USBCAN_PRO_2HS_V2_PRODUCT_ID       264
+#define USB_MEMO_2HS_PRODUCT_ID                        265
+#define USB_MEMO_PRO_2HS_V2_PRODUCT_ID         266
+#define USB_HYBRID_CANLIN_PRODUCT_ID           267
+#define USB_ATI_USBCAN_PRO_2HS_V2_PRODUCT_ID   268
+#define USB_ATI_MEMO_PRO_2HS_V2_PRODUCT_ID     269
+#define USB_HYBRID_PRO_CANLIN_PRODUCT_ID       270
+
+static inline bool kvaser_is_leaf(const struct usb_device_id *id)
+{
+       return (id->idProduct >= USB_LEAF_DEVEL_PRODUCT_ID &&
+               id->idProduct <= USB_CAN_R_PRODUCT_ID) ||
+               (id->idProduct >= USB_LEAF_LITE_V2_PRODUCT_ID &&
+                id->idProduct <= USB_MINI_PCIE_2HS_PRODUCT_ID);
+}
+
+static inline bool kvaser_is_usbcan(const struct usb_device_id *id)
+{
+       return id->idProduct >= USB_USBCAN_REVB_PRODUCT_ID &&
+              id->idProduct <= USB_MEMORATOR_PRODUCT_ID;
+}
+
+static inline bool kvaser_is_hydra(const struct usb_device_id *id)
+{
+       return id->idProduct >= USB_BLACKBIRD_V2_PRODUCT_ID &&
+              id->idProduct <= USB_HYBRID_PRO_CANLIN_PRODUCT_ID;
+}
+
+static const struct usb_device_id kvaser_usb_table[] = {
+       /* Leaf USB product IDs */
+       { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_DEVEL_PRODUCT_ID) },
+       { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_PRODUCT_ID) },
+       { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_PRO_PRODUCT_ID),
+               .driver_info = KVASER_USB_HAS_TXRX_ERRORS |
+                              KVASER_USB_HAS_SILENT_MODE },
+       { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_SPRO_PRODUCT_ID),
+               .driver_info = KVASER_USB_HAS_TXRX_ERRORS |
+                              KVASER_USB_HAS_SILENT_MODE },
+       { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_PRO_LS_PRODUCT_ID),
+               .driver_info = KVASER_USB_HAS_TXRX_ERRORS |
+                              KVASER_USB_HAS_SILENT_MODE },
+       { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_PRO_SWC_PRODUCT_ID),
+               .driver_info = KVASER_USB_HAS_TXRX_ERRORS |
+                              KVASER_USB_HAS_SILENT_MODE },
+       { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_PRO_LIN_PRODUCT_ID),
+               .driver_info = KVASER_USB_HAS_TXRX_ERRORS |
+                              KVASER_USB_HAS_SILENT_MODE },
+       { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_SPRO_LS_PRODUCT_ID),
+               .driver_info = KVASER_USB_HAS_TXRX_ERRORS |
+                              KVASER_USB_HAS_SILENT_MODE },
+       { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_SPRO_SWC_PRODUCT_ID),
+               .driver_info = KVASER_USB_HAS_TXRX_ERRORS |
+                              KVASER_USB_HAS_SILENT_MODE },
+       { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO2_DEVEL_PRODUCT_ID),
+               .driver_info = KVASER_USB_HAS_TXRX_ERRORS |
+                              KVASER_USB_HAS_SILENT_MODE },
+       { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO2_HSHS_PRODUCT_ID),
+               .driver_info = KVASER_USB_HAS_TXRX_ERRORS |
+                              KVASER_USB_HAS_SILENT_MODE },
+       { USB_DEVICE(KVASER_VENDOR_ID, USB_UPRO_HSHS_PRODUCT_ID),
+               .driver_info = KVASER_USB_HAS_TXRX_ERRORS },
+       { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_GI_PRODUCT_ID) },
+       { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_PRO_OBDII_PRODUCT_ID),
+               .driver_info = KVASER_USB_HAS_TXRX_ERRORS |
+                              KVASER_USB_HAS_SILENT_MODE },
+       { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO2_HSLS_PRODUCT_ID),
+               .driver_info = KVASER_USB_HAS_TXRX_ERRORS },
+       { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_CH_PRODUCT_ID),
+               .driver_info = KVASER_USB_HAS_TXRX_ERRORS },
+       { USB_DEVICE(KVASER_VENDOR_ID, USB_BLACKBIRD_SPRO_PRODUCT_ID),
+               .driver_info = KVASER_USB_HAS_TXRX_ERRORS },
+       { USB_DEVICE(KVASER_VENDOR_ID, USB_OEM_MERCURY_PRODUCT_ID),
+               .driver_info = KVASER_USB_HAS_TXRX_ERRORS },
+       { USB_DEVICE(KVASER_VENDOR_ID, USB_OEM_LEAF_PRODUCT_ID),
+               .driver_info = KVASER_USB_HAS_TXRX_ERRORS },
+       { USB_DEVICE(KVASER_VENDOR_ID, USB_CAN_R_PRODUCT_ID),
+               .driver_info = KVASER_USB_HAS_TXRX_ERRORS },
+       { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_V2_PRODUCT_ID) },
+       { USB_DEVICE(KVASER_VENDOR_ID, USB_MINI_PCIE_HS_PRODUCT_ID) },
+       { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LIGHT_HS_V2_OEM_PRODUCT_ID) },
+       { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_LIGHT_2HS_PRODUCT_ID) },
+       { USB_DEVICE(KVASER_VENDOR_ID, USB_MINI_PCIE_2HS_PRODUCT_ID) },
+
+       /* USBCANII USB product IDs */
+       { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN2_PRODUCT_ID),
+               .driver_info = KVASER_USB_HAS_TXRX_ERRORS },
+       { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_REVB_PRODUCT_ID),
+               .driver_info = KVASER_USB_HAS_TXRX_ERRORS },
+       { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMORATOR_PRODUCT_ID),
+               .driver_info = KVASER_USB_HAS_TXRX_ERRORS },
+       { USB_DEVICE(KVASER_VENDOR_ID, USB_VCI2_PRODUCT_ID),
+               .driver_info = KVASER_USB_HAS_TXRX_ERRORS },
+
+       /* Minihydra USB product IDs */
+       { USB_DEVICE(KVASER_VENDOR_ID, USB_BLACKBIRD_V2_PRODUCT_ID) },
+       { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO_PRO_5HS_PRODUCT_ID) },
+       { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_PRO_5HS_PRODUCT_ID) },
+       { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_LIGHT_4HS_PRODUCT_ID) },
+       { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_PRO_HS_V2_PRODUCT_ID) },
+       { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_PRO_2HS_V2_PRODUCT_ID) },
+       { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO_2HS_PRODUCT_ID) },
+       { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO_PRO_2HS_V2_PRODUCT_ID) },
+       { USB_DEVICE(KVASER_VENDOR_ID, USB_HYBRID_CANLIN_PRODUCT_ID) },
+       { USB_DEVICE(KVASER_VENDOR_ID, USB_ATI_USBCAN_PRO_2HS_V2_PRODUCT_ID) },
+       { USB_DEVICE(KVASER_VENDOR_ID, USB_ATI_MEMO_PRO_2HS_V2_PRODUCT_ID) },
+       { USB_DEVICE(KVASER_VENDOR_ID, USB_HYBRID_PRO_CANLIN_PRODUCT_ID) },
+       { }
+};
+MODULE_DEVICE_TABLE(usb, kvaser_usb_table);
+
+int kvaser_usb_send_cmd(const struct kvaser_usb *dev, void *cmd, int len)
+{
+       int actual_len; /* Not used */
+
+       return usb_bulk_msg(dev->udev,
+                           usb_sndbulkpipe(dev->udev,
+                                           dev->bulk_out->bEndpointAddress),
+                           cmd, len, &actual_len, KVASER_USB_TIMEOUT);
+}
+
+int kvaser_usb_recv_cmd(const struct kvaser_usb *dev, void *cmd, int len,
+                       int *actual_len)
+{
+       return usb_bulk_msg(dev->udev,
+                           usb_rcvbulkpipe(dev->udev,
+                                           dev->bulk_in->bEndpointAddress),
+                           cmd, len, actual_len, KVASER_USB_TIMEOUT);
+}
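
These two helpers give the backends a synchronous command path over the bulk endpoints. A sketch of a request/response round trip (the wrapper is invented for this example; real backends additionally read in a loop and match command IDs before accepting a response):

/* Sketch only: send one request and read back one response buffer.
 * req and resp point at backend-specific command structs.
 */
static int example_query(const struct kvaser_usb *dev,
			 void *req, int req_len,
			 void *resp, int resp_len)
{
	int actual_len;
	int err;

	err = kvaser_usb_send_cmd(dev, req, req_len);
	if (err)
		return err;

	return kvaser_usb_recv_cmd(dev, resp, resp_len, &actual_len);
}
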
+
+static void kvaser_usb_send_cmd_callback(struct urb *urb)
+{
+       struct net_device *netdev = urb->context;
+
+       kfree(urb->transfer_buffer);
+
+       if (urb->status)
+               netdev_warn(netdev, "urb status received: %d\n", urb->status);
+}
+
+int kvaser_usb_send_cmd_async(struct kvaser_usb_net_priv *priv, void *cmd,
+                             int len)
+{
+       struct kvaser_usb *dev = priv->dev;
+       struct net_device *netdev = priv->netdev;
+       struct urb *urb;
+       int err;
+
+       urb = usb_alloc_urb(0, GFP_ATOMIC);
+       if (!urb)
+               return -ENOMEM;
+
+       usb_fill_bulk_urb(urb, dev->udev,
+                         usb_sndbulkpipe(dev->udev,
+                                         dev->bulk_out->bEndpointAddress),
+                         cmd, len, kvaser_usb_send_cmd_callback, netdev);
+       usb_anchor_urb(urb, &priv->tx_submitted);
+
+       err = usb_submit_urb(urb, GFP_ATOMIC);
+       if (err) {
+               netdev_err(netdev, "Error transmitting URB\n");
+               usb_unanchor_urb(urb);
+       }
+       usb_free_urb(urb);
+
+       return 0;
+}
+
+int kvaser_usb_can_rx_over_error(struct net_device *netdev)
+{
+       struct net_device_stats *stats = &netdev->stats;
+       struct can_frame *cf;
+       struct sk_buff *skb;
+
+       stats->rx_over_errors++;
+       stats->rx_errors++;
+
+       skb = alloc_can_err_skb(netdev, &cf);
+       if (!skb) {
+               stats->rx_dropped++;
+               netdev_warn(netdev, "No memory left for err_skb\n");
+               return -ENOMEM;
+       }
+
+       cf->can_id |= CAN_ERR_CRTL;
+       cf->data[1] = CAN_ERR_CRTL_RX_OVERFLOW;
+
+       stats->rx_packets++;
+       stats->rx_bytes += cf->can_dlc;
+       netif_rx(skb);
+
+       return 0;
+}
+
+static void kvaser_usb_read_bulk_callback(struct urb *urb)
+{
+       struct kvaser_usb *dev = urb->context;
+       int err;
+       unsigned int i;
+
+       switch (urb->status) {
+       case 0:
+               break;
+       case -ENOENT:
+       case -EPIPE:
+       case -EPROTO:
+       case -ESHUTDOWN:
+               return;
+       default:
+               dev_info(&dev->intf->dev, "Rx URB aborted (%d)\n", urb->status);
+               goto resubmit_urb;
+       }
+
+       dev->ops->dev_read_bulk_callback(dev, urb->transfer_buffer,
+                                        urb->actual_length);
+
+resubmit_urb:
+       usb_fill_bulk_urb(urb, dev->udev,
+                         usb_rcvbulkpipe(dev->udev,
+                                         dev->bulk_in->bEndpointAddress),
+                         urb->transfer_buffer, KVASER_USB_RX_BUFFER_SIZE,
+                         kvaser_usb_read_bulk_callback, dev);
+
+       err = usb_submit_urb(urb, GFP_ATOMIC);
+       if (err == -ENODEV) {
+               for (i = 0; i < dev->nchannels; i++) {
+                       if (!dev->nets[i])
+                               continue;
+
+                       netif_device_detach(dev->nets[i]->netdev);
+               }
+       } else if (err) {
+               dev_err(&dev->intf->dev,
+                       "Failed resubmitting read bulk urb: %d\n", err);
+       }
+}
+
+static int kvaser_usb_setup_rx_urbs(struct kvaser_usb *dev)
+{
+       int i, err = 0;
+
+       if (dev->rxinitdone)
+               return 0;
+
+       for (i = 0; i < KVASER_USB_MAX_RX_URBS; i++) {
+               struct urb *urb = NULL;
+               u8 *buf = NULL;
+               dma_addr_t buf_dma;
+
+               urb = usb_alloc_urb(0, GFP_KERNEL);
+               if (!urb) {
+                       err = -ENOMEM;
+                       break;
+               }
+
+               buf = usb_alloc_coherent(dev->udev, KVASER_USB_RX_BUFFER_SIZE,
+                                        GFP_KERNEL, &buf_dma);
+               if (!buf) {
+                       dev_warn(&dev->intf->dev,
+                                "No memory left for USB buffer\n");
+                       usb_free_urb(urb);
+                       err = -ENOMEM;
+                       break;
+               }
+
+               usb_fill_bulk_urb(urb, dev->udev,
+                                 usb_rcvbulkpipe
+                                       (dev->udev,
+                                        dev->bulk_in->bEndpointAddress),
+                                 buf, KVASER_USB_RX_BUFFER_SIZE,
+                                 kvaser_usb_read_bulk_callback, dev);
+               urb->transfer_dma = buf_dma;
+               urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+               usb_anchor_urb(urb, &dev->rx_submitted);
+
+               err = usb_submit_urb(urb, GFP_KERNEL);
+               if (err) {
+                       usb_unanchor_urb(urb);
+                       usb_free_coherent(dev->udev,
+                                         KVASER_USB_RX_BUFFER_SIZE, buf,
+                                         buf_dma);
+                       usb_free_urb(urb);
+                       break;
+               }
+
+               dev->rxbuf[i] = buf;
+               dev->rxbuf_dma[i] = buf_dma;
+
+               usb_free_urb(urb);
+       }
+
+       if (i == 0) {
+               dev_warn(&dev->intf->dev, "Cannot setup read URBs, error %d\n",
+                        err);
+               return err;
+       } else if (i < KVASER_USB_MAX_RX_URBS) {
+               dev_warn(&dev->intf->dev, "RX performance may be slow\n");
+       }
+
+       dev->rxinitdone = true;
+
+       return 0;
+}
+
+static int kvaser_usb_open(struct net_device *netdev)
+{
+       struct kvaser_usb_net_priv *priv = netdev_priv(netdev);
+       struct kvaser_usb *dev = priv->dev;
+       int err;
+
+       err = open_candev(netdev);
+       if (err)
+               return err;
+
+       err = kvaser_usb_setup_rx_urbs(dev);
+       if (err)
+               goto error;
+
+       err = dev->ops->dev_set_opt_mode(priv);
+       if (err)
+               goto error;
+
+       err = dev->ops->dev_start_chip(priv);
+       if (err) {
+               netdev_warn(netdev, "Cannot start device, error %d\n", err);
+               goto error;
+       }
+
+       priv->can.state = CAN_STATE_ERROR_ACTIVE;
+
+       return 0;
+
+error:
+       close_candev(netdev);
+       return err;
+}
+
+static void kvaser_usb_reset_tx_urb_contexts(struct kvaser_usb_net_priv *priv)
+{
+       int i, max_tx_urbs;
+
+       max_tx_urbs = priv->dev->max_tx_urbs;
+
+       priv->active_tx_contexts = 0;
+       for (i = 0; i < max_tx_urbs; i++)
+               priv->tx_contexts[i].echo_index = max_tx_urbs;
+}
+
+/* This method might sleep. Do not call it in the atomic context
+ * of URB completions.
+ */
+static void kvaser_usb_unlink_tx_urbs(struct kvaser_usb_net_priv *priv)
+{
+       usb_kill_anchored_urbs(&priv->tx_submitted);
+       kvaser_usb_reset_tx_urb_contexts(priv);
+}
+
+static void kvaser_usb_unlink_all_urbs(struct kvaser_usb *dev)
+{
+       int i;
+
+       usb_kill_anchored_urbs(&dev->rx_submitted);
+
+       for (i = 0; i < KVASER_USB_MAX_RX_URBS; i++)
+               usb_free_coherent(dev->udev, KVASER_USB_RX_BUFFER_SIZE,
+                                 dev->rxbuf[i], dev->rxbuf_dma[i]);
+
+       for (i = 0; i < dev->nchannels; i++) {
+               struct kvaser_usb_net_priv *priv = dev->nets[i];
+
+               if (priv)
+                       kvaser_usb_unlink_tx_urbs(priv);
+       }
+}
+
+static int kvaser_usb_close(struct net_device *netdev)
+{
+       struct kvaser_usb_net_priv *priv = netdev_priv(netdev);
+       struct kvaser_usb *dev = priv->dev;
+       int err;
+
+       netif_stop_queue(netdev);
+
+       err = dev->ops->dev_flush_queue(priv);
+       if (err)
+               netdev_warn(netdev, "Cannot flush queue, error %d\n", err);
+
+       if (dev->ops->dev_reset_chip) {
+               err = dev->ops->dev_reset_chip(dev, priv->channel);
+               if (err)
+                       netdev_warn(netdev, "Cannot reset card, error %d\n",
+                                   err);
+       }
+
+       err = dev->ops->dev_stop_chip(priv);
+       if (err)
+               netdev_warn(netdev, "Cannot stop device, error %d\n", err);
+
+       /* reset tx contexts */
+       kvaser_usb_unlink_tx_urbs(priv);
+
+       priv->can.state = CAN_STATE_STOPPED;
+       close_candev(priv->netdev);
+
+       return 0;
+}
+
+static void kvaser_usb_write_bulk_callback(struct urb *urb)
+{
+       struct kvaser_usb_tx_urb_context *context = urb->context;
+       struct kvaser_usb_net_priv *priv;
+       struct net_device *netdev;
+
+       if (WARN_ON(!context))
+               return;
+
+       priv = context->priv;
+       netdev = priv->netdev;
+
+       kfree(urb->transfer_buffer);
+
+       if (!netif_device_present(netdev))
+               return;
+
+       if (urb->status)
+               netdev_info(netdev, "Tx URB aborted (%d)\n", urb->status);
+}
+
+static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb,
+                                        struct net_device *netdev)
+{
+       struct kvaser_usb_net_priv *priv = netdev_priv(netdev);
+       struct kvaser_usb *dev = priv->dev;
+       struct net_device_stats *stats = &netdev->stats;
+       struct kvaser_usb_tx_urb_context *context = NULL;
+       struct urb *urb;
+       void *buf;
+       int cmd_len = 0;
+       int err, ret = NETDEV_TX_OK;
+       unsigned int i;
+       unsigned long flags;
+
+       if (can_dropped_invalid_skb(netdev, skb))
+               return NETDEV_TX_OK;
+
+       urb = usb_alloc_urb(0, GFP_ATOMIC);
+       if (!urb) {
+               stats->tx_dropped++;
+               dev_kfree_skb(skb);
+               return NETDEV_TX_OK;
+       }
+
+       spin_lock_irqsave(&priv->tx_contexts_lock, flags);
+       for (i = 0; i < dev->max_tx_urbs; i++) {
+               if (priv->tx_contexts[i].echo_index == dev->max_tx_urbs) {
+                       context = &priv->tx_contexts[i];
+
+                       context->echo_index = i;
+                       can_put_echo_skb(skb, netdev, context->echo_index);
+                       ++priv->active_tx_contexts;
+                       if (priv->active_tx_contexts >= (int)dev->max_tx_urbs)
+                               netif_stop_queue(netdev);
+
+                       break;
+               }
+       }
+       spin_unlock_irqrestore(&priv->tx_contexts_lock, flags);
+
+       /* This should never happen; it implies a flow control bug */
+       if (!context) {
+               netdev_warn(netdev, "cannot find free context\n");
+
+               ret = NETDEV_TX_BUSY;
+               goto freeurb;
+       }
+
+       buf = dev->ops->dev_frame_to_cmd(priv, skb, &context->dlc, &cmd_len,
+                                        context->echo_index);
+       if (!buf) {
+               stats->tx_dropped++;
+               dev_kfree_skb(skb);
+               spin_lock_irqsave(&priv->tx_contexts_lock, flags);
+
+               can_free_echo_skb(netdev, context->echo_index);
+               context->echo_index = dev->max_tx_urbs;
+               --priv->active_tx_contexts;
+               netif_wake_queue(netdev);
+
+               spin_unlock_irqrestore(&priv->tx_contexts_lock, flags);
+               goto freeurb;
+       }
+
+       context->priv = priv;
+
+       usb_fill_bulk_urb(urb, dev->udev,
+                         usb_sndbulkpipe(dev->udev,
+                                         dev->bulk_out->bEndpointAddress),
+                         buf, cmd_len, kvaser_usb_write_bulk_callback,
+                         context);
+       usb_anchor_urb(urb, &priv->tx_submitted);
+
+       err = usb_submit_urb(urb, GFP_ATOMIC);
+       if (unlikely(err)) {
+               spin_lock_irqsave(&priv->tx_contexts_lock, flags);
+
+               can_free_echo_skb(netdev, context->echo_index);
+               context->echo_index = dev->max_tx_urbs;
+               --priv->active_tx_contexts;
+               netif_wake_queue(netdev);
+
+               spin_unlock_irqrestore(&priv->tx_contexts_lock, flags);
+
+               usb_unanchor_urb(urb);
+               kfree(buf);
+
+               stats->tx_dropped++;
+
+               if (err == -ENODEV)
+                       netif_device_detach(netdev);
+               else
+                       netdev_warn(netdev, "Failed tx_urb %d\n", err);
+
+               goto freeurb;
+       }
+
+       ret = NETDEV_TX_OK;
+
+freeurb:
+       usb_free_urb(urb);
+       return ret;
+}
+
+static const struct net_device_ops kvaser_usb_netdev_ops = {
+       .ndo_open = kvaser_usb_open,
+       .ndo_stop = kvaser_usb_close,
+       .ndo_start_xmit = kvaser_usb_start_xmit,
+       .ndo_change_mtu = can_change_mtu,
+};
+
+static void kvaser_usb_remove_interfaces(struct kvaser_usb *dev)
+{
+       int i;
+
+       for (i = 0; i < dev->nchannels; i++) {
+               if (!dev->nets[i])
+                       continue;
+
+               unregister_candev(dev->nets[i]->netdev);
+       }
+
+       kvaser_usb_unlink_all_urbs(dev);
+
+       for (i = 0; i < dev->nchannels; i++) {
+               if (!dev->nets[i])
+                       continue;
+
+               free_candev(dev->nets[i]->netdev);
+       }
+}
+
+static int kvaser_usb_init_one(struct kvaser_usb *dev,
+                              const struct usb_device_id *id, int channel)
+{
+       struct net_device *netdev;
+       struct kvaser_usb_net_priv *priv;
+       int err;
+
+       if (dev->ops->dev_reset_chip) {
+               err = dev->ops->dev_reset_chip(dev, channel);
+               if (err)
+                       return err;
+       }
+
+       netdev = alloc_candev(sizeof(*priv) +
+                             dev->max_tx_urbs * sizeof(*priv->tx_contexts),
+                             dev->max_tx_urbs);
+       if (!netdev) {
+               dev_err(&dev->intf->dev, "Cannot alloc candev\n");
+               return -ENOMEM;
+       }
+
+       priv = netdev_priv(netdev);
+
+       init_usb_anchor(&priv->tx_submitted);
+       init_completion(&priv->start_comp);
+       init_completion(&priv->stop_comp);
+       priv->can.ctrlmode_supported = 0;
+
+       priv->dev = dev;
+       priv->netdev = netdev;
+       priv->channel = channel;
+
+       spin_lock_init(&priv->tx_contexts_lock);
+       kvaser_usb_reset_tx_urb_contexts(priv);
+
+       priv->can.state = CAN_STATE_STOPPED;
+       priv->can.clock.freq = dev->cfg->clock.freq;
+       priv->can.bittiming_const = dev->cfg->bittiming_const;
+       priv->can.do_set_bittiming = dev->ops->dev_set_bittiming;
+       priv->can.do_set_mode = dev->ops->dev_set_mode;
+       if ((id->driver_info & KVASER_USB_HAS_TXRX_ERRORS) ||
+           (priv->dev->card_data.capabilities & KVASER_USB_CAP_BERR_CAP))
+               priv->can.do_get_berr_counter = dev->ops->dev_get_berr_counter;
+       if (id->driver_info & KVASER_USB_HAS_SILENT_MODE)
+               priv->can.ctrlmode_supported |= CAN_CTRLMODE_LISTENONLY;
+
+       priv->can.ctrlmode_supported |= dev->card_data.ctrlmode_supported;
+
+       if (priv->can.ctrlmode_supported & CAN_CTRLMODE_FD) {
+               priv->can.data_bittiming_const = dev->cfg->data_bittiming_const;
+               priv->can.do_set_data_bittiming =
+                                       dev->ops->dev_set_data_bittiming;
+       }
+
+       netdev->flags |= IFF_ECHO;
+
+       netdev->netdev_ops = &kvaser_usb_netdev_ops;
+
+       SET_NETDEV_DEV(netdev, &dev->intf->dev);
+       netdev->dev_id = channel;
+
+       dev->nets[channel] = priv;
+
+       err = register_candev(netdev);
+       if (err) {
+               dev_err(&dev->intf->dev, "Failed to register CAN device\n");
+               free_candev(netdev);
+               dev->nets[channel] = NULL;
+               return err;
+       }
+
+       netdev_dbg(netdev, "device registered\n");
+
+       return 0;
+}
+
+static int kvaser_usb_probe(struct usb_interface *intf,
+                           const struct usb_device_id *id)
+{
+       struct kvaser_usb *dev;
+       int err;
+       int i;
+
+       dev = devm_kzalloc(&intf->dev, sizeof(*dev), GFP_KERNEL);
+       if (!dev)
+               return -ENOMEM;
+
+       if (kvaser_is_leaf(id)) {
+               dev->card_data.leaf.family = KVASER_LEAF;
+               dev->ops = &kvaser_usb_leaf_dev_ops;
+       } else if (kvaser_is_usbcan(id)) {
+               dev->card_data.leaf.family = KVASER_USBCAN;
+               dev->ops = &kvaser_usb_leaf_dev_ops;
+       } else if (kvaser_is_hydra(id)) {
+               dev->ops = &kvaser_usb_hydra_dev_ops;
+       } else {
+               dev_err(&intf->dev,
+                       "Product ID (%d) is not a supported Kvaser USB device\n",
+                       id->idProduct);
+               return -ENODEV;
+       }
+
+       dev->intf = intf;
+
+       err = dev->ops->dev_setup_endpoints(dev);
+       if (err) {
+               dev_err(&intf->dev, "Cannot get USB endpoint(s)\n");
+               return err;
+       }
+
+       dev->udev = interface_to_usbdev(intf);
+
+       init_usb_anchor(&dev->rx_submitted);
+
+       usb_set_intfdata(intf, dev);
+
+       dev->card_data.ctrlmode_supported = 0;
+       dev->card_data.capabilities = 0;
+       err = dev->ops->dev_init_card(dev);
+       if (err) {
+               dev_err(&intf->dev,
+                       "Failed to initialize card, error %d\n", err);
+               return err;
+       }
+
+       err = dev->ops->dev_get_software_info(dev);
+       if (err) {
+               dev_err(&intf->dev,
+                       "Cannot get software info, error %d\n", err);
+               return err;
+       }
+
+       if (dev->ops->dev_get_software_details) {
+               err = dev->ops->dev_get_software_details(dev);
+               if (err) {
+                       dev_err(&intf->dev,
+                               "Cannot get software details, error %d\n", err);
+                       return err;
+               }
+       }
+
+       if (WARN_ON(!dev->cfg))
+               return -ENODEV;
+
+       dev_dbg(&intf->dev, "Firmware version: %d.%d.%d\n",
+               ((dev->fw_version >> 24) & 0xff),
+               ((dev->fw_version >> 16) & 0xff),
+               (dev->fw_version & 0xffff));
+
+       dev_dbg(&intf->dev, "Max outstanding tx = %d URBs\n", dev->max_tx_urbs);
+
+       err = dev->ops->dev_get_card_info(dev);
+       if (err) {
+               dev_err(&intf->dev, "Cannot get card info, error %d\n", err);
+               return err;
+       }
+
+       if (dev->ops->dev_get_capabilities) {
+               err = dev->ops->dev_get_capabilities(dev);
+               if (err) {
+                       dev_err(&intf->dev,
+                               "Cannot get capabilities, error %d\n", err);
+                       kvaser_usb_remove_interfaces(dev);
+                       return err;
+               }
+       }
+
+       for (i = 0; i < dev->nchannels; i++) {
+               err = kvaser_usb_init_one(dev, id, i);
+               if (err) {
+                       kvaser_usb_remove_interfaces(dev);
+                       return err;
+               }
+       }
+
+       return 0;
+}
+
+static void kvaser_usb_disconnect(struct usb_interface *intf)
+{
+       struct kvaser_usb *dev = usb_get_intfdata(intf);
+
+       usb_set_intfdata(intf, NULL);
+
+       if (!dev)
+               return;
+
+       kvaser_usb_remove_interfaces(dev);
+}
+
+static struct usb_driver kvaser_usb_driver = {
+       .name = "kvaser_usb",
+       .probe = kvaser_usb_probe,
+       .disconnect = kvaser_usb_disconnect,
+       .id_table = kvaser_usb_table,
+};
+
+module_usb_driver(kvaser_usb_driver);
+
+MODULE_AUTHOR("Olivier Sobrie <olivier@sobrie.be>");
+MODULE_AUTHOR("Kvaser AB <support@kvaser.com>");
+MODULE_DESCRIPTION("CAN driver for Kvaser CAN/USB devices");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c
new file mode 100644 (file)
index 0000000..c084bae
--- /dev/null
@@ -0,0 +1,2028 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Parts of this driver are based on the following:
+ *  - Kvaser linux mhydra driver (version 5.24)
+ *  - CAN driver for esd CAN-USB/2
+ *
+ * Copyright (C) 2018 KVASER AB, Sweden. All rights reserved.
+ * Copyright (C) 2010 Matthias Fuchs <matthias.fuchs@esd.eu>, esd gmbh
+ *
+ * Known issues:
+ *  - Transition from CAN_STATE_ERROR_WARNING to CAN_STATE_ERROR_ACTIVE is only
+ *    reported after a call to do_get_berr_counter(), since firmware does not
+ *    distinguish between ERROR_WARNING and ERROR_ACTIVE.
+ *  - Hardware timestamps are not set for CAN Tx frames.
+ */
+
+#include <linux/completion.h>
+#include <linux/device.h>
+#include <linux/gfp.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/usb.h>
+
+#include <linux/can.h>
+#include <linux/can/dev.h>
+#include <linux/can/error.h>
+#include <linux/can/netlink.h>
+
+#include "kvaser_usb.h"
+
+/* Forward declarations */
+static const struct kvaser_usb_dev_cfg kvaser_usb_hydra_dev_cfg_kcan;
+static const struct kvaser_usb_dev_cfg kvaser_usb_hydra_dev_cfg_flexc;
+
+#define KVASER_USB_HYDRA_BULK_EP_IN_ADDR       0x82
+#define KVASER_USB_HYDRA_BULK_EP_OUT_ADDR      0x02
+
+#define KVASER_USB_HYDRA_MAX_TRANSID           0xff
+#define KVASER_USB_HYDRA_MIN_TRANSID           0x01
+
+/* Minihydra command IDs */
+#define CMD_SET_BUSPARAMS_REQ                  16
+#define CMD_GET_CHIP_STATE_REQ                 19
+#define CMD_CHIP_STATE_EVENT                   20
+#define CMD_SET_DRIVERMODE_REQ                 21
+#define CMD_START_CHIP_REQ                     26
+#define CMD_START_CHIP_RESP                    27
+#define CMD_STOP_CHIP_REQ                      28
+#define CMD_STOP_CHIP_RESP                     29
+#define CMD_TX_CAN_MESSAGE                     33
+#define CMD_GET_CARD_INFO_REQ                  34
+#define CMD_GET_CARD_INFO_RESP                 35
+#define CMD_GET_SOFTWARE_INFO_REQ              38
+#define CMD_GET_SOFTWARE_INFO_RESP             39
+#define CMD_ERROR_EVENT                                45
+#define CMD_FLUSH_QUEUE                                48
+#define CMD_TX_ACKNOWLEDGE                     50
+#define CMD_FLUSH_QUEUE_RESP                   66
+#define CMD_SET_BUSPARAMS_FD_REQ               69
+#define CMD_SET_BUSPARAMS_FD_RESP              70
+#define CMD_SET_BUSPARAMS_RESP                 85
+#define CMD_GET_CAPABILITIES_REQ               95
+#define CMD_GET_CAPABILITIES_RESP              96
+#define CMD_RX_MESSAGE                         106
+#define CMD_MAP_CHANNEL_REQ                    200
+#define CMD_MAP_CHANNEL_RESP                   201
+#define CMD_GET_SOFTWARE_DETAILS_REQ           202
+#define CMD_GET_SOFTWARE_DETAILS_RESP          203
+#define CMD_EXTENDED                           255
+
+/* Minihydra extended command IDs */
+#define CMD_TX_CAN_MESSAGE_FD                  224
+#define CMD_TX_ACKNOWLEDGE_FD                  225
+#define CMD_RX_MESSAGE_FD                      226
+
+/* Hydra commands are handled by different threads in firmware.
+ * The threads are denoted hydra entities (HE). Each HE has a unique 6-bit
+ * address. The address is used in hydra commands to get/set source and
+ * destination HE. There are two predefined HE addresses, the remaining
+ * addresses are different between devices and firmware versions. Hence, we need
+ * to enumerate the addresses (see kvaser_usb_hydra_map_channel()).
+ */
+
+/* Well-known HE addresses */
+#define KVASER_USB_HYDRA_HE_ADDRESS_ROUTER     0x00
+#define KVASER_USB_HYDRA_HE_ADDRESS_ILLEGAL    0x3e
+
+#define KVASER_USB_HYDRA_TRANSID_CANHE         0x40
+#define KVASER_USB_HYDRA_TRANSID_SYSDBG                0x61
+
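
To make the he_addr packing concrete (helper names invented for this example): the destination HE occupies bits 0..5 of a command's he_addr byte, and the top two bits of the 6-bit source HE land in bits 6..7, as described in the kvaser_cmd_header comment at the end of this file; the remaining source bits are carried in the transid field.

/* Sketch only: pack/unpack the destination HE and the upper
 * source-HE bits of a command's he_addr byte.
 */
static inline u8 example_pack_he_addr(u8 dst_he, u8 src_he)
{
	return (dst_he & 0x3f) | ((src_he & 0x30) << 2);
}

static inline u8 example_unpack_dst_he(u8 he_addr)
{
	return he_addr & 0x3f;
}
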
+struct kvaser_cmd_map_ch_req {
+       char name[16];
+       u8 channel;
+       u8 reserved[11];
+} __packed;
+
+struct kvaser_cmd_map_ch_res {
+       u8 he_addr;
+       u8 channel;
+       u8 reserved[26];
+} __packed;
+
+struct kvaser_cmd_card_info {
+       __le32 serial_number;
+       __le32 clock_res;
+       __le32 mfg_date;
+       __le32 ean[2];
+       u8 hw_version;
+       u8 usb_mode;
+       u8 hw_type;
+       u8 reserved0;
+       u8 nchannels;
+       u8 reserved1[3];
+} __packed;
+
+struct kvaser_cmd_sw_info {
+       u8 reserved0[8];
+       __le16 max_outstanding_tx;
+       u8 reserved1[18];
+} __packed;
+
+struct kvaser_cmd_sw_detail_req {
+       u8 use_ext_cmd;
+       u8 reserved[27];
+} __packed;
+
+/* Software detail flags */
+#define KVASER_USB_HYDRA_SW_FLAG_FW_BETA       BIT(2)
+#define KVASER_USB_HYDRA_SW_FLAG_FW_BAD        BIT(4)
+#define KVASER_USB_HYDRA_SW_FLAG_FREQ_80M      BIT(5)
+#define KVASER_USB_HYDRA_SW_FLAG_EXT_CMD       BIT(9)
+#define KVASER_USB_HYDRA_SW_FLAG_CANFD         BIT(10)
+#define KVASER_USB_HYDRA_SW_FLAG_NONISO        BIT(11)
+#define KVASER_USB_HYDRA_SW_FLAG_EXT_CAP       BIT(12)
+struct kvaser_cmd_sw_detail_res {
+       __le32 sw_flags;
+       __le32 sw_version;
+       __le32 sw_name;
+       __le32 ean[2];
+       __le32 max_bitrate;
+       u8 reserved[4];
+} __packed;
+
+/* Sub commands for cap_req and cap_res */
+#define KVASER_USB_HYDRA_CAP_CMD_LISTEN_MODE   0x02
+#define KVASER_USB_HYDRA_CAP_CMD_ERR_REPORT    0x05
+#define KVASER_USB_HYDRA_CAP_CMD_ONE_SHOT      0x06
+struct kvaser_cmd_cap_req {
+       __le16 cap_cmd;
+       u8 reserved[26];
+} __packed;
+
+/* Status codes for cap_res */
+#define KVASER_USB_HYDRA_CAP_STAT_OK           0x00
+#define KVASER_USB_HYDRA_CAP_STAT_NOT_IMPL     0x01
+#define KVASER_USB_HYDRA_CAP_STAT_UNAVAIL      0x02
+struct kvaser_cmd_cap_res {
+       __le16 cap_cmd;
+       __le16 status;
+       __le32 mask;
+       __le32 value;
+       u8 reserved[16];
+} __packed;
+
+/* CMD_ERROR_EVENT error codes */
+#define KVASER_USB_HYDRA_ERROR_EVENT_CAN       0x01
+#define KVASER_USB_HYDRA_ERROR_EVENT_PARAM     0x09
+struct kvaser_cmd_error_event {
+       __le16 timestamp[3];
+       u8 reserved;
+       u8 error_code;
+       __le16 info1;
+       __le16 info2;
+} __packed;
+
+/* Chip state status flags. Used for chip_state_event and err_frame_data. */
+#define KVASER_USB_HYDRA_BUS_ERR_ACT           0x00
+#define KVASER_USB_HYDRA_BUS_ERR_PASS          BIT(5)
+#define KVASER_USB_HYDRA_BUS_BUS_OFF           BIT(6)
+struct kvaser_cmd_chip_state_event {
+       __le16 timestamp[3];
+       u8 tx_err_counter;
+       u8 rx_err_counter;
+       u8 bus_status;
+       u8 reserved[19];
+} __packed;
+
+/* Busparam modes */
+#define KVASER_USB_HYDRA_BUS_MODE_CAN          0x00
+#define KVASER_USB_HYDRA_BUS_MODE_CANFD_ISO    0x01
+#define KVASER_USB_HYDRA_BUS_MODE_NONISO       0x02
+struct kvaser_cmd_set_busparams {
+       __le32 bitrate;
+       u8 tseg1;
+       u8 tseg2;
+       u8 sjw;
+       u8 nsamples;
+       u8 reserved0[4];
+       __le32 bitrate_d;
+       u8 tseg1_d;
+       u8 tseg2_d;
+       u8 sjw_d;
+       u8 nsamples_d;
+       u8 canfd_mode;
+       u8 reserved1[7];
+} __packed;
+
+/* Ctrl modes */
+#define KVASER_USB_HYDRA_CTRLMODE_NORMAL       0x01
+#define KVASER_USB_HYDRA_CTRLMODE_LISTEN       0x02
+struct kvaser_cmd_set_ctrlmode {
+       u8 mode;
+       u8 reserved[27];
+} __packed;
+
+struct kvaser_err_frame_data {
+       u8 bus_status;
+       u8 reserved0;
+       u8 tx_err_counter;
+       u8 rx_err_counter;
+       u8 reserved1[4];
+} __packed;
+
+struct kvaser_cmd_rx_can {
+       u8 cmd_len;
+       u8 cmd_no;
+       u8 channel;
+       u8 flags;
+       __le16 timestamp[3];
+       u8 dlc;
+       u8 padding;
+       __le32 id;
+       union {
+               u8 data[8];
+               struct kvaser_err_frame_data err_frame_data;
+       };
+} __packed;
+
+/* Extended CAN ID flag. Used in rx_can and tx_can */
+#define KVASER_USB_HYDRA_EXTENDED_FRAME_ID     BIT(31)
+struct kvaser_cmd_tx_can {
+       __le32 id;
+       u8 data[8];
+       u8 dlc;
+       u8 flags;
+       __le16 transid;
+       u8 channel;
+       u8 reserved[11];
+} __packed;
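+
+/* Note: the transid set in this command is echoed back by the device in
+ * CMD_TX_ACKNOWLEDGE and is used to locate the matching tx context; see
+ * kvaser_usb_hydra_tx_acknowledge().
+ */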
+
+struct kvaser_cmd_header {
+       u8 cmd_no;
+       /* The destination HE address is stored in bits 0..5 of he_addr.
+        * The upper two bits of the source HE address are stored in bits 6..7
+        * of he_addr, and the lower four bits in bits 12..15 of transid.
+        */
+       u8 he_addr;
+       __le16 transid;
+} __packed;
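+
+/* Illustrative example of the packing above (the values are made up): for
+ * a 6-bit source HE address 0x2f and destination 0x05, he_addr becomes
+ * ((0x2f >> 4) << 6) | 0x05 = 0x85 and bits 12..15 of transid carry the
+ * low nibble 0xf; the source is then recovered as
+ * ((he_addr & 0xc0) >> 2) | (transid >> 12) = 0x20 | 0x0f = 0x2f,
+ * matching kvaser_usb_hydra_get_cmd_src_he() below.
+ */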
+
+struct kvaser_cmd {
+       struct kvaser_cmd_header header;
+       union {
+               struct kvaser_cmd_map_ch_req map_ch_req;
+               struct kvaser_cmd_map_ch_res map_ch_res;
+
+               struct kvaser_cmd_card_info card_info;
+               struct kvaser_cmd_sw_info sw_info;
+               struct kvaser_cmd_sw_detail_req sw_detail_req;
+               struct kvaser_cmd_sw_detail_res sw_detail_res;
+
+               struct kvaser_cmd_cap_req cap_req;
+               struct kvaser_cmd_cap_res cap_res;
+
+               struct kvaser_cmd_error_event error_event;
+
+               struct kvaser_cmd_set_busparams set_busparams_req;
+
+               struct kvaser_cmd_chip_state_event chip_state_event;
+
+               struct kvaser_cmd_set_ctrlmode set_ctrlmode;
+
+               struct kvaser_cmd_rx_can rx_can;
+               struct kvaser_cmd_tx_can tx_can;
+       } __packed;
+} __packed;
+
+/* CAN frame flags. Used in rx_can, ext_rx_can, tx_can and ext_tx_can */
+#define KVASER_USB_HYDRA_CF_FLAG_ERROR_FRAME   BIT(0)
+#define KVASER_USB_HYDRA_CF_FLAG_OVERRUN       BIT(1)
+#define KVASER_USB_HYDRA_CF_FLAG_REMOTE_FRAME  BIT(4)
+#define KVASER_USB_HYDRA_CF_FLAG_EXTENDED_ID   BIT(5)
+/* CAN frame flags. Used in ext_rx_can and ext_tx_can */
+#define KVASER_USB_HYDRA_CF_FLAG_OSM_NACK      BIT(12)
+#define KVASER_USB_HYDRA_CF_FLAG_ABL           BIT(13)
+#define KVASER_USB_HYDRA_CF_FLAG_FDF           BIT(16)
+#define KVASER_USB_HYDRA_CF_FLAG_BRS           BIT(17)
+#define KVASER_USB_HYDRA_CF_FLAG_ESI           BIT(18)
+
+/* KCAN packet header macros. Used in ext_rx_can and ext_tx_can */
+#define KVASER_USB_KCAN_DATA_DLC_BITS          4
+#define KVASER_USB_KCAN_DATA_DLC_SHIFT         8
+#define KVASER_USB_KCAN_DATA_DLC_MASK \
+                               GENMASK(KVASER_USB_KCAN_DATA_DLC_BITS - 1 + \
+                               KVASER_USB_KCAN_DATA_DLC_SHIFT, \
+                               KVASER_USB_KCAN_DATA_DLC_SHIFT)
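+
+/* For example, the mask above evaluates to GENMASK(11, 8) == 0x0f00, so a
+ * DLC of 0xf is stored as (0xf << KVASER_USB_KCAN_DATA_DLC_SHIFT) == 0x0f00
+ * in kcan_header and read back with (kcan_header & 0x0f00) >> 8.
+ */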
+
+#define KVASER_USB_KCAN_DATA_BRS               BIT(14)
+#define KVASER_USB_KCAN_DATA_FDF               BIT(15)
+#define KVASER_USB_KCAN_DATA_OSM               BIT(16)
+#define KVASER_USB_KCAN_DATA_AREQ              BIT(31)
+#define KVASER_USB_KCAN_DATA_SRR               BIT(31)
+#define KVASER_USB_KCAN_DATA_RTR               BIT(29)
+#define KVASER_USB_KCAN_DATA_IDE               BIT(30)
+struct kvaser_cmd_ext_rx_can {
+       __le32 flags;
+       __le32 id;
+       __le32 kcan_id;
+       __le32 kcan_header;
+       __le64 timestamp;
+       union {
+               u8 kcan_payload[64];
+               struct kvaser_err_frame_data err_frame_data;
+       };
+} __packed;
+
+struct kvaser_cmd_ext_tx_can {
+       __le32 flags;
+       __le32 id;
+       __le32 kcan_id;
+       __le32 kcan_header;
+       u8 databytes;
+       u8 dlc;
+       u8 reserved[6];
+       u8 kcan_payload[64];
+} __packed;
+
+struct kvaser_cmd_ext_tx_ack {
+       __le32 flags;
+       u8 reserved0[4];
+       __le64 timestamp;
+       u8 reserved1[8];
+} __packed;
+
+/* struct for extended commands (CMD_EXTENDED) */
+struct kvaser_cmd_ext {
+       struct kvaser_cmd_header header;
+       __le16 len;
+       u8 cmd_no_ext;
+       u8 reserved;
+
+       union {
+               struct kvaser_cmd_ext_rx_can rx_can;
+               struct kvaser_cmd_ext_tx_can tx_can;
+               struct kvaser_cmd_ext_tx_ack tx_ack;
+       } __packed;
+} __packed;
+
+static const struct can_bittiming_const kvaser_usb_hydra_kcan_bittiming_c = {
+       .name = "kvaser_usb_kcan",
+       .tseg1_min = 1,
+       .tseg1_max = 255,
+       .tseg2_min = 1,
+       .tseg2_max = 32,
+       .sjw_max = 16,
+       .brp_min = 1,
+       .brp_max = 4096,
+       .brp_inc = 1,
+};
+
+static const struct can_bittiming_const kvaser_usb_hydra_flexc_bittiming_c = {
+       .name = "kvaser_usb_flex",
+       .tseg1_min = 4,
+       .tseg1_max = 16,
+       .tseg2_min = 2,
+       .tseg2_max = 8,
+       .sjw_max = 4,
+       .brp_min = 1,
+       .brp_max = 256,
+       .brp_inc = 1,
+};
+
+#define KVASER_USB_HYDRA_TRANSID_BITS          12
+#define KVASER_USB_HYDRA_TRANSID_MASK \
+                               GENMASK(KVASER_USB_HYDRA_TRANSID_BITS - 1, 0)
+#define KVASER_USB_HYDRA_HE_ADDR_SRC_MASK      GENMASK(7, 6)
+#define KVASER_USB_HYDRA_HE_ADDR_DEST_MASK     GENMASK(5, 0)
+#define KVASER_USB_HYDRA_HE_ADDR_SRC_BITS      2
+static inline u16 kvaser_usb_hydra_get_cmd_transid(const struct kvaser_cmd *cmd)
+{
+       return le16_to_cpu(cmd->header.transid) & KVASER_USB_HYDRA_TRANSID_MASK;
+}
+
+static inline void kvaser_usb_hydra_set_cmd_transid(struct kvaser_cmd *cmd,
+                                                   u16 transid)
+{
+       cmd->header.transid =
+                       cpu_to_le16(transid & KVASER_USB_HYDRA_TRANSID_MASK);
+}
+
+static inline u8 kvaser_usb_hydra_get_cmd_src_he(const struct kvaser_cmd *cmd)
+{
+       return (cmd->header.he_addr & KVASER_USB_HYDRA_HE_ADDR_SRC_MASK) >>
+               KVASER_USB_HYDRA_HE_ADDR_SRC_BITS |
+               le16_to_cpu(cmd->header.transid) >>
+               KVASER_USB_HYDRA_TRANSID_BITS;
+}
+
+static inline void kvaser_usb_hydra_set_cmd_dest_he(struct kvaser_cmd *cmd,
+                                                   u8 dest_he)
+{
+       cmd->header.he_addr =
+               (cmd->header.he_addr & KVASER_USB_HYDRA_HE_ADDR_SRC_MASK) |
+               (dest_he & KVASER_USB_HYDRA_HE_ADDR_DEST_MASK);
+}
+
+static u8 kvaser_usb_hydra_channel_from_cmd(const struct kvaser_usb *dev,
+                                           const struct kvaser_cmd *cmd)
+{
+       int i;
+       u8 channel = 0xff;
+       u8 src_he = kvaser_usb_hydra_get_cmd_src_he(cmd);
+
+       for (i = 0; i < KVASER_USB_MAX_NET_DEVICES; i++) {
+               if (dev->card_data.hydra.channel_to_he[i] == src_he) {
+                       channel = i;
+                       break;
+               }
+       }
+
+       return channel;
+}
+
+static u16 kvaser_usb_hydra_get_next_transid(struct kvaser_usb *dev)
+{
+       unsigned long flags;
+       u16 transid;
+       struct kvaser_usb_dev_card_data_hydra *card_data =
+                                                       &dev->card_data.hydra;
+
+       spin_lock_irqsave(&card_data->transid_lock, flags);
+       transid = card_data->transid;
+       if (transid >= KVASER_USB_HYDRA_MAX_TRANSID)
+               transid = KVASER_USB_HYDRA_MIN_TRANSID;
+       else
+               transid++;
+       card_data->transid = transid;
+       spin_unlock_irqrestore(&card_data->transid_lock, flags);
+
+       return transid;
+}
+
+static size_t kvaser_usb_hydra_cmd_size(struct kvaser_cmd *cmd)
+{
+       size_t ret;
+
+       if (cmd->header.cmd_no == CMD_EXTENDED)
+               ret = le16_to_cpu(((struct kvaser_cmd_ext *)cmd)->len);
+       else
+               ret = sizeof(struct kvaser_cmd);
+
+       return ret;
+}
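+
+/* As the structs are defined here, a non-extended command is always 32
+ * bytes on the wire: the 4-byte header plus the 28-byte union (its largest
+ * members, e.g. kvaser_cmd_map_ch_req, are 28 bytes). This is an
+ * observation about the layout above, not a figure from the device docs.
+ */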
+
+static struct kvaser_usb_net_priv *
+kvaser_usb_hydra_net_priv_from_cmd(const struct kvaser_usb *dev,
+                                  const struct kvaser_cmd *cmd)
+{
+       struct kvaser_usb_net_priv *priv = NULL;
+       u8 channel = kvaser_usb_hydra_channel_from_cmd(dev, cmd);
+
+       if (channel >= dev->nchannels)
+               dev_err(&dev->intf->dev,
+                       "Invalid channel number (%d)\n", channel);
+       else
+               priv = dev->nets[channel];
+
+       return priv;
+}
+
+static ktime_t
+kvaser_usb_hydra_ktime_from_rx_cmd(const struct kvaser_usb_dev_cfg *cfg,
+                                  const struct kvaser_cmd *cmd)
+{
+       u64 ticks;
+
+       if (cmd->header.cmd_no == CMD_EXTENDED) {
+               struct kvaser_cmd_ext *cmd_ext = (struct kvaser_cmd_ext *)cmd;
+
+               ticks = le64_to_cpu(cmd_ext->rx_can.timestamp);
+       } else {
+               ticks = le16_to_cpu(cmd->rx_can.timestamp[0]);
+               ticks += (u64)(le16_to_cpu(cmd->rx_can.timestamp[1])) << 16;
+               ticks += (u64)(le16_to_cpu(cmd->rx_can.timestamp[2])) << 32;
+       }
+
+       return ns_to_ktime(div_u64(ticks * 1000, cfg->timestamp_freq));
+}
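+
+/* The conversion above yields nanoseconds, which implies timestamp_freq is
+ * a tick rate in MHz. For example, assuming an 80 MHz timestamp clock,
+ * 400000 ticks give 400000 * 1000 / 80 = 5000000 ns = 5 ms.
+ */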
+
+static int kvaser_usb_hydra_send_simple_cmd(struct kvaser_usb *dev,
+                                           u8 cmd_no, int channel)
+{
+       struct kvaser_cmd *cmd;
+       int err;
+
+       cmd = kcalloc(1, sizeof(struct kvaser_cmd), GFP_KERNEL);
+       if (!cmd)
+               return -ENOMEM;
+
+       cmd->header.cmd_no = cmd_no;
+       if (channel < 0) {
+               kvaser_usb_hydra_set_cmd_dest_he
+                               (cmd, KVASER_USB_HYDRA_HE_ADDRESS_ILLEGAL);
+       } else {
+               if (channel >= KVASER_USB_MAX_NET_DEVICES) {
+                       dev_err(&dev->intf->dev, "channel (%d) out of range.\n",
+                               channel);
+                       err = -EINVAL;
+                       goto end;
+               }
+               kvaser_usb_hydra_set_cmd_dest_he
+                       (cmd, dev->card_data.hydra.channel_to_he[channel]);
+       }
+       kvaser_usb_hydra_set_cmd_transid
+                               (cmd, kvaser_usb_hydra_get_next_transid(dev));
+
+       err = kvaser_usb_send_cmd(dev, cmd, kvaser_usb_hydra_cmd_size(cmd));
+       if (err)
+               goto end;
+
+end:
+       kfree(cmd);
+
+       return err;
+}
+
+static int
+kvaser_usb_hydra_send_simple_cmd_async(struct kvaser_usb_net_priv *priv,
+                                      u8 cmd_no)
+{
+       struct kvaser_cmd *cmd;
+       struct kvaser_usb *dev = priv->dev;
+       int err;
+
+       cmd = kcalloc(1, sizeof(struct kvaser_cmd), GFP_ATOMIC);
+       if (!cmd)
+               return -ENOMEM;
+
+       cmd->header.cmd_no = cmd_no;
+
+       kvaser_usb_hydra_set_cmd_dest_he
+               (cmd, dev->card_data.hydra.channel_to_he[priv->channel]);
+       kvaser_usb_hydra_set_cmd_transid
+                               (cmd, kvaser_usb_hydra_get_next_transid(dev));
+
+       err = kvaser_usb_send_cmd_async(priv, cmd,
+                                       kvaser_usb_hydra_cmd_size(cmd));
+       if (err)
+               kfree(cmd);
+
+       return err;
+}
+
+/* This function is used for synchronously waiting on hydra control commands.
+ * Note: Compared to kvaser_usb_hydra_read_bulk_callback(), we never need to
+ *       handle partial hydra commands, since hydra control commands are
+ *       always non-extended commands.
+ */
+static int kvaser_usb_hydra_wait_cmd(const struct kvaser_usb *dev, u8 cmd_no,
+                                    struct kvaser_cmd *cmd)
+{
+       void *buf;
+       int err;
+       unsigned long timeout = jiffies + msecs_to_jiffies(KVASER_USB_TIMEOUT);
+
+       if (cmd->header.cmd_no == CMD_EXTENDED) {
+               dev_err(&dev->intf->dev, "Wait for CMD_EXTENDED not allowed\n");
+               return -EINVAL;
+       }
+
+       buf = kzalloc(KVASER_USB_RX_BUFFER_SIZE, GFP_KERNEL);
+       if (!buf)
+               return -ENOMEM;
+
+       do {
+               int actual_len = 0;
+               int pos = 0;
+
+               err = kvaser_usb_recv_cmd(dev, buf, KVASER_USB_RX_BUFFER_SIZE,
+                                         &actual_len);
+               if (err < 0)
+                       goto end;
+
+               while (pos < actual_len) {
+                       struct kvaser_cmd *tmp_cmd;
+                       size_t cmd_len;
+
+                       tmp_cmd = buf + pos;
+                       cmd_len = kvaser_usb_hydra_cmd_size(tmp_cmd);
+                       if (pos + cmd_len > actual_len) {
+                               dev_err_ratelimited(&dev->intf->dev,
+                                                   "Format error\n");
+                               break;
+                       }
+
+                       if (tmp_cmd->header.cmd_no == cmd_no) {
+                               memcpy(cmd, tmp_cmd, cmd_len);
+                               goto end;
+                       }
+                       pos += cmd_len;
+               }
+       } while (time_before(jiffies, timeout));
+
+       err = -EINVAL;
+
+end:
+       kfree(buf);
+
+       return err;
+}
+
+static int kvaser_usb_hydra_map_channel_resp(struct kvaser_usb *dev,
+                                            const struct kvaser_cmd *cmd)
+{
+       u8 he, channel;
+       u16 transid = kvaser_usb_hydra_get_cmd_transid(cmd);
+       struct kvaser_usb_dev_card_data_hydra *card_data =
+                                                       &dev->card_data.hydra;
+
+       if (transid > 0x007f || transid < 0x0040) {
+               dev_err(&dev->intf->dev,
+                       "CMD_MAP_CHANNEL_RESP, invalid transid: 0x%x\n",
+                       transid);
+               return -EINVAL;
+       }
+
+       switch (transid) {
+       case KVASER_USB_HYDRA_TRANSID_CANHE:
+       case KVASER_USB_HYDRA_TRANSID_CANHE + 1:
+       case KVASER_USB_HYDRA_TRANSID_CANHE + 2:
+       case KVASER_USB_HYDRA_TRANSID_CANHE + 3:
+       case KVASER_USB_HYDRA_TRANSID_CANHE + 4:
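+               /* KVASER_USB_HYDRA_TRANSID_CANHE is 0x40, so the low nibble
+                * of transid is the channel index.
+                */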
+               channel = transid & 0x000f;
+               he = cmd->map_ch_res.he_addr;
+               card_data->channel_to_he[channel] = he;
+               break;
+       case KVASER_USB_HYDRA_TRANSID_SYSDBG:
+               card_data->sysdbg_he = cmd->map_ch_res.he_addr;
+               break;
+       default:
+               dev_warn(&dev->intf->dev,
+                        "Unknown CMD_MAP_CHANNEL_RESP transid=0x%x\n",
+                        transid);
+               break;
+       }
+
+       return 0;
+}
+
+static int kvaser_usb_hydra_map_channel(struct kvaser_usb *dev, u16 transid,
+                                       u8 channel, const char *name)
+{
+       struct kvaser_cmd *cmd;
+       int err;
+
+       cmd = kcalloc(1, sizeof(struct kvaser_cmd), GFP_KERNEL);
+       if (!cmd)
+               return -ENOMEM;
+
+       strcpy(cmd->map_ch_req.name, name);
+       cmd->header.cmd_no = CMD_MAP_CHANNEL_REQ;
+       kvaser_usb_hydra_set_cmd_dest_he
+                               (cmd, KVASER_USB_HYDRA_HE_ADDRESS_ROUTER);
+       cmd->map_ch_req.channel = channel;
+
+       kvaser_usb_hydra_set_cmd_transid(cmd, transid);
+
+       err = kvaser_usb_send_cmd(dev, cmd, kvaser_usb_hydra_cmd_size(cmd));
+       if (err)
+               goto end;
+
+       err = kvaser_usb_hydra_wait_cmd(dev, CMD_MAP_CHANNEL_RESP, cmd);
+       if (err)
+               goto end;
+
+       err = kvaser_usb_hydra_map_channel_resp(dev, cmd);
+       if (err)
+               goto end;
+
+end:
+       kfree(cmd);
+
+       return err;
+}
+
+static int kvaser_usb_hydra_get_single_capability(struct kvaser_usb *dev,
+                                                 u16 cap_cmd_req, u16 *status)
+{
+       struct kvaser_usb_dev_card_data *card_data = &dev->card_data;
+       struct kvaser_cmd *cmd;
+       u32 value = 0;
+       u32 mask = 0;
+       u16 cap_cmd_res;
+       int err;
+       int i;
+
+       cmd = kcalloc(1, sizeof(struct kvaser_cmd), GFP_KERNEL);
+       if (!cmd)
+               return -ENOMEM;
+
+       cmd->header.cmd_no = CMD_GET_CAPABILITIES_REQ;
+       cmd->cap_req.cap_cmd = cpu_to_le16(cap_cmd_req);
+
+       kvaser_usb_hydra_set_cmd_dest_he(cmd, card_data->hydra.sysdbg_he);
+       kvaser_usb_hydra_set_cmd_transid
+                               (cmd, kvaser_usb_hydra_get_next_transid(dev));
+
+       err = kvaser_usb_send_cmd(dev, cmd, kvaser_usb_hydra_cmd_size(cmd));
+       if (err)
+               goto end;
+
+       err = kvaser_usb_hydra_wait_cmd(dev, CMD_GET_CAPABILITIES_RESP, cmd);
+       if (err)
+               goto end;
+
+       *status = le16_to_cpu(cmd->cap_res.status);
+
+       if (*status != KVASER_USB_HYDRA_CAP_STAT_OK)
+               goto end;
+
+       cap_cmd_res = le16_to_cpu(cmd->cap_res.cap_cmd);
+       switch (cap_cmd_res) {
+       case KVASER_USB_HYDRA_CAP_CMD_LISTEN_MODE:
+       case KVASER_USB_HYDRA_CAP_CMD_ERR_REPORT:
+       case KVASER_USB_HYDRA_CAP_CMD_ONE_SHOT:
+               value = le32_to_cpu(cmd->cap_res.value);
+               mask = le32_to_cpu(cmd->cap_res.mask);
+               break;
+       default:
+               dev_warn(&dev->intf->dev, "Unknown capability command %u\n",
+                        cap_cmd_res);
+               break;
+       }
+
+       for (i = 0; i < dev->nchannels; i++) {
+               if (BIT(i) & (value & mask)) {
+                       switch (cap_cmd_res) {
+                       case KVASER_USB_HYDRA_CAP_CMD_LISTEN_MODE:
+                               card_data->ctrlmode_supported |=
+                                               CAN_CTRLMODE_LISTENONLY;
+                               break;
+                       case KVASER_USB_HYDRA_CAP_CMD_ERR_REPORT:
+                               card_data->capabilities |=
+                                               KVASER_USB_CAP_BERR_CAP;
+                               break;
+                       case KVASER_USB_HYDRA_CAP_CMD_ONE_SHOT:
+                               card_data->ctrlmode_supported |=
+                                               CAN_CTRLMODE_ONE_SHOT;
+                               break;
+                       }
+               }
+       }
+
+end:
+       kfree(cmd);
+
+       return err;
+}
+
+static void kvaser_usb_hydra_start_chip_reply(const struct kvaser_usb *dev,
+                                             const struct kvaser_cmd *cmd)
+{
+       struct kvaser_usb_net_priv *priv;
+
+       priv = kvaser_usb_hydra_net_priv_from_cmd(dev, cmd);
+       if (!priv)
+               return;
+
+       if (completion_done(&priv->start_comp) &&
+           netif_queue_stopped(priv->netdev)) {
+               netif_wake_queue(priv->netdev);
+       } else {
+               netif_start_queue(priv->netdev);
+               complete(&priv->start_comp);
+       }
+}
+
+static void kvaser_usb_hydra_stop_chip_reply(const struct kvaser_usb *dev,
+                                            const struct kvaser_cmd *cmd)
+{
+       struct kvaser_usb_net_priv *priv;
+
+       priv = kvaser_usb_hydra_net_priv_from_cmd(dev, cmd);
+       if (!priv)
+               return;
+
+       complete(&priv->stop_comp);
+}
+
+static void kvaser_usb_hydra_flush_queue_reply(const struct kvaser_usb *dev,
+                                              const struct kvaser_cmd *cmd)
+{
+       struct kvaser_usb_net_priv *priv;
+
+       priv = kvaser_usb_hydra_net_priv_from_cmd(dev, cmd);
+       if (!priv)
+               return;
+
+       complete(&priv->flush_comp);
+}
+
+static void
+kvaser_usb_hydra_bus_status_to_can_state(const struct kvaser_usb_net_priv *priv,
+                                        u8 bus_status,
+                                        const struct can_berr_counter *bec,
+                                        enum can_state *new_state)
+{
+       if (bus_status & KVASER_USB_HYDRA_BUS_BUS_OFF) {
+               *new_state = CAN_STATE_BUS_OFF;
+       } else if (bus_status & KVASER_USB_HYDRA_BUS_ERR_PASS) {
+               *new_state = CAN_STATE_ERROR_PASSIVE;
+       } else if (bus_status == KVASER_USB_HYDRA_BUS_ERR_ACT) {
+               if (bec->txerr >= 128 || bec->rxerr >= 128) {
+                       netdev_warn(priv->netdev,
+                                   "ERR_ACTIVE but err tx=%u or rx=%u >=128\n",
+                                   bec->txerr, bec->rxerr);
+                       *new_state = CAN_STATE_ERROR_PASSIVE;
+               } else if (bec->txerr >= 96 || bec->rxerr >= 96) {
+                       *new_state = CAN_STATE_ERROR_WARNING;
+               } else {
+                       *new_state = CAN_STATE_ERROR_ACTIVE;
+               }
+       }
+}
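+
+/* The 96/128 thresholds above mirror the classic CAN fault confinement
+ * limits: counters at or above 96 map to error-warning and at or above 128
+ * to error-passive. For example, txerr = 100 with rxerr = 3 yields
+ * CAN_STATE_ERROR_WARNING.
+ */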
+
+static void kvaser_usb_hydra_update_state(struct kvaser_usb_net_priv *priv,
+                                         u8 bus_status,
+                                         const struct can_berr_counter *bec)
+{
+       struct net_device *netdev = priv->netdev;
+       struct can_frame *cf;
+       struct sk_buff *skb;
+       struct net_device_stats *stats;
+       enum can_state new_state, old_state;
+
+       old_state = priv->can.state;
+
+       kvaser_usb_hydra_bus_status_to_can_state(priv, bus_status, bec,
+                                                &new_state);
+
+       if (new_state == old_state)
+               return;
+
+       /* Ignore state change if previous state was STOPPED and the new state
+        * is BUS_OFF. Firmware always reports this as BUS_OFF, since firmware
+        * does not distinguish between BUS_OFF and STOPPED.
+        */
+       if (old_state == CAN_STATE_STOPPED && new_state == CAN_STATE_BUS_OFF)
+               return;
+
+       skb = alloc_can_err_skb(netdev, &cf);
+       if (skb) {
+               enum can_state tx_state, rx_state;
+
+               tx_state = (bec->txerr >= bec->rxerr) ?
+                                       new_state : CAN_STATE_ERROR_ACTIVE;
+               rx_state = (bec->txerr <= bec->rxerr) ?
+                                       new_state : CAN_STATE_ERROR_ACTIVE;
+               can_change_state(netdev, cf, tx_state, rx_state);
+       }
+
+       if (new_state == CAN_STATE_BUS_OFF && old_state < CAN_STATE_BUS_OFF) {
+               if (!priv->can.restart_ms)
+                       kvaser_usb_hydra_send_simple_cmd_async
+                                               (priv, CMD_STOP_CHIP_REQ);
+
+               can_bus_off(netdev);
+       }
+
+       if (!skb) {
+               netdev_warn(netdev, "No memory left for err_skb\n");
+               return;
+       }
+
+       if (priv->can.restart_ms &&
+           old_state >= CAN_STATE_BUS_OFF &&
+           new_state < CAN_STATE_BUS_OFF)
+               priv->can.can_stats.restarts++;
+
+       cf->data[6] = bec->txerr;
+       cf->data[7] = bec->rxerr;
+
+       stats = &netdev->stats;
+       stats->rx_packets++;
+       stats->rx_bytes += cf->can_dlc;
+       netif_rx(skb);
+}
+
+static void kvaser_usb_hydra_state_event(const struct kvaser_usb *dev,
+                                        const struct kvaser_cmd *cmd)
+{
+       struct kvaser_usb_net_priv *priv;
+       struct can_berr_counter bec;
+       u8 bus_status;
+
+       priv = kvaser_usb_hydra_net_priv_from_cmd(dev, cmd);
+       if (!priv)
+               return;
+
+       bus_status = cmd->chip_state_event.bus_status;
+       bec.txerr = cmd->chip_state_event.tx_err_counter;
+       bec.rxerr = cmd->chip_state_event.rx_err_counter;
+
+       kvaser_usb_hydra_update_state(priv, bus_status, &bec);
+       priv->bec.txerr = bec.txerr;
+       priv->bec.rxerr = bec.rxerr;
+}
+
+static void kvaser_usb_hydra_error_event_parameter(const struct kvaser_usb *dev,
+                                                  const struct kvaser_cmd *cmd)
+{
+       /* info1 will contain the offending cmd_no */
+       switch (le16_to_cpu(cmd->error_event.info1)) {
+       case CMD_START_CHIP_REQ:
+               dev_warn(&dev->intf->dev,
+                        "CMD_START_CHIP_REQ error in parameter\n");
+               break;
+
+       case CMD_STOP_CHIP_REQ:
+               dev_warn(&dev->intf->dev,
+                        "CMD_STOP_CHIP_REQ error in parameter\n");
+               break;
+
+       case CMD_FLUSH_QUEUE:
+               dev_warn(&dev->intf->dev,
+                        "CMD_FLUSH_QUEUE error in parameter\n");
+               break;
+
+       case CMD_SET_BUSPARAMS_REQ:
+               dev_warn(&dev->intf->dev,
+                        "Set bittiming failed. Error in parameter\n");
+               break;
+
+       case CMD_SET_BUSPARAMS_FD_REQ:
+               dev_warn(&dev->intf->dev,
+                        "Set data bittiming failed. Error in parameter\n");
+               break;
+
+       default:
+               dev_warn(&dev->intf->dev,
+                        "Unhandled parameter error event cmd_no (%u)\n",
+                        le16_to_cpu(cmd->error_event.info1));
+               break;
+       }
+}
+
+static void kvaser_usb_hydra_error_event(const struct kvaser_usb *dev,
+                                        const struct kvaser_cmd *cmd)
+{
+       switch (cmd->error_event.error_code) {
+       case KVASER_USB_HYDRA_ERROR_EVENT_PARAM:
+               kvaser_usb_hydra_error_event_parameter(dev, cmd);
+               break;
+
+       case KVASER_USB_HYDRA_ERROR_EVENT_CAN:
+               /* Wrong channel mapping?! This should never happen!
+                * info1 will contain the offending cmd_no
+                */
+               dev_err(&dev->intf->dev,
+                       "Received CAN error event for cmd_no (%u)\n",
+                       le16_to_cpu(cmd->error_event.info1));
+               break;
+
+       default:
+               dev_warn(&dev->intf->dev,
+                        "Unhandled error event (%d)\n",
+                        cmd->error_event.error_code);
+               break;
+       }
+}
+
+static void
+kvaser_usb_hydra_error_frame(struct kvaser_usb_net_priv *priv,
+                            const struct kvaser_err_frame_data *err_frame_data,
+                            ktime_t hwtstamp)
+{
+       struct net_device *netdev = priv->netdev;
+       struct net_device_stats *stats = &netdev->stats;
+       struct can_frame *cf;
+       struct sk_buff *skb;
+       struct skb_shared_hwtstamps *shhwtstamps;
+       struct can_berr_counter bec;
+       enum can_state new_state, old_state;
+       u8 bus_status;
+
+       priv->can.can_stats.bus_error++;
+       stats->rx_errors++;
+
+       bus_status = err_frame_data->bus_status;
+       bec.txerr = err_frame_data->tx_err_counter;
+       bec.rxerr = err_frame_data->rx_err_counter;
+
+       old_state = priv->can.state;
+       kvaser_usb_hydra_bus_status_to_can_state(priv, bus_status, &bec,
+                                                &new_state);
+
+       skb = alloc_can_err_skb(netdev, &cf);
+
+       if (new_state != old_state) {
+               if (skb) {
+                       enum can_state tx_state, rx_state;
+
+                       tx_state = (bec.txerr >= bec.rxerr) ?
+                                       new_state : CAN_STATE_ERROR_ACTIVE;
+                       rx_state = (bec.txerr <= bec.rxerr) ?
+                                       new_state : CAN_STATE_ERROR_ACTIVE;
+
+                       can_change_state(netdev, cf, tx_state, rx_state);
+               }
+
+               if (new_state == CAN_STATE_BUS_OFF) {
+                       if (!priv->can.restart_ms)
+                               kvaser_usb_hydra_send_simple_cmd_async
+                                               (priv, CMD_STOP_CHIP_REQ);
+
+                       can_bus_off(netdev);
+               }
+
+               if (priv->can.restart_ms &&
+                   old_state >= CAN_STATE_BUS_OFF &&
+                   new_state < CAN_STATE_BUS_OFF)
+                       cf->can_id |= CAN_ERR_RESTARTED;
+       }
+
+       if (!skb) {
+               stats->rx_dropped++;
+               netdev_warn(netdev, "No memory left for err_skb\n");
+               return;
+       }
+
+       shhwtstamps = skb_hwtstamps(skb);
+       shhwtstamps->hwtstamp = hwtstamp;
+
+       cf->can_id |= CAN_ERR_BUSERROR;
+       cf->data[6] = bec.txerr;
+       cf->data[7] = bec.rxerr;
+
+       stats->rx_packets++;
+       stats->rx_bytes += cf->can_dlc;
+       netif_rx(skb);
+
+       priv->bec.txerr = bec.txerr;
+       priv->bec.rxerr = bec.rxerr;
+}
+
+static void kvaser_usb_hydra_one_shot_fail(struct kvaser_usb_net_priv *priv,
+                                          const struct kvaser_cmd_ext *cmd)
+{
+       struct net_device *netdev = priv->netdev;
+       struct net_device_stats *stats = &netdev->stats;
+       struct can_frame *cf;
+       struct sk_buff *skb;
+       u32 flags;
+
+       skb = alloc_can_err_skb(netdev, &cf);
+       if (!skb) {
+               stats->rx_dropped++;
+               netdev_warn(netdev, "No memory left for err_skb\n");
+               return;
+       }
+
+       cf->can_id |= CAN_ERR_BUSERROR;
+       flags = le32_to_cpu(cmd->tx_ack.flags);
+
+       if (flags & KVASER_USB_HYDRA_CF_FLAG_OSM_NACK)
+               cf->can_id |= CAN_ERR_ACK;
+       if (flags & KVASER_USB_HYDRA_CF_FLAG_ABL) {
+               cf->can_id |= CAN_ERR_LOSTARB;
+               priv->can.can_stats.arbitration_lost++;
+       }
+
+       stats->tx_errors++;
+       stats->rx_packets++;
+       stats->rx_bytes += cf->can_dlc;
+       netif_rx(skb);
+}
+
+static void kvaser_usb_hydra_tx_acknowledge(const struct kvaser_usb *dev,
+                                           const struct kvaser_cmd *cmd)
+{
+       struct kvaser_usb_tx_urb_context *context;
+       struct kvaser_usb_net_priv *priv;
+       unsigned long irq_flags;
+       bool one_shot_fail = false;
+       u16 transid = kvaser_usb_hydra_get_cmd_transid(cmd);
+
+       priv = kvaser_usb_hydra_net_priv_from_cmd(dev, cmd);
+       if (!priv)
+               return;
+
+       if (!netif_device_present(priv->netdev))
+               return;
+
+       if (cmd->header.cmd_no == CMD_EXTENDED) {
+               struct kvaser_cmd_ext *cmd_ext = (struct kvaser_cmd_ext *)cmd;
+               u32 flags = le32_to_cpu(cmd_ext->tx_ack.flags);
+
+               if (flags & (KVASER_USB_HYDRA_CF_FLAG_OSM_NACK |
+                            KVASER_USB_HYDRA_CF_FLAG_ABL)) {
+                       kvaser_usb_hydra_one_shot_fail(priv, cmd_ext);
+                       one_shot_fail = true;
+               }
+       }
+
+       context = &priv->tx_contexts[transid % dev->max_tx_urbs];
+       if (!one_shot_fail) {
+               struct net_device_stats *stats = &priv->netdev->stats;
+
+               stats->tx_packets++;
+               stats->tx_bytes += can_dlc2len(context->dlc);
+       }
+
+       spin_lock_irqsave(&priv->tx_contexts_lock, irq_flags);
+
+       can_get_echo_skb(priv->netdev, context->echo_index);
+       context->echo_index = dev->max_tx_urbs;
+       --priv->active_tx_contexts;
+       netif_wake_queue(priv->netdev);
+
+       spin_unlock_irqrestore(&priv->tx_contexts_lock, irq_flags);
+}
+
+static void kvaser_usb_hydra_rx_msg_std(const struct kvaser_usb *dev,
+                                       const struct kvaser_cmd *cmd)
+{
+       struct kvaser_usb_net_priv *priv = NULL;
+       struct can_frame *cf;
+       struct sk_buff *skb;
+       struct skb_shared_hwtstamps *shhwtstamps;
+       struct net_device_stats *stats;
+       u8 flags;
+       ktime_t hwtstamp;
+
+       priv = kvaser_usb_hydra_net_priv_from_cmd(dev, cmd);
+       if (!priv)
+               return;
+
+       stats = &priv->netdev->stats;
+
+       flags = cmd->rx_can.flags;
+       hwtstamp = kvaser_usb_hydra_ktime_from_rx_cmd(dev->cfg, cmd);
+
+       if (flags & KVASER_USB_HYDRA_CF_FLAG_ERROR_FRAME) {
+               kvaser_usb_hydra_error_frame(priv, &cmd->rx_can.err_frame_data,
+                                            hwtstamp);
+               return;
+       }
+
+       skb = alloc_can_skb(priv->netdev, &cf);
+       if (!skb) {
+               stats->rx_dropped++;
+               return;
+       }
+
+       shhwtstamps = skb_hwtstamps(skb);
+       shhwtstamps->hwtstamp = hwtstamp;
+
+       cf->can_id = le32_to_cpu(cmd->rx_can.id);
+
+       if (cf->can_id & KVASER_USB_HYDRA_EXTENDED_FRAME_ID) {
+               cf->can_id &= CAN_EFF_MASK;
+               cf->can_id |= CAN_EFF_FLAG;
+       } else {
+               cf->can_id &= CAN_SFF_MASK;
+       }
+
+       if (flags & KVASER_USB_HYDRA_CF_FLAG_OVERRUN)
+               kvaser_usb_can_rx_over_error(priv->netdev);
+
+       cf->can_dlc = get_can_dlc(cmd->rx_can.dlc);
+
+       if (flags & KVASER_USB_HYDRA_CF_FLAG_REMOTE_FRAME)
+               cf->can_id |= CAN_RTR_FLAG;
+       else
+               memcpy(cf->data, cmd->rx_can.data, cf->can_dlc);
+
+       stats->rx_packets++;
+       stats->rx_bytes += cf->can_dlc;
+       netif_rx(skb);
+}
+
+static void kvaser_usb_hydra_rx_msg_ext(const struct kvaser_usb *dev,
+                                       const struct kvaser_cmd_ext *cmd)
+{
+       struct kvaser_cmd *std_cmd = (struct kvaser_cmd *)cmd;
+       struct kvaser_usb_net_priv *priv;
+       struct canfd_frame *cf;
+       struct sk_buff *skb;
+       struct skb_shared_hwtstamps *shhwtstamps;
+       struct net_device_stats *stats;
+       u32 flags;
+       u8 dlc;
+       u32 kcan_header;
+       ktime_t hwtstamp;
+
+       priv = kvaser_usb_hydra_net_priv_from_cmd(dev, std_cmd);
+       if (!priv)
+               return;
+
+       stats = &priv->netdev->stats;
+
+       kcan_header = le32_to_cpu(cmd->rx_can.kcan_header);
+       dlc = (kcan_header & KVASER_USB_KCAN_DATA_DLC_MASK) >>
+               KVASER_USB_KCAN_DATA_DLC_SHIFT;
+
+       flags = le32_to_cpu(cmd->rx_can.flags);
+       hwtstamp = kvaser_usb_hydra_ktime_from_rx_cmd(dev->cfg, std_cmd);
+
+       if (flags & KVASER_USB_HYDRA_CF_FLAG_ERROR_FRAME) {
+               kvaser_usb_hydra_error_frame(priv, &cmd->rx_can.err_frame_data,
+                                            hwtstamp);
+               return;
+       }
+
+       if (flags & KVASER_USB_HYDRA_CF_FLAG_FDF)
+               skb = alloc_canfd_skb(priv->netdev, &cf);
+       else
+               skb = alloc_can_skb(priv->netdev, (struct can_frame **)&cf);
+
+       if (!skb) {
+               stats->rx_dropped++;
+               return;
+       }
+
+       shhwtstamps = skb_hwtstamps(skb);
+       shhwtstamps->hwtstamp = hwtstamp;
+
+       cf->can_id = le32_to_cpu(cmd->rx_can.id);
+
+       if (flags & KVASER_USB_HYDRA_CF_FLAG_EXTENDED_ID) {
+               cf->can_id &= CAN_EFF_MASK;
+               cf->can_id |= CAN_EFF_FLAG;
+       } else {
+               cf->can_id &= CAN_SFF_MASK;
+       }
+
+       if (flags & KVASER_USB_HYDRA_CF_FLAG_OVERRUN)
+               kvaser_usb_can_rx_over_error(priv->netdev);
+
+       if (flags & KVASER_USB_HYDRA_CF_FLAG_FDF) {
+               cf->len = can_dlc2len(get_canfd_dlc(dlc));
+               if (flags & KVASER_USB_HYDRA_CF_FLAG_BRS)
+                       cf->flags |= CANFD_BRS;
+               if (flags & KVASER_USB_HYDRA_CF_FLAG_ESI)
+                       cf->flags |= CANFD_ESI;
+       } else {
+               cf->len = get_can_dlc(dlc);
+       }
+
+       if (flags & KVASER_USB_HYDRA_CF_FLAG_REMOTE_FRAME)
+               cf->can_id |= CAN_RTR_FLAG;
+       else
+               memcpy(cf->data, cmd->rx_can.kcan_payload, cf->len);
+
+       stats->rx_packets++;
+       stats->rx_bytes += cf->len;
+       netif_rx(skb);
+}
+
+static void kvaser_usb_hydra_handle_cmd_std(const struct kvaser_usb *dev,
+                                           const struct kvaser_cmd *cmd)
+{
+       switch (cmd->header.cmd_no) {
+       case CMD_START_CHIP_RESP:
+               kvaser_usb_hydra_start_chip_reply(dev, cmd);
+               break;
+
+       case CMD_STOP_CHIP_RESP:
+               kvaser_usb_hydra_stop_chip_reply(dev, cmd);
+               break;
+
+       case CMD_FLUSH_QUEUE_RESP:
+               kvaser_usb_hydra_flush_queue_reply(dev, cmd);
+               break;
+
+       case CMD_CHIP_STATE_EVENT:
+               kvaser_usb_hydra_state_event(dev, cmd);
+               break;
+
+       case CMD_ERROR_EVENT:
+               kvaser_usb_hydra_error_event(dev, cmd);
+               break;
+
+       case CMD_TX_ACKNOWLEDGE:
+               kvaser_usb_hydra_tx_acknowledge(dev, cmd);
+               break;
+
+       case CMD_RX_MESSAGE:
+               kvaser_usb_hydra_rx_msg_std(dev, cmd);
+               break;
+
+       /* Ignored commands */
+       case CMD_SET_BUSPARAMS_RESP:
+       case CMD_SET_BUSPARAMS_FD_RESP:
+               break;
+
+       default:
+               dev_warn(&dev->intf->dev, "Unhandled command (%d)\n",
+                        cmd->header.cmd_no);
+               break;
+       }
+}
+
+static void kvaser_usb_hydra_handle_cmd_ext(const struct kvaser_usb *dev,
+                                           const struct kvaser_cmd_ext *cmd)
+{
+       switch (cmd->cmd_no_ext) {
+       case CMD_TX_ACKNOWLEDGE_FD:
+               kvaser_usb_hydra_tx_acknowledge(dev, (struct kvaser_cmd *)cmd);
+               break;
+
+       case CMD_RX_MESSAGE_FD:
+               kvaser_usb_hydra_rx_msg_ext(dev, cmd);
+               break;
+
+       default:
+               dev_warn(&dev->intf->dev, "Unhandled extended command (%d)\n",
+                        cmd->cmd_no_ext);
+               break;
+       }
+}
+
+static void kvaser_usb_hydra_handle_cmd(const struct kvaser_usb *dev,
+                                       const struct kvaser_cmd *cmd)
+{
+       if (cmd->header.cmd_no == CMD_EXTENDED)
+               kvaser_usb_hydra_handle_cmd_ext
+                               (dev, (struct kvaser_cmd_ext *)cmd);
+       else
+               kvaser_usb_hydra_handle_cmd_std(dev, cmd);
+}
+
+static void *
+kvaser_usb_hydra_frame_to_cmd_ext(const struct kvaser_usb_net_priv *priv,
+                                 const struct sk_buff *skb, int *frame_len,
+                                 int *cmd_len, u16 transid)
+{
+       struct kvaser_usb *dev = priv->dev;
+       struct kvaser_cmd_ext *cmd;
+       struct canfd_frame *cf = (struct canfd_frame *)skb->data;
+       u8 dlc = can_len2dlc(cf->len);
+       u8 nbr_of_bytes = cf->len;
+       u32 flags;
+       u32 id;
+       u32 kcan_id;
+       u32 kcan_header;
+
+       *frame_len = nbr_of_bytes;
+
+       cmd = kcalloc(1, sizeof(struct kvaser_cmd_ext), GFP_ATOMIC);
+       if (!cmd)
+               return NULL;
+
+       kvaser_usb_hydra_set_cmd_dest_he
+                       ((struct kvaser_cmd *)cmd,
+                        dev->card_data.hydra.channel_to_he[priv->channel]);
+       kvaser_usb_hydra_set_cmd_transid((struct kvaser_cmd *)cmd, transid);
+
+       cmd->header.cmd_no = CMD_EXTENDED;
+       cmd->cmd_no_ext = CMD_TX_CAN_MESSAGE_FD;
+
+       *cmd_len = ALIGN(sizeof(struct kvaser_cmd_ext) -
+                        sizeof(cmd->tx_can.kcan_payload) + nbr_of_bytes,
+                        8);
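+
+       /* Example (derived from the struct sizes above, not from device
+        * documentation): struct kvaser_cmd_ext is 96 bytes including its
+        * 64-byte payload, so a 12-byte CAN FD frame gives
+        * ALIGN(96 - 64 + 12, 8) = 48 bytes on the wire.
+        */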
+
+       cmd->len = cpu_to_le16(*cmd_len);
+
+       cmd->tx_can.databytes = nbr_of_bytes;
+       cmd->tx_can.dlc = dlc;
+
+       if (cf->can_id & CAN_EFF_FLAG) {
+               id = cf->can_id & CAN_EFF_MASK;
+               flags = KVASER_USB_HYDRA_CF_FLAG_EXTENDED_ID;
+               kcan_id = (cf->can_id & CAN_EFF_MASK) |
+                         KVASER_USB_KCAN_DATA_IDE | KVASER_USB_KCAN_DATA_SRR;
+       } else {
+               id = cf->can_id & CAN_SFF_MASK;
+               flags = 0;
+               kcan_id = cf->can_id & CAN_SFF_MASK;
+       }
+
+       if (cf->can_id & CAN_ERR_FLAG)
+               flags |= KVASER_USB_HYDRA_CF_FLAG_ERROR_FRAME;
+
+       kcan_header = ((dlc << KVASER_USB_KCAN_DATA_DLC_SHIFT) &
+                               KVASER_USB_KCAN_DATA_DLC_MASK) |
+                       KVASER_USB_KCAN_DATA_AREQ |
+                       (priv->can.ctrlmode & CAN_CTRLMODE_ONE_SHOT ?
+                               KVASER_USB_KCAN_DATA_OSM : 0);
+
+       if (can_is_canfd_skb(skb)) {
+               kcan_header |= KVASER_USB_KCAN_DATA_FDF |
+                              (cf->flags & CANFD_BRS ?
+                                       KVASER_USB_KCAN_DATA_BRS : 0);
+       } else {
+               if (cf->can_id & CAN_RTR_FLAG) {
+                       kcan_id |= KVASER_USB_KCAN_DATA_RTR;
+                       cmd->tx_can.databytes = 0;
+                       flags |= KVASER_USB_HYDRA_CF_FLAG_REMOTE_FRAME;
+               }
+       }
+
+       cmd->tx_can.kcan_id = cpu_to_le32(kcan_id);
+       cmd->tx_can.id = cpu_to_le32(id);
+       cmd->tx_can.flags = cpu_to_le32(flags);
+       cmd->tx_can.kcan_header = cpu_to_le32(kcan_header);
+
+       memcpy(cmd->tx_can.kcan_payload, cf->data, nbr_of_bytes);
+
+       return cmd;
+}
+
+static void *
+kvaser_usb_hydra_frame_to_cmd_std(const struct kvaser_usb_net_priv *priv,
+                                 const struct sk_buff *skb, int *frame_len,
+                                 int *cmd_len, u16 transid)
+{
+       struct kvaser_usb *dev = priv->dev;
+       struct kvaser_cmd *cmd;
+       struct can_frame *cf = (struct can_frame *)skb->data;
+       u32 flags;
+       u32 id;
+
+       *frame_len = cf->can_dlc;
+
+       cmd = kcalloc(1, sizeof(struct kvaser_cmd), GFP_ATOMIC);
+       if (!cmd)
+               return NULL;
+
+       kvaser_usb_hydra_set_cmd_dest_he
+               (cmd, dev->card_data.hydra.channel_to_he[priv->channel]);
+       kvaser_usb_hydra_set_cmd_transid(cmd, transid);
+
+       cmd->header.cmd_no = CMD_TX_CAN_MESSAGE;
+
+       *cmd_len = ALIGN(sizeof(struct kvaser_cmd), 8);
+
+       if (cf->can_id & CAN_EFF_FLAG) {
+               id = (cf->can_id & CAN_EFF_MASK);
+               id |= KVASER_USB_HYDRA_EXTENDED_FRAME_ID;
+       } else {
+               id = cf->can_id & CAN_SFF_MASK;
+       }
+
+       cmd->tx_can.dlc = cf->can_dlc;
+
+       flags = (cf->can_id & CAN_EFF_FLAG ?
+                KVASER_USB_HYDRA_CF_FLAG_EXTENDED_ID : 0);
+
+       if (cf->can_id & CAN_RTR_FLAG)
+               flags |= KVASER_USB_HYDRA_CF_FLAG_REMOTE_FRAME;
+
+       flags |= (cf->can_id & CAN_ERR_FLAG ?
+                 KVASER_USB_HYDRA_CF_FLAG_ERROR_FRAME : 0);
+
+       cmd->tx_can.id = cpu_to_le32(id);
+       cmd->tx_can.flags = flags;
+
+       memcpy(cmd->tx_can.data, cf->data, *frame_len);
+
+       return cmd;
+}
+
+static int kvaser_usb_hydra_set_mode(struct net_device *netdev,
+                                    enum can_mode mode)
+{
+       int err = 0;
+
+       switch (mode) {
+       case CAN_MODE_START:
+               /* CAN controller automatically recovers from BUS_OFF */
+               break;
+       default:
+               err = -EOPNOTSUPP;
+       }
+
+       return err;
+}
+
+static int kvaser_usb_hydra_set_bittiming(struct net_device *netdev)
+{
+       struct kvaser_cmd *cmd;
+       struct kvaser_usb_net_priv *priv = netdev_priv(netdev);
+       struct can_bittiming *bt = &priv->can.bittiming;
+       struct kvaser_usb *dev = priv->dev;
+       int tseg1 = bt->prop_seg + bt->phase_seg1;
+       int tseg2 = bt->phase_seg2;
+       int sjw = bt->sjw;
+       int err;
+
+       cmd = kcalloc(1, sizeof(struct kvaser_cmd), GFP_KERNEL);
+       if (!cmd)
+               return -ENOMEM;
+
+       cmd->header.cmd_no = CMD_SET_BUSPARAMS_REQ;
+       cmd->set_busparams_req.bitrate = cpu_to_le32(bt->bitrate);
+       cmd->set_busparams_req.sjw = (u8)sjw;
+       cmd->set_busparams_req.tseg1 = (u8)tseg1;
+       cmd->set_busparams_req.tseg2 = (u8)tseg2;
+       cmd->set_busparams_req.nsamples = 1;
+
+       kvaser_usb_hydra_set_cmd_dest_he
+               (cmd, dev->card_data.hydra.channel_to_he[priv->channel]);
+       kvaser_usb_hydra_set_cmd_transid
+                               (cmd, kvaser_usb_hydra_get_next_transid(dev));
+
+       err = kvaser_usb_send_cmd(dev, cmd, kvaser_usb_hydra_cmd_size(cmd));
+
+       kfree(cmd);
+
+       return err;
+}
+
+static int kvaser_usb_hydra_set_data_bittiming(struct net_device *netdev)
+{
+       struct kvaser_cmd *cmd;
+       struct kvaser_usb_net_priv *priv = netdev_priv(netdev);
+       struct can_bittiming *dbt = &priv->can.data_bittiming;
+       struct kvaser_usb *dev = priv->dev;
+       int tseg1 = dbt->prop_seg + dbt->phase_seg1;
+       int tseg2 = dbt->phase_seg2;
+       int sjw = dbt->sjw;
+       int err;
+
+       cmd = kcalloc(1, sizeof(struct kvaser_cmd), GFP_KERNEL);
+       if (!cmd)
+               return -ENOMEM;
+
+       cmd->header.cmd_no = CMD_SET_BUSPARAMS_FD_REQ;
+       cmd->set_busparams_req.bitrate_d = cpu_to_le32(dbt->bitrate);
+       cmd->set_busparams_req.sjw_d = (u8)sjw;
+       cmd->set_busparams_req.tseg1_d = (u8)tseg1;
+       cmd->set_busparams_req.tseg2_d = (u8)tseg2;
+       cmd->set_busparams_req.nsamples_d = 1;
+
+       if (priv->can.ctrlmode & CAN_CTRLMODE_FD) {
+               if (priv->can.ctrlmode & CAN_CTRLMODE_FD_NON_ISO)
+                       cmd->set_busparams_req.canfd_mode =
+                                       KVASER_USB_HYDRA_BUS_MODE_NONISO;
+               else
+                       cmd->set_busparams_req.canfd_mode =
+                                       KVASER_USB_HYDRA_BUS_MODE_CANFD_ISO;
+       }
+
+       kvaser_usb_hydra_set_cmd_dest_he
+               (cmd, dev->card_data.hydra.channel_to_he[priv->channel]);
+       kvaser_usb_hydra_set_cmd_transid
+                               (cmd, kvaser_usb_hydra_get_next_transid(dev));
+
+       err = kvaser_usb_send_cmd(dev, cmd, kvaser_usb_hydra_cmd_size(cmd));
+
+       kfree(cmd);
+
+       return err;
+}
+
+static int kvaser_usb_hydra_get_berr_counter(const struct net_device *netdev,
+                                            struct can_berr_counter *bec)
+{
+       struct kvaser_usb_net_priv *priv = netdev_priv(netdev);
+       int err;
+
+       err = kvaser_usb_hydra_send_simple_cmd(priv->dev,
+                                              CMD_GET_CHIP_STATE_REQ,
+                                              priv->channel);
+       if (err)
+               return err;
+
+       *bec = priv->bec;
+
+       return 0;
+}
+
+static int kvaser_usb_hydra_setup_endpoints(struct kvaser_usb *dev)
+{
+       const struct usb_host_interface *iface_desc;
+       struct usb_endpoint_descriptor *ep;
+       int i;
+
+       iface_desc = &dev->intf->altsetting[0];
+
+       for (i = 0; i < iface_desc->desc.bNumEndpoints; ++i) {
+               ep = &iface_desc->endpoint[i].desc;
+
+               if (!dev->bulk_in && usb_endpoint_is_bulk_in(ep) &&
+                   ep->bEndpointAddress == KVASER_USB_HYDRA_BULK_EP_IN_ADDR)
+                       dev->bulk_in = ep;
+
+               if (!dev->bulk_out && usb_endpoint_is_bulk_out(ep) &&
+                   ep->bEndpointAddress == KVASER_USB_HYDRA_BULK_EP_OUT_ADDR)
+                       dev->bulk_out = ep;
+
+               if (dev->bulk_in && dev->bulk_out)
+                       return 0;
+       }
+
+       return -ENODEV;
+}
+
+static int kvaser_usb_hydra_init_card(struct kvaser_usb *dev)
+{
+       int err;
+       unsigned int i;
+       struct kvaser_usb_dev_card_data_hydra *card_data =
+                                                       &dev->card_data.hydra;
+
+       card_data->transid = KVASER_USB_HYDRA_MIN_TRANSID;
+       spin_lock_init(&card_data->transid_lock);
+
+       memset(card_data->usb_rx_leftover, 0, KVASER_USB_HYDRA_MAX_CMD_LEN);
+       card_data->usb_rx_leftover_len = 0;
+       spin_lock_init(&card_data->usb_rx_leftover_lock);
+
+       memset(card_data->channel_to_he, KVASER_USB_HYDRA_HE_ADDRESS_ILLEGAL,
+              sizeof(card_data->channel_to_he));
+       card_data->sysdbg_he = 0;
+
+       for (i = 0; i < KVASER_USB_MAX_NET_DEVICES; i++) {
+               err = kvaser_usb_hydra_map_channel
+                                       (dev,
+                                        (KVASER_USB_HYDRA_TRANSID_CANHE | i),
+                                        i, "CAN");
+               if (err) {
+                       dev_err(&dev->intf->dev,
+                               "CMD_MAP_CHANNEL_REQ failed for CAN%u\n", i);
+                       return err;
+               }
+       }
+
+       err = kvaser_usb_hydra_map_channel(dev, KVASER_USB_HYDRA_TRANSID_SYSDBG,
+                                          0, "SYSDBG");
+       if (err) {
+               dev_err(&dev->intf->dev,
+                       "CMD_MAP_CHANNEL_REQ failed for SYSDBG\n");
+               return err;
+       }
+
+       return 0;
+}
+
+static int kvaser_usb_hydra_get_software_info(struct kvaser_usb *dev)
+{
+       struct kvaser_cmd cmd;
+       int err;
+
+       err = kvaser_usb_hydra_send_simple_cmd(dev, CMD_GET_SOFTWARE_INFO_REQ,
+                                              -1);
+       if (err)
+               return err;
+
+       memset(&cmd, 0, sizeof(struct kvaser_cmd));
+       err = kvaser_usb_hydra_wait_cmd(dev, CMD_GET_SOFTWARE_INFO_RESP, &cmd);
+       if (err)
+               return err;
+
+       dev->max_tx_urbs = min_t(unsigned int, KVASER_USB_MAX_TX_URBS,
+                                le16_to_cpu(cmd.sw_info.max_outstanding_tx));
+
+       return 0;
+}
+
+static int kvaser_usb_hydra_get_software_details(struct kvaser_usb *dev)
+{
+       struct kvaser_cmd *cmd;
+       int err;
+       u32 flags;
+       struct kvaser_usb_dev_card_data *card_data = &dev->card_data;
+
+       cmd = kcalloc(1, sizeof(struct kvaser_cmd), GFP_KERNEL);
+       if (!cmd)
+               return -ENOMEM;
+
+       cmd->header.cmd_no = CMD_GET_SOFTWARE_DETAILS_REQ;
+       cmd->sw_detail_req.use_ext_cmd = 1;
+       kvaser_usb_hydra_set_cmd_dest_he
+                               (cmd, KVASER_USB_HYDRA_HE_ADDRESS_ILLEGAL);
+
+       kvaser_usb_hydra_set_cmd_transid
+                               (cmd, kvaser_usb_hydra_get_next_transid(dev));
+
+       err = kvaser_usb_send_cmd(dev, cmd, kvaser_usb_hydra_cmd_size(cmd));
+       if (err)
+               goto end;
+
+       err = kvaser_usb_hydra_wait_cmd(dev, CMD_GET_SOFTWARE_DETAILS_RESP,
+                                       cmd);
+       if (err)
+               goto end;
+
+       dev->fw_version = le32_to_cpu(cmd->sw_detail_res.sw_version);
+       flags = le32_to_cpu(cmd->sw_detail_res.sw_flags);
+
+       if (flags & KVASER_USB_HYDRA_SW_FLAG_FW_BAD) {
+               dev_err(&dev->intf->dev,
+                       "Bad firmware, device refuse to run!\n");
+               err = -EINVAL;
+               goto end;
+       }
+
+       if (flags & KVASER_USB_HYDRA_SW_FLAG_FW_BETA)
+               dev_info(&dev->intf->dev, "Beta firmware in use\n");
+
+       if (flags & KVASER_USB_HYDRA_SW_FLAG_EXT_CAP)
+               card_data->capabilities |= KVASER_USB_CAP_EXT_CAP;
+
+       if (flags & KVASER_USB_HYDRA_SW_FLAG_EXT_CMD)
+               card_data->capabilities |= KVASER_USB_HYDRA_CAP_EXT_CMD;
+
+       if (flags & KVASER_USB_HYDRA_SW_FLAG_CANFD)
+               card_data->ctrlmode_supported |= CAN_CTRLMODE_FD;
+
+       if (flags & KVASER_USB_HYDRA_SW_FLAG_NONISO)
+               card_data->ctrlmode_supported |= CAN_CTRLMODE_FD_NON_ISO;
+
+       if (flags & KVASER_USB_HYDRA_SW_FLAG_FREQ_80M)
+               dev->cfg = &kvaser_usb_hydra_dev_cfg_kcan;
+       else
+               dev->cfg = &kvaser_usb_hydra_dev_cfg_flexc;
+
+end:
+       kfree(cmd);
+
+       return err;
+}
+
+static int kvaser_usb_hydra_get_card_info(struct kvaser_usb *dev)
+{
+       struct kvaser_cmd cmd;
+       int err;
+
+       err = kvaser_usb_hydra_send_simple_cmd(dev, CMD_GET_CARD_INFO_REQ, -1);
+       if (err)
+               return err;
+
+       memset(&cmd, 0, sizeof(struct kvaser_cmd));
+       err = kvaser_usb_hydra_wait_cmd(dev, CMD_GET_CARD_INFO_RESP, &cmd);
+       if (err)
+               return err;
+
+       dev->nchannels = cmd.card_info.nchannels;
+       if (dev->nchannels > KVASER_USB_MAX_NET_DEVICES)
+               return -EINVAL;
+
+       return 0;
+}
+
+static int kvaser_usb_hydra_get_capabilities(struct kvaser_usb *dev)
+{
+       int err;
+       u16 status;
+
+       if (!(dev->card_data.capabilities & KVASER_USB_CAP_EXT_CAP)) {
+               dev_info(&dev->intf->dev,
+                        "No extended capability support. Upgrade your device.\n");
+               return 0;
+       }
+
+       err = kvaser_usb_hydra_get_single_capability
+                                       (dev,
+                                        KVASER_USB_HYDRA_CAP_CMD_LISTEN_MODE,
+                                        &status);
+       if (err)
+               return err;
+       if (status)
+               dev_info(&dev->intf->dev,
+                        "KVASER_USB_HYDRA_CAP_CMD_LISTEN_MODE failed %u\n",
+                        status);
+
+       err = kvaser_usb_hydra_get_single_capability
+                                       (dev,
+                                        KVASER_USB_HYDRA_CAP_CMD_ERR_REPORT,
+                                        &status);
+       if (err)
+               return err;
+       if (status)
+               dev_info(&dev->intf->dev,
+                        "KVASER_USB_HYDRA_CAP_CMD_ERR_REPORT failed %u\n",
+                        status);
+
+       err = kvaser_usb_hydra_get_single_capability
+                                       (dev, KVASER_USB_HYDRA_CAP_CMD_ONE_SHOT,
+                                        &status);
+       if (err)
+               return err;
+       if (status)
+               dev_info(&dev->intf->dev,
+                        "KVASER_USB_HYDRA_CAP_CMD_ONE_SHOT failed %u\n",
+                        status);
+
+       return 0;
+}
+
+static int kvaser_usb_hydra_set_opt_mode(const struct kvaser_usb_net_priv *priv)
+{
+       struct kvaser_usb *dev = priv->dev;
+       struct kvaser_cmd *cmd;
+       int err;
+
+       if ((priv->can.ctrlmode &
+           (CAN_CTRLMODE_FD | CAN_CTRLMODE_FD_NON_ISO)) ==
+           CAN_CTRLMODE_FD_NON_ISO) {
+               netdev_warn(priv->netdev,
+                           "CTRLMODE_FD must be enabled when CTRLMODE_FD_NON_ISO is enabled\n");
+               return -EINVAL;
+       }
+
+       cmd = kzalloc(sizeof(struct kvaser_cmd), GFP_KERNEL);
+       if (!cmd)
+               return -ENOMEM;
+
+       cmd->header.cmd_no = CMD_SET_DRIVERMODE_REQ;
+       kvaser_usb_hydra_set_cmd_dest_he
+               (cmd, dev->card_data.hydra.channel_to_he[priv->channel]);
+       kvaser_usb_hydra_set_cmd_transid
+                               (cmd, kvaser_usb_hydra_get_next_transid(dev));
+       if (priv->can.ctrlmode & CAN_CTRLMODE_LISTENONLY)
+               cmd->set_ctrlmode.mode = KVASER_USB_HYDRA_CTRLMODE_LISTEN;
+       else
+               cmd->set_ctrlmode.mode = KVASER_USB_HYDRA_CTRLMODE_NORMAL;
+
+       err = kvaser_usb_send_cmd(dev, cmd, kvaser_usb_hydra_cmd_size(cmd));
+       kfree(cmd);
+
+       return err;
+}
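+
+/* Illustration of the ctrlmode check above: CAN_CTRLMODE_FD alone and
+ * (CAN_CTRLMODE_FD | CAN_CTRLMODE_FD_NON_ISO) are accepted, while
+ * CAN_CTRLMODE_FD_NON_ISO on its own is rejected with -EINVAL.
+ */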
+
+static int kvaser_usb_hydra_start_chip(struct kvaser_usb_net_priv *priv)
+{
+       int err;
+
+       init_completion(&priv->start_comp);
+
+       err = kvaser_usb_hydra_send_simple_cmd(priv->dev, CMD_START_CHIP_REQ,
+                                              priv->channel);
+       if (err)
+               return err;
+
+       if (!wait_for_completion_timeout(&priv->start_comp,
+                                        msecs_to_jiffies(KVASER_USB_TIMEOUT)))
+               return -ETIMEDOUT;
+
+       return 0;
+}
+
+static int kvaser_usb_hydra_stop_chip(struct kvaser_usb_net_priv *priv)
+{
+       int err;
+
+       init_completion(&priv->stop_comp);
+
+       /* Make sure we do not report an invalid BUS_OFF from
+        * CMD_CHIP_STATE_EVENT; see the comment in
+        * kvaser_usb_hydra_update_state().
+        */
+       priv->can.state = CAN_STATE_STOPPED;
+
+       err = kvaser_usb_hydra_send_simple_cmd(priv->dev, CMD_STOP_CHIP_REQ,
+                                              priv->channel);
+       if (err)
+               return err;
+
+       if (!wait_for_completion_timeout(&priv->stop_comp,
+                                        msecs_to_jiffies(KVASER_USB_TIMEOUT)))
+               return -ETIMEDOUT;
+
+       return 0;
+}
+
+static int kvaser_usb_hydra_flush_queue(struct kvaser_usb_net_priv *priv)
+{
+       int err;
+
+       init_completion(&priv->flush_comp);
+
+       err = kvaser_usb_hydra_send_simple_cmd(priv->dev, CMD_FLUSH_QUEUE,
+                                              priv->channel);
+       if (err)
+               return err;
+
+       if (!wait_for_completion_timeout(&priv->flush_comp,
+                                        msecs_to_jiffies(KVASER_USB_TIMEOUT)))
+               return -ETIMEDOUT;
+
+       return 0;
+}
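+
+/* start_chip, stop_chip and flush_queue above share one pattern: send a
+ * simple command, then block on a completion that is signalled when the
+ * matching reply arrives on the bulk-in path, giving up with -ETIMEDOUT
+ * after KVASER_USB_TIMEOUT ms.
+ */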
+
+/* A single extended hydra command can be transmitted in multiple transfers.
+ * We have to buffer partial hydra commands and handle them on the next
+ * callback.
+ */
+static void kvaser_usb_hydra_read_bulk_callback(struct kvaser_usb *dev,
+                                               void *buf, int len)
+{
+       unsigned long irq_flags;
+       struct kvaser_cmd *cmd;
+       int pos = 0;
+       size_t cmd_len;
+       struct kvaser_usb_dev_card_data_hydra *card_data =
+                                                       &dev->card_data.hydra;
+       int usb_rx_leftover_len;
+       spinlock_t *usb_rx_leftover_lock = &card_data->usb_rx_leftover_lock;
+
+       spin_lock_irqsave(usb_rx_leftover_lock, irq_flags);
+       usb_rx_leftover_len = card_data->usb_rx_leftover_len;
+       if (usb_rx_leftover_len) {
+               int remaining_bytes;
+
+               cmd = (struct kvaser_cmd *)card_data->usb_rx_leftover;
+
+               cmd_len = kvaser_usb_hydra_cmd_size(cmd);
+
+               remaining_bytes = min_t(unsigned int, len,
+                                       cmd_len - usb_rx_leftover_len);
+               /* Make sure we do not overflow usb_rx_leftover */
+               if (remaining_bytes + usb_rx_leftover_len >
+                                               KVASER_USB_HYDRA_MAX_CMD_LEN) {
+                       dev_err(&dev->intf->dev, "Format error\n");
+                       spin_unlock_irqrestore(usb_rx_leftover_lock, irq_flags);
+                       return;
+               }
+
+               memcpy(card_data->usb_rx_leftover + usb_rx_leftover_len, buf,
+                      remaining_bytes);
+               pos += remaining_bytes;
+
+               if (remaining_bytes + usb_rx_leftover_len == cmd_len) {
+                       kvaser_usb_hydra_handle_cmd(dev, cmd);
+                       usb_rx_leftover_len = 0;
+               } else {
+                       /* Command still not complete */
+                       usb_rx_leftover_len += remaining_bytes;
+               }
+               card_data->usb_rx_leftover_len = usb_rx_leftover_len;
+       }
+       spin_unlock_irqrestore(usb_rx_leftover_lock, irq_flags);
+
+       while (pos < len) {
+               cmd = buf + pos;
+
+               cmd_len = kvaser_usb_hydra_cmd_size(cmd);
+
+               if (pos + cmd_len > len) {
+                       /* We got first part of a command */
+                       int leftover_bytes;
+
+                       leftover_bytes = len - pos;
+                       /* Make sure we do not overflow usb_rx_leftover */
+                       if (leftover_bytes > KVASER_USB_HYDRA_MAX_CMD_LEN) {
+                               dev_err(&dev->intf->dev, "Format error\n");
+                               return;
+                       }
+                       spin_lock_irqsave(usb_rx_leftover_lock, irq_flags);
+                       memcpy(card_data->usb_rx_leftover, buf + pos,
+                              leftover_bytes);
+                       card_data->usb_rx_leftover_len = leftover_bytes;
+                       spin_unlock_irqrestore(usb_rx_leftover_lock, irq_flags);
+                       break;
+               }
+
+               kvaser_usb_hydra_handle_cmd(dev, cmd);
+               pos += cmd_len;
+       }
+}
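+
+/* Worked example of the reassembly above, assuming a 32-byte extended
+ * command split across two bulk transfers: the first callback delivers
+ * only 20 bytes, which are saved in usb_rx_leftover with
+ * usb_rx_leftover_len = 20; on the next callback the first 12 bytes
+ * (cmd_len - usb_rx_leftover_len) complete the command, it is handled,
+ * and parsing of the new buffer continues at pos = 12.
+ */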
+
+static void *
+kvaser_usb_hydra_frame_to_cmd(const struct kvaser_usb_net_priv *priv,
+                             const struct sk_buff *skb, int *frame_len,
+                             int *cmd_len, u16 transid)
+{
+       void *buf;
+
+       if (priv->dev->card_data.capabilities & KVASER_USB_HYDRA_CAP_EXT_CMD)
+               buf = kvaser_usb_hydra_frame_to_cmd_ext(priv, skb, frame_len,
+                                                       cmd_len, transid);
+       else
+               buf = kvaser_usb_hydra_frame_to_cmd_std(priv, skb, frame_len,
+                                                       cmd_len, transid);
+
+       return buf;
+}
+
+const struct kvaser_usb_dev_ops kvaser_usb_hydra_dev_ops = {
+       .dev_set_mode = kvaser_usb_hydra_set_mode,
+       .dev_set_bittiming = kvaser_usb_hydra_set_bittiming,
+       .dev_set_data_bittiming = kvaser_usb_hydra_set_data_bittiming,
+       .dev_get_berr_counter = kvaser_usb_hydra_get_berr_counter,
+       .dev_setup_endpoints = kvaser_usb_hydra_setup_endpoints,
+       .dev_init_card = kvaser_usb_hydra_init_card,
+       .dev_get_software_info = kvaser_usb_hydra_get_software_info,
+       .dev_get_software_details = kvaser_usb_hydra_get_software_details,
+       .dev_get_card_info = kvaser_usb_hydra_get_card_info,
+       .dev_get_capabilities = kvaser_usb_hydra_get_capabilities,
+       .dev_set_opt_mode = kvaser_usb_hydra_set_opt_mode,
+       .dev_start_chip = kvaser_usb_hydra_start_chip,
+       .dev_stop_chip = kvaser_usb_hydra_stop_chip,
+       .dev_reset_chip = NULL,
+       .dev_flush_queue = kvaser_usb_hydra_flush_queue,
+       .dev_read_bulk_callback = kvaser_usb_hydra_read_bulk_callback,
+       .dev_frame_to_cmd = kvaser_usb_hydra_frame_to_cmd,
+};
+
+static const struct kvaser_usb_dev_cfg kvaser_usb_hydra_dev_cfg_kcan = {
+       .clock = {
+               .freq = 80000000,
+       },
+       .timestamp_freq = 80,
+       .bittiming_const = &kvaser_usb_hydra_kcan_bittiming_c,
+       .data_bittiming_const = &kvaser_usb_hydra_kcan_bittiming_c,
+};
+
+static const struct kvaser_usb_dev_cfg kvaser_usb_hydra_dev_cfg_flexc = {
+       .clock = {
+               .freq = 24000000,
+       },
+       .timestamp_freq = 1,
+       .bittiming_const = &kvaser_usb_hydra_flexc_bittiming_c,
+};
diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c
new file mode 100644 (file)
index 0000000..07d2f3a
--- /dev/null
@@ -0,0 +1,1358 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Parts of this driver are based on the following:
+ *  - Kvaser linux leaf driver (version 4.78)
+ *  - CAN driver for esd CAN-USB/2
+ *  - Kvaser linux usbcanII driver (version 5.3)
+ *
+ * Copyright (C) 2002-2018 KVASER AB, Sweden. All rights reserved.
+ * Copyright (C) 2010 Matthias Fuchs <matthias.fuchs@esd.eu>, esd gmbh
+ * Copyright (C) 2012 Olivier Sobrie <olivier@sobrie.be>
+ * Copyright (C) 2015 Valeo S.A.
+ */
+
+#include <linux/completion.h>
+#include <linux/device.h>
+#include <linux/gfp.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/usb.h>
+
+#include <linux/can.h>
+#include <linux/can/dev.h>
+#include <linux/can/error.h>
+#include <linux/can/netlink.h>
+
+#include "kvaser_usb.h"
+
+/* Forward declaration */
+static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_dev_cfg;
+
+#define CAN_USB_CLOCK                  8000000
+#define MAX_USBCAN_NET_DEVICES         2
+
+/* Command header size */
+#define CMD_HEADER_LEN                 2
+
+/* Kvaser CAN message flags */
+#define MSG_FLAG_ERROR_FRAME           BIT(0)
+#define MSG_FLAG_OVERRUN               BIT(1)
+#define MSG_FLAG_NERR                  BIT(2)
+#define MSG_FLAG_WAKEUP                        BIT(3)
+#define MSG_FLAG_REMOTE_FRAME          BIT(4)
+#define MSG_FLAG_RESERVED              BIT(5)
+#define MSG_FLAG_TX_ACK                        BIT(6)
+#define MSG_FLAG_TX_REQUEST            BIT(7)
+
+/* CAN states (M16C CxSTRH register) */
+#define M16C_STATE_BUS_RESET           BIT(0)
+#define M16C_STATE_BUS_ERROR           BIT(4)
+#define M16C_STATE_BUS_PASSIVE         BIT(5)
+#define M16C_STATE_BUS_OFF             BIT(6)
+
+/* Leaf/usbcan command ids */
+#define CMD_RX_STD_MESSAGE             12
+#define CMD_TX_STD_MESSAGE             13
+#define CMD_RX_EXT_MESSAGE             14
+#define CMD_TX_EXT_MESSAGE             15
+#define CMD_SET_BUS_PARAMS             16
+#define CMD_CHIP_STATE_EVENT           20
+#define CMD_SET_CTRL_MODE              21
+#define CMD_RESET_CHIP                 24
+#define CMD_START_CHIP                 26
+#define CMD_START_CHIP_REPLY           27
+#define CMD_STOP_CHIP                  28
+#define CMD_STOP_CHIP_REPLY            29
+
+#define CMD_USBCAN_CLOCK_OVERFLOW_EVENT        33
+
+#define CMD_GET_CARD_INFO              34
+#define CMD_GET_CARD_INFO_REPLY                35
+#define CMD_GET_SOFTWARE_INFO          38
+#define CMD_GET_SOFTWARE_INFO_REPLY    39
+#define CMD_FLUSH_QUEUE                        48
+#define CMD_TX_ACKNOWLEDGE             50
+#define CMD_CAN_ERROR_EVENT            51
+#define CMD_FLUSH_QUEUE_REPLY          68
+
+#define CMD_LEAF_LOG_MESSAGE           106
+
+/* error factors */
+#define M16C_EF_ACKE                   BIT(0)
+#define M16C_EF_CRCE                   BIT(1)
+#define M16C_EF_FORME                  BIT(2)
+#define M16C_EF_STFE                   BIT(3)
+#define M16C_EF_BITE0                  BIT(4)
+#define M16C_EF_BITE1                  BIT(5)
+#define M16C_EF_RCVE                   BIT(6)
+#define M16C_EF_TRE                    BIT(7)
+
+/* Only Leaf-based devices can report M16C error factors,
+ * thus define our own error status flags for USBCANII
+ */
+#define USBCAN_ERROR_STATE_NONE                0
+#define USBCAN_ERROR_STATE_TX_ERROR    BIT(0)
+#define USBCAN_ERROR_STATE_RX_ERROR    BIT(1)
+#define USBCAN_ERROR_STATE_BUSERROR    BIT(2)
+
+/* bittiming parameters */
+#define KVASER_USB_TSEG1_MIN           1
+#define KVASER_USB_TSEG1_MAX           16
+#define KVASER_USB_TSEG2_MIN           1
+#define KVASER_USB_TSEG2_MAX           8
+#define KVASER_USB_SJW_MAX             4
+#define KVASER_USB_BRP_MIN             1
+#define KVASER_USB_BRP_MAX             64
+#define KVASER_USB_BRP_INC             1
+
+/* ctrl modes */
+#define KVASER_CTRL_MODE_NORMAL                1
+#define KVASER_CTRL_MODE_SILENT                2
+#define KVASER_CTRL_MODE_SELFRECEPTION 3
+#define KVASER_CTRL_MODE_OFF           4
+
+/* Extended CAN identifier flag */
+#define KVASER_EXTENDED_FRAME          BIT(31)
+
+struct kvaser_cmd_simple {
+       u8 tid;
+       u8 channel;
+} __packed;
+
+struct kvaser_cmd_cardinfo {
+       u8 tid;
+       u8 nchannels;
+       __le32 serial_number;
+       __le32 padding0;
+       __le32 clock_resolution;
+       __le32 mfgdate;
+       u8 ean[8];
+       u8 hw_revision;
+       union {
+               struct {
+                       u8 usb_hs_mode;
+               } __packed leaf1;
+               struct {
+                       u8 padding;
+               } __packed usbcan1;
+       } __packed;
+       __le16 padding1;
+} __packed;
+
+struct leaf_cmd_softinfo {
+       u8 tid;
+       u8 padding0;
+       __le32 sw_options;
+       __le32 fw_version;
+       __le16 max_outstanding_tx;
+       __le16 padding1[9];
+} __packed;
+
+struct usbcan_cmd_softinfo {
+       u8 tid;
+       u8 fw_name[5];
+       __le16 max_outstanding_tx;
+       u8 padding[6];
+       __le32 fw_version;
+       __le16 checksum;
+       __le16 sw_options;
+} __packed;
+
+struct kvaser_cmd_busparams {
+       u8 tid;
+       u8 channel;
+       __le32 bitrate;
+       u8 tseg1;
+       u8 tseg2;
+       u8 sjw;
+       u8 no_samp;
+} __packed;
+
+struct kvaser_cmd_tx_can {
+       u8 channel;
+       u8 tid;
+       u8 data[14];
+       union {
+               struct {
+                       u8 padding;
+                       u8 flags;
+               } __packed leaf;
+               struct {
+                       u8 flags;
+                       u8 padding;
+               } __packed usbcan;
+       } __packed;
+} __packed;
+
+struct kvaser_cmd_rx_can_header {
+       u8 channel;
+       u8 flag;
+} __packed;
+
+struct leaf_cmd_rx_can {
+       u8 channel;
+       u8 flag;
+
+       __le16 time[3];
+       u8 data[14];
+} __packed;
+
+struct usbcan_cmd_rx_can {
+       u8 channel;
+       u8 flag;
+
+       u8 data[14];
+       __le16 time;
+} __packed;
+
+struct leaf_cmd_chip_state_event {
+       u8 tid;
+       u8 channel;
+
+       __le16 time[3];
+       u8 tx_errors_count;
+       u8 rx_errors_count;
+
+       u8 status;
+       u8 padding[3];
+} __packed;
+
+struct usbcan_cmd_chip_state_event {
+       u8 tid;
+       u8 channel;
+
+       u8 tx_errors_count;
+       u8 rx_errors_count;
+       __le16 time;
+
+       u8 status;
+       u8 padding[3];
+} __packed;
+
+struct kvaser_cmd_tx_acknowledge_header {
+       u8 channel;
+       u8 tid;
+} __packed;
+
+struct leaf_cmd_error_event {
+       u8 tid;
+       u8 flags;
+       __le16 time[3];
+       u8 channel;
+       u8 padding;
+       u8 tx_errors_count;
+       u8 rx_errors_count;
+       u8 status;
+       u8 error_factor;
+} __packed;
+
+struct usbcan_cmd_error_event {
+       u8 tid;
+       u8 padding;
+       u8 tx_errors_count_ch0;
+       u8 rx_errors_count_ch0;
+       u8 tx_errors_count_ch1;
+       u8 rx_errors_count_ch1;
+       u8 status_ch0;
+       u8 status_ch1;
+       __le16 time;
+} __packed;
+
+struct kvaser_cmd_ctrl_mode {
+       u8 tid;
+       u8 channel;
+       u8 ctrl_mode;
+       u8 padding[3];
+} __packed;
+
+struct kvaser_cmd_flush_queue {
+       u8 tid;
+       u8 channel;
+       u8 flags;
+       u8 padding[3];
+} __packed;
+
+struct leaf_cmd_log_message {
+       u8 channel;
+       u8 flags;
+       __le16 time[3];
+       u8 dlc;
+       u8 time_offset;
+       __le32 id;
+       u8 data[8];
+} __packed;
+
+struct kvaser_cmd {
+       u8 len;
+       u8 id;
+       union   {
+               struct kvaser_cmd_simple simple;
+               struct kvaser_cmd_cardinfo cardinfo;
+               struct kvaser_cmd_busparams busparams;
+
+               struct kvaser_cmd_rx_can_header rx_can_header;
+               struct kvaser_cmd_tx_acknowledge_header tx_acknowledge_header;
+
+               union {
+                       struct leaf_cmd_softinfo softinfo;
+                       struct leaf_cmd_rx_can rx_can;
+                       struct leaf_cmd_chip_state_event chip_state_event;
+                       struct leaf_cmd_error_event error_event;
+                       struct leaf_cmd_log_message log_message;
+               } __packed leaf;
+
+               union {
+                       struct usbcan_cmd_softinfo softinfo;
+                       struct usbcan_cmd_rx_can rx_can;
+                       struct usbcan_cmd_chip_state_event chip_state_event;
+                       struct usbcan_cmd_error_event error_event;
+               } __packed usbcan;
+
+               struct kvaser_cmd_tx_can tx_can;
+               struct kvaser_cmd_ctrl_mode ctrl_mode;
+               struct kvaser_cmd_flush_queue flush_queue;
+       } u;
+} __packed;
+
+/* Summary of a kvaser error event, for unified Leaf/USBCAN error
+ * handling. Some discrepancies between the two families exist:
+ *
+ * - USBCAN firmware does not report M16C "error factors"
+ * - USBCAN controllers have difficulty reporting whether the raised
+ *   error event is for ch0 or ch1. They leave such arbitration to the
+ *   OS driver by letting it compare error counters with previous values
+ *   and decide the error event's channel. Thus for USBCAN, the channel
+ *   field is only advisory.
+ */
+struct kvaser_usb_err_summary {
+       u8 channel, status, txerr, rxerr;
+       union {
+               struct {
+                       u8 error_factor;
+               } leaf;
+               struct {
+                       u8 other_ch_status;
+                       u8 error_state;
+               } usbcan;
+       };
+};
+
+static void *
+kvaser_usb_leaf_frame_to_cmd(const struct kvaser_usb_net_priv *priv,
+                            const struct sk_buff *skb, int *frame_len,
+                            int *cmd_len, u16 transid)
+{
+       struct kvaser_usb *dev = priv->dev;
+       struct kvaser_cmd *cmd;
+       u8 *cmd_tx_can_flags = NULL;            /* silence GCC warning */
+       struct can_frame *cf = (struct can_frame *)skb->data;
+
+       *frame_len = cf->can_dlc;
+
+       cmd = kmalloc(sizeof(*cmd), GFP_ATOMIC);
+       if (cmd) {
+               cmd->u.tx_can.tid = transid & 0xff;
+               cmd->len = *cmd_len = CMD_HEADER_LEN +
+                                     sizeof(struct kvaser_cmd_tx_can);
+               cmd->u.tx_can.channel = priv->channel;
+
+               switch (dev->card_data.leaf.family) {
+               case KVASER_LEAF:
+                       cmd_tx_can_flags = &cmd->u.tx_can.leaf.flags;
+                       break;
+               case KVASER_USBCAN:
+                       cmd_tx_can_flags = &cmd->u.tx_can.usbcan.flags;
+                       break;
+               }
+
+               *cmd_tx_can_flags = 0;
+
+               if (cf->can_id & CAN_EFF_FLAG) {
+                       cmd->id = CMD_TX_EXT_MESSAGE;
+                       cmd->u.tx_can.data[0] = (cf->can_id >> 24) & 0x1f;
+                       cmd->u.tx_can.data[1] = (cf->can_id >> 18) & 0x3f;
+                       cmd->u.tx_can.data[2] = (cf->can_id >> 14) & 0x0f;
+                       cmd->u.tx_can.data[3] = (cf->can_id >> 6) & 0xff;
+                       cmd->u.tx_can.data[4] = cf->can_id & 0x3f;
+               } else {
+                       cmd->id = CMD_TX_STD_MESSAGE;
+                       cmd->u.tx_can.data[0] = (cf->can_id >> 6) & 0x1f;
+                       cmd->u.tx_can.data[1] = cf->can_id & 0x3f;
+               }
+
+               cmd->u.tx_can.data[5] = cf->can_dlc;
+               memcpy(&cmd->u.tx_can.data[6], cf->data, cf->can_dlc);
+
+               if (cf->can_id & CAN_RTR_FLAG)
+                       *cmd_tx_can_flags |= MSG_FLAG_REMOTE_FRAME;
+       }
+       return cmd;
+}
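+
+/* Worked example of the identifier packing above: a standard-frame
+ * can_id of 0x123 becomes data[0] = (0x123 >> 6) & 0x1f = 0x04 and
+ * data[1] = 0x123 & 0x3f = 0x23; extended 29-bit identifiers are
+ * spread over data[0..4] as 5 + 6 + 4 + 8 + 6 bits, most significant
+ * bits first.
+ */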
+
+static int kvaser_usb_leaf_wait_cmd(const struct kvaser_usb *dev, u8 id,
+                                   struct kvaser_cmd *cmd)
+{
+       struct kvaser_cmd *tmp;
+       void *buf;
+       int actual_len;
+       int err;
+       int pos;
+       unsigned long to = jiffies + msecs_to_jiffies(KVASER_USB_TIMEOUT);
+
+       buf = kzalloc(KVASER_USB_RX_BUFFER_SIZE, GFP_KERNEL);
+       if (!buf)
+               return -ENOMEM;
+
+       do {
+               err = kvaser_usb_recv_cmd(dev, buf, KVASER_USB_RX_BUFFER_SIZE,
+                                         &actual_len);
+               if (err < 0)
+                       goto end;
+
+               pos = 0;
+               while (pos <= actual_len - CMD_HEADER_LEN) {
+                       tmp = buf + pos;
+
+                       /* Handle commands crossing the USB endpoint max packet
+                        * size boundary; see
+                        * kvaser_usb_leaf_read_bulk_callback() for details.
+                        */
+                       if (tmp->len == 0) {
+                               pos = round_up(pos,
+                                              le16_to_cpu
+                                               (dev->bulk_in->wMaxPacketSize));
+                               continue;
+                       }
+
+                       if (pos + tmp->len > actual_len) {
+                               dev_err_ratelimited(&dev->intf->dev,
+                                                   "Format error\n");
+                               break;
+                       }
+
+                       if (tmp->id == id) {
+                               memcpy(cmd, tmp, tmp->len);
+                               goto end;
+                       }
+
+                       pos += tmp->len;
+               }
+       } while (time_before(jiffies, to));
+
+       err = -EINVAL;
+
+end:
+       kfree(buf);
+
+       return err;
+}
+
+static int kvaser_usb_leaf_send_simple_cmd(const struct kvaser_usb *dev,
+                                          u8 cmd_id, int channel)
+{
+       struct kvaser_cmd *cmd;
+       int rc;
+
+       cmd = kmalloc(sizeof(*cmd), GFP_KERNEL);
+       if (!cmd)
+               return -ENOMEM;
+
+       cmd->id = cmd_id;
+       cmd->len = CMD_HEADER_LEN + sizeof(struct kvaser_cmd_simple);
+       cmd->u.simple.channel = channel;
+       cmd->u.simple.tid = 0xff;
+
+       rc = kvaser_usb_send_cmd(dev, cmd, cmd->len);
+
+       kfree(cmd);
+       return rc;
+}
+
+static int kvaser_usb_leaf_get_software_info_inner(struct kvaser_usb *dev)
+{
+       struct kvaser_cmd cmd;
+       int err;
+
+       err = kvaser_usb_leaf_send_simple_cmd(dev, CMD_GET_SOFTWARE_INFO, 0);
+       if (err)
+               return err;
+
+       err = kvaser_usb_leaf_wait_cmd(dev, CMD_GET_SOFTWARE_INFO_REPLY, &cmd);
+       if (err)
+               return err;
+
+       switch (dev->card_data.leaf.family) {
+       case KVASER_LEAF:
+               dev->fw_version = le32_to_cpu(cmd.u.leaf.softinfo.fw_version);
+               dev->max_tx_urbs =
+                       le16_to_cpu(cmd.u.leaf.softinfo.max_outstanding_tx);
+               break;
+       case KVASER_USBCAN:
+               dev->fw_version = le32_to_cpu(cmd.u.usbcan.softinfo.fw_version);
+               dev->max_tx_urbs =
+                       le16_to_cpu(cmd.u.usbcan.softinfo.max_outstanding_tx);
+               break;
+       }
+
+       return 0;
+}
+
+static int kvaser_usb_leaf_get_software_info(struct kvaser_usb *dev)
+{
+       int err;
+       int retry = 3;
+
+       /* On some x86 laptops, plugging a Kvaser device again after
+        * an unplug makes the firmware always ignore the very first
+        * command. For such a case, provide some room for retries
+        * instead of completely exiting the driver.
+        */
+       do {
+               err = kvaser_usb_leaf_get_software_info_inner(dev);
+       } while (--retry && err == -ETIMEDOUT);
+
+       return err;
+}
+
+static int kvaser_usb_leaf_get_card_info(struct kvaser_usb *dev)
+{
+       struct kvaser_cmd cmd;
+       int err;
+
+       err = kvaser_usb_leaf_send_simple_cmd(dev, CMD_GET_CARD_INFO, 0);
+       if (err)
+               return err;
+
+       err = kvaser_usb_leaf_wait_cmd(dev, CMD_GET_CARD_INFO_REPLY, &cmd);
+       if (err)
+               return err;
+
+       dev->nchannels = cmd.u.cardinfo.nchannels;
+       if (dev->nchannels > KVASER_USB_MAX_NET_DEVICES ||
+           (dev->card_data.leaf.family == KVASER_USBCAN &&
+            dev->nchannels > MAX_USBCAN_NET_DEVICES))
+               return -EINVAL;
+
+       return 0;
+}
+
+static void kvaser_usb_leaf_tx_acknowledge(const struct kvaser_usb *dev,
+                                          const struct kvaser_cmd *cmd)
+{
+       struct net_device_stats *stats;
+       struct kvaser_usb_tx_urb_context *context;
+       struct kvaser_usb_net_priv *priv;
+       unsigned long flags;
+       u8 channel, tid;
+
+       channel = cmd->u.tx_acknowledge_header.channel;
+       tid = cmd->u.tx_acknowledge_header.tid;
+
+       if (channel >= dev->nchannels) {
+               dev_err(&dev->intf->dev,
+                       "Invalid channel number (%d)\n", channel);
+               return;
+       }
+
+       priv = dev->nets[channel];
+
+       if (!netif_device_present(priv->netdev))
+               return;
+
+       stats = &priv->netdev->stats;
+
+       context = &priv->tx_contexts[tid % dev->max_tx_urbs];
+
+       /* Sometimes the state change doesn't come after a bus-off event */
+       if (priv->can.restart_ms && priv->can.state >= CAN_STATE_BUS_OFF) {
+               struct sk_buff *skb;
+               struct can_frame *cf;
+
+               skb = alloc_can_err_skb(priv->netdev, &cf);
+               if (skb) {
+                       cf->can_id |= CAN_ERR_RESTARTED;
+
+                       stats->rx_packets++;
+                       stats->rx_bytes += cf->can_dlc;
+                       netif_rx(skb);
+               } else {
+                       netdev_err(priv->netdev,
+                                  "No memory left for err_skb\n");
+               }
+
+               priv->can.can_stats.restarts++;
+               netif_carrier_on(priv->netdev);
+
+               priv->can.state = CAN_STATE_ERROR_ACTIVE;
+       }
+
+       stats->tx_packets++;
+       stats->tx_bytes += context->dlc;
+
+       spin_lock_irqsave(&priv->tx_contexts_lock, flags);
+
+       can_get_echo_skb(priv->netdev, context->echo_index);
+       context->echo_index = dev->max_tx_urbs;
+       --priv->active_tx_contexts;
+       netif_wake_queue(priv->netdev);
+
+       spin_unlock_irqrestore(&priv->tx_contexts_lock, flags);
+}
+
+static int kvaser_usb_leaf_simple_cmd_async(struct kvaser_usb_net_priv *priv,
+                                           u8 cmd_id)
+{
+       struct kvaser_cmd *cmd;
+       int err;
+
+       cmd = kmalloc(sizeof(*cmd), GFP_ATOMIC);
+       if (!cmd)
+               return -ENOMEM;
+
+       cmd->len = CMD_HEADER_LEN + sizeof(struct kvaser_cmd_simple);
+       cmd->id = cmd_id;
+       cmd->u.simple.channel = priv->channel;
+
+       err = kvaser_usb_send_cmd_async(priv, cmd, cmd->len);
+       if (err)
+               kfree(cmd);
+
+       return err;
+}
+
+static void
+kvaser_usb_leaf_rx_error_update_can_state(struct kvaser_usb_net_priv *priv,
+                                       const struct kvaser_usb_err_summary *es,
+                                       struct can_frame *cf)
+{
+       struct kvaser_usb *dev = priv->dev;
+       struct net_device_stats *stats = &priv->netdev->stats;
+       enum can_state cur_state, new_state, tx_state, rx_state;
+
+       netdev_dbg(priv->netdev, "Error status: 0x%02x\n", es->status);
+
+       new_state = priv->can.state;
+       cur_state = priv->can.state;
+
+       if (es->status & (M16C_STATE_BUS_OFF | M16C_STATE_BUS_RESET)) {
+               new_state = CAN_STATE_BUS_OFF;
+       } else if (es->status & M16C_STATE_BUS_PASSIVE) {
+               new_state = CAN_STATE_ERROR_PASSIVE;
+       } else if (es->status & M16C_STATE_BUS_ERROR) {
+               /* Guard against spurious error events after a busoff */
+               if (cur_state < CAN_STATE_BUS_OFF) {
+                       if (es->txerr >= 128 || es->rxerr >= 128)
+                               new_state = CAN_STATE_ERROR_PASSIVE;
+                       else if (es->txerr >= 96 || es->rxerr >= 96)
+                               new_state = CAN_STATE_ERROR_WARNING;
+                       else if (cur_state > CAN_STATE_ERROR_ACTIVE)
+                               new_state = CAN_STATE_ERROR_ACTIVE;
+               }
+       }
+
+       if (!es->status)
+               new_state = CAN_STATE_ERROR_ACTIVE;
+
+       if (new_state != cur_state) {
+               tx_state = (es->txerr >= es->rxerr) ? new_state : 0;
+               rx_state = (es->txerr <= es->rxerr) ? new_state : 0;
+
+               can_change_state(priv->netdev, cf, tx_state, rx_state);
+       }
+
+       if (priv->can.restart_ms &&
+           cur_state >= CAN_STATE_BUS_OFF &&
+           new_state < CAN_STATE_BUS_OFF)
+               priv->can.can_stats.restarts++;
+
+       switch (dev->card_data.leaf.family) {
+       case KVASER_LEAF:
+               if (es->leaf.error_factor) {
+                       priv->can.can_stats.bus_error++;
+                       stats->rx_errors++;
+               }
+               break;
+       case KVASER_USBCAN:
+               if (es->usbcan.error_state & USBCAN_ERROR_STATE_TX_ERROR)
+                       stats->tx_errors++;
+               if (es->usbcan.error_state & USBCAN_ERROR_STATE_RX_ERROR)
+                       stats->rx_errors++;
+               if (es->usbcan.error_state & USBCAN_ERROR_STATE_BUSERROR)
+                       priv->can.can_stats.bus_error++;
+               break;
+       }
+
+       priv->bec.txerr = es->txerr;
+       priv->bec.rxerr = es->rxerr;
+}
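+
+/* Example of the counter thresholds above: with M16C_STATE_BUS_ERROR
+ * set, txerr = 100 (>= 96) maps to CAN_STATE_ERROR_WARNING, while
+ * txerr = 130 (>= 128) maps to CAN_STATE_ERROR_PASSIVE, matching the
+ * classic CAN error-state boundaries.
+ */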
+
+static void kvaser_usb_leaf_rx_error(const struct kvaser_usb *dev,
+                                    const struct kvaser_usb_err_summary *es)
+{
+       struct can_frame *cf;
+       struct can_frame tmp_cf = { .can_id = CAN_ERR_FLAG,
+                                   .can_dlc = CAN_ERR_DLC };
+       struct sk_buff *skb;
+       struct net_device_stats *stats;
+       struct kvaser_usb_net_priv *priv;
+       enum can_state old_state, new_state;
+
+       if (es->channel >= dev->nchannels) {
+               dev_err(&dev->intf->dev,
+                       "Invalid channel number (%d)\n", es->channel);
+               return;
+       }
+
+       priv = dev->nets[es->channel];
+       stats = &priv->netdev->stats;
+
+       /* Update all of the CAN interface's state and error counters before
+        * trying any memory allocation that can actually fail with -ENOMEM.
+        *
+        * We send a temporary stack-allocated error CAN frame to
+        * can_change_state() for the very same reason.
+        *
+        * TODO: Split can_change_state() responsibility between updating the
+        * CAN interface's state and counters, and the setting up of CAN error
+        * frame ID and data to userspace. Remove stack allocation afterwards.
+        */
+       old_state = priv->can.state;
+       kvaser_usb_leaf_rx_error_update_can_state(priv, es, &tmp_cf);
+       new_state = priv->can.state;
+
+       skb = alloc_can_err_skb(priv->netdev, &cf);
+       if (!skb) {
+               stats->rx_dropped++;
+               return;
+       }
+       memcpy(cf, &tmp_cf, sizeof(*cf));
+
+       if (new_state != old_state) {
+               if (es->status &
+                   (M16C_STATE_BUS_OFF | M16C_STATE_BUS_RESET)) {
+                       if (!priv->can.restart_ms)
+                               kvaser_usb_leaf_simple_cmd_async(priv,
+                                                                CMD_STOP_CHIP);
+                       netif_carrier_off(priv->netdev);
+               }
+
+               if (priv->can.restart_ms &&
+                   old_state >= CAN_STATE_BUS_OFF &&
+                   new_state < CAN_STATE_BUS_OFF) {
+                       cf->can_id |= CAN_ERR_RESTARTED;
+                       netif_carrier_on(priv->netdev);
+               }
+       }
+
+       switch (dev->card_data.leaf.family) {
+       case KVASER_LEAF:
+               if (es->leaf.error_factor) {
+                       cf->can_id |= CAN_ERR_BUSERROR | CAN_ERR_PROT;
+
+                       if (es->leaf.error_factor & M16C_EF_ACKE)
+                               cf->data[3] = CAN_ERR_PROT_LOC_ACK;
+                       if (es->leaf.error_factor & M16C_EF_CRCE)
+                               cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ;
+                       if (es->leaf.error_factor & M16C_EF_FORME)
+                               cf->data[2] |= CAN_ERR_PROT_FORM;
+                       if (es->leaf.error_factor & M16C_EF_STFE)
+                               cf->data[2] |= CAN_ERR_PROT_STUFF;
+                       if (es->leaf.error_factor & M16C_EF_BITE0)
+                               cf->data[2] |= CAN_ERR_PROT_BIT0;
+                       if (es->leaf.error_factor & M16C_EF_BITE1)
+                               cf->data[2] |= CAN_ERR_PROT_BIT1;
+                       if (es->leaf.error_factor & M16C_EF_TRE)
+                               cf->data[2] |= CAN_ERR_PROT_TX;
+               }
+               break;
+       case KVASER_USBCAN:
+               if (es->usbcan.error_state & USBCAN_ERROR_STATE_BUSERROR)
+                       cf->can_id |= CAN_ERR_BUSERROR;
+               break;
+       }
+
+       cf->data[6] = es->txerr;
+       cf->data[7] = es->rxerr;
+
+       stats->rx_packets++;
+       stats->rx_bytes += cf->can_dlc;
+       netif_rx(skb);
+}
+
+/* For USBCAN, report an error to userspace if the channel's error
+ * counters have changed, or if it is the only channel seeing a bus
+ * error state.
+ */
+static void
+kvaser_usb_leaf_usbcan_conditionally_rx_error(const struct kvaser_usb *dev,
+                                             struct kvaser_usb_err_summary *es)
+{
+       struct kvaser_usb_net_priv *priv;
+       unsigned int channel;
+       bool report_error;
+
+       channel = es->channel;
+       if (channel >= dev->nchannels) {
+               dev_err(&dev->intf->dev,
+                       "Invalid channel number (%d)\n", channel);
+               return;
+       }
+
+       priv = dev->nets[channel];
+       report_error = false;
+
+       if (es->txerr != priv->bec.txerr) {
+               es->usbcan.error_state |= USBCAN_ERROR_STATE_TX_ERROR;
+               report_error = true;
+       }
+       if (es->rxerr != priv->bec.rxerr) {
+               es->usbcan.error_state |= USBCAN_ERROR_STATE_RX_ERROR;
+               report_error = true;
+       }
+       if ((es->status & M16C_STATE_BUS_ERROR) &&
+           !(es->usbcan.other_ch_status & M16C_STATE_BUS_ERROR)) {
+               es->usbcan.error_state |= USBCAN_ERROR_STATE_BUSERROR;
+               report_error = true;
+       }
+
+       if (report_error)
+               kvaser_usb_leaf_rx_error(dev, es);
+}
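+
+/* Example of the attribution heuristic above: if ch0's txerr moved from
+ * 5 to 6 since the last event while ch1's counters are unchanged, only
+ * ch0 gets USBCAN_ERROR_STATE_TX_ERROR set and an error frame is
+ * reported for ch0 alone.
+ */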
+
+static void kvaser_usb_leaf_usbcan_rx_error(const struct kvaser_usb *dev,
+                                           const struct kvaser_cmd *cmd)
+{
+       struct kvaser_usb_err_summary es = { };
+
+       switch (cmd->id) {
+       /* Sometimes errors are sent as unsolicited chip state events */
+       case CMD_CHIP_STATE_EVENT:
+               es.channel = cmd->u.usbcan.chip_state_event.channel;
+               es.status = cmd->u.usbcan.chip_state_event.status;
+               es.txerr = cmd->u.usbcan.chip_state_event.tx_errors_count;
+               es.rxerr = cmd->u.usbcan.chip_state_event.rx_errors_count;
+               kvaser_usb_leaf_usbcan_conditionally_rx_error(dev, &es);
+               break;
+
+       case CMD_CAN_ERROR_EVENT:
+               es.channel = 0;
+               es.status = cmd->u.usbcan.error_event.status_ch0;
+               es.txerr = cmd->u.usbcan.error_event.tx_errors_count_ch0;
+               es.rxerr = cmd->u.usbcan.error_event.rx_errors_count_ch0;
+               es.usbcan.other_ch_status =
+                       cmd->u.usbcan.error_event.status_ch1;
+               kvaser_usb_leaf_usbcan_conditionally_rx_error(dev, &es);
+
+               /* The USBCAN firmware supports up to 2 channels.
+                * Now that ch0 was checked, check if ch1 has any errors.
+                */
+               if (dev->nchannels == MAX_USBCAN_NET_DEVICES) {
+                       es.channel = 1;
+                       es.status = cmd->u.usbcan.error_event.status_ch1;
+                       es.txerr =
+                               cmd->u.usbcan.error_event.tx_errors_count_ch1;
+                       es.rxerr =
+                               cmd->u.usbcan.error_event.rx_errors_count_ch1;
+                       es.usbcan.other_ch_status =
+                               cmd->u.usbcan.error_event.status_ch0;
+                       kvaser_usb_leaf_usbcan_conditionally_rx_error(dev, &es);
+               }
+               break;
+
+       default:
+               dev_err(&dev->intf->dev, "Invalid cmd id (%d)\n", cmd->id);
+       }
+}
+
+static void kvaser_usb_leaf_leaf_rx_error(const struct kvaser_usb *dev,
+                                         const struct kvaser_cmd *cmd)
+{
+       struct kvaser_usb_err_summary es = { };
+
+       switch (cmd->id) {
+       case CMD_CAN_ERROR_EVENT:
+               es.channel = cmd->u.leaf.error_event.channel;
+               es.status = cmd->u.leaf.error_event.status;
+               es.txerr = cmd->u.leaf.error_event.tx_errors_count;
+               es.rxerr = cmd->u.leaf.error_event.rx_errors_count;
+               es.leaf.error_factor = cmd->u.leaf.error_event.error_factor;
+               break;
+       case CMD_LEAF_LOG_MESSAGE:
+               es.channel = cmd->u.leaf.log_message.channel;
+               es.status = cmd->u.leaf.log_message.data[0];
+               es.txerr = cmd->u.leaf.log_message.data[2];
+               es.rxerr = cmd->u.leaf.log_message.data[3];
+               es.leaf.error_factor = cmd->u.leaf.log_message.data[1];
+               break;
+       case CMD_CHIP_STATE_EVENT:
+               es.channel = cmd->u.leaf.chip_state_event.channel;
+               es.status = cmd->u.leaf.chip_state_event.status;
+               es.txerr = cmd->u.leaf.chip_state_event.tx_errors_count;
+               es.rxerr = cmd->u.leaf.chip_state_event.rx_errors_count;
+               es.leaf.error_factor = 0;
+               break;
+       default:
+               dev_err(&dev->intf->dev, "Invalid cmd id (%d)\n", cmd->id);
+               return;
+       }
+
+       kvaser_usb_leaf_rx_error(dev, &es);
+}
+
+static void kvaser_usb_leaf_rx_can_err(const struct kvaser_usb_net_priv *priv,
+                                      const struct kvaser_cmd *cmd)
+{
+       if (cmd->u.rx_can_header.flag & (MSG_FLAG_ERROR_FRAME |
+                                        MSG_FLAG_NERR)) {
+               struct net_device_stats *stats = &priv->netdev->stats;
+
+               netdev_err(priv->netdev, "Unknown error (flags: 0x%02x)\n",
+                          cmd->u.rx_can_header.flag);
+
+               stats->rx_errors++;
+               return;
+       }
+
+       if (cmd->u.rx_can_header.flag & MSG_FLAG_OVERRUN)
+               kvaser_usb_can_rx_over_error(priv->netdev);
+}
+
+static void kvaser_usb_leaf_rx_can_msg(const struct kvaser_usb *dev,
+                                      const struct kvaser_cmd *cmd)
+{
+       struct kvaser_usb_net_priv *priv;
+       struct can_frame *cf;
+       struct sk_buff *skb;
+       struct net_device_stats *stats;
+       u8 channel = cmd->u.rx_can_header.channel;
+       const u8 *rx_data = NULL;       /* silence GCC warning */
+
+       if (channel >= dev->nchannels) {
+               dev_err(&dev->intf->dev,
+                       "Invalid channel number (%d)\n", channel);
+               return;
+       }
+
+       priv = dev->nets[channel];
+       stats = &priv->netdev->stats;
+
+       if ((cmd->u.rx_can_header.flag & MSG_FLAG_ERROR_FRAME) &&
+           (dev->card_data.leaf.family == KVASER_LEAF &&
+            cmd->id == CMD_LEAF_LOG_MESSAGE)) {
+               kvaser_usb_leaf_leaf_rx_error(dev, cmd);
+               return;
+       } else if (cmd->u.rx_can_header.flag & (MSG_FLAG_ERROR_FRAME |
+                                               MSG_FLAG_NERR |
+                                               MSG_FLAG_OVERRUN)) {
+               kvaser_usb_leaf_rx_can_err(priv, cmd);
+               return;
+       } else if (cmd->u.rx_can_header.flag & ~MSG_FLAG_REMOTE_FRAME) {
+               netdev_warn(priv->netdev,
+                           "Unhandled frame (flags: 0x%02x)\n",
+                           cmd->u.rx_can_header.flag);
+               return;
+       }
+
+       switch (dev->card_data.leaf.family) {
+       case KVASER_LEAF:
+               rx_data = cmd->u.leaf.rx_can.data;
+               break;
+       case KVASER_USBCAN:
+               rx_data = cmd->u.usbcan.rx_can.data;
+               break;
+       }
+
+       skb = alloc_can_skb(priv->netdev, &cf);
+       if (!skb) {
+               stats->rx_dropped++;
+               return;
+       }
+
+       if (dev->card_data.leaf.family == KVASER_LEAF && cmd->id ==
+           CMD_LEAF_LOG_MESSAGE) {
+               cf->can_id = le32_to_cpu(cmd->u.leaf.log_message.id);
+               if (cf->can_id & KVASER_EXTENDED_FRAME)
+                       cf->can_id &= CAN_EFF_MASK | CAN_EFF_FLAG;
+               else
+                       cf->can_id &= CAN_SFF_MASK;
+
+               cf->can_dlc = get_can_dlc(cmd->u.leaf.log_message.dlc);
+
+               if (cmd->u.leaf.log_message.flags & MSG_FLAG_REMOTE_FRAME)
+                       cf->can_id |= CAN_RTR_FLAG;
+               else
+                       memcpy(cf->data, &cmd->u.leaf.log_message.data,
+                              cf->can_dlc);
+       } else {
+               cf->can_id = ((rx_data[0] & 0x1f) << 6) | (rx_data[1] & 0x3f);
+
+               if (cmd->id == CMD_RX_EXT_MESSAGE) {
+                       cf->can_id <<= 18;
+                       cf->can_id |= ((rx_data[2] & 0x0f) << 14) |
+                                     ((rx_data[3] & 0xff) << 6) |
+                                     (rx_data[4] & 0x3f);
+                       cf->can_id |= CAN_EFF_FLAG;
+               }
+
+               cf->can_dlc = get_can_dlc(rx_data[5]);
+
+               if (cmd->u.rx_can_header.flag & MSG_FLAG_REMOTE_FRAME)
+                       cf->can_id |= CAN_RTR_FLAG;
+               else
+                       memcpy(cf->data, &rx_data[6], cf->can_dlc);
+       }
+
+       stats->rx_packets++;
+       stats->rx_bytes += cf->can_dlc;
+       netif_rx(skb);
+}
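+
+/* The identifier decode above is the inverse of the packing in
+ * kvaser_usb_leaf_frame_to_cmd(): e.g. rx_data[0] = 0x04 and
+ * rx_data[1] = 0x23 reassemble to the standard-frame can_id
+ * (0x04 << 6) | 0x23 = 0x123.
+ */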
+
+static void kvaser_usb_leaf_start_chip_reply(const struct kvaser_usb *dev,
+                                            const struct kvaser_cmd *cmd)
+{
+       struct kvaser_usb_net_priv *priv;
+       u8 channel = cmd->u.simple.channel;
+
+       if (channel >= dev->nchannels) {
+               dev_err(&dev->intf->dev,
+                       "Invalid channel number (%d)\n", channel);
+               return;
+       }
+
+       priv = dev->nets[channel];
+
+       if (completion_done(&priv->start_comp) &&
+           netif_queue_stopped(priv->netdev)) {
+               netif_wake_queue(priv->netdev);
+       } else {
+               netif_start_queue(priv->netdev);
+               complete(&priv->start_comp);
+       }
+}
+
+static void kvaser_usb_leaf_stop_chip_reply(const struct kvaser_usb *dev,
+                                           const struct kvaser_cmd *cmd)
+{
+       struct kvaser_usb_net_priv *priv;
+       u8 channel = cmd->u.simple.channel;
+
+       if (channel >= dev->nchannels) {
+               dev_err(&dev->intf->dev,
+                       "Invalid channel number (%d)\n", channel);
+               return;
+       }
+
+       priv = dev->nets[channel];
+
+       complete(&priv->stop_comp);
+}
+
+static void kvaser_usb_leaf_handle_command(const struct kvaser_usb *dev,
+                                          const struct kvaser_cmd *cmd)
+{
+       switch (cmd->id) {
+       case CMD_START_CHIP_REPLY:
+               kvaser_usb_leaf_start_chip_reply(dev, cmd);
+               break;
+
+       case CMD_STOP_CHIP_REPLY:
+               kvaser_usb_leaf_stop_chip_reply(dev, cmd);
+               break;
+
+       case CMD_RX_STD_MESSAGE:
+       case CMD_RX_EXT_MESSAGE:
+               kvaser_usb_leaf_rx_can_msg(dev, cmd);
+               break;
+
+       case CMD_LEAF_LOG_MESSAGE:
+               if (dev->card_data.leaf.family != KVASER_LEAF)
+                       goto warn;
+               kvaser_usb_leaf_rx_can_msg(dev, cmd);
+               break;
+
+       case CMD_CHIP_STATE_EVENT:
+       case CMD_CAN_ERROR_EVENT:
+               if (dev->card_data.leaf.family == KVASER_LEAF)
+                       kvaser_usb_leaf_leaf_rx_error(dev, cmd);
+               else
+                       kvaser_usb_leaf_usbcan_rx_error(dev, cmd);
+               break;
+
+       case CMD_TX_ACKNOWLEDGE:
+               kvaser_usb_leaf_tx_acknowledge(dev, cmd);
+               break;
+
+       /* Ignored commands */
+       case CMD_USBCAN_CLOCK_OVERFLOW_EVENT:
+               if (dev->card_data.leaf.family != KVASER_USBCAN)
+                       goto warn;
+               break;
+
+       case CMD_FLUSH_QUEUE_REPLY:
+               if (dev->card_data.leaf.family != KVASER_LEAF)
+                       goto warn;
+               break;
+
+       default:
+warn:          dev_warn(&dev->intf->dev, "Unhandled command (%d)\n", cmd->id);
+               break;
+       }
+}
+
+static void kvaser_usb_leaf_read_bulk_callback(struct kvaser_usb *dev,
+                                              void *buf, int len)
+{
+       struct kvaser_cmd *cmd;
+       int pos = 0;
+
+       while (pos <= len - CMD_HEADER_LEN) {
+               cmd = buf + pos;
+
+               /* The Kvaser firmware can only read and write commands that
+                * do not cross the USB endpoint's wMaxPacketSize boundary.
+                * If a follow-up command crosses such a boundary, the firmware
+                * puts a placeholder zero-length command in its place and then
+                * aligns the real command to the next max packet size.
+                *
+                * Handle such cases, or we're going to miss a significant
+                * number of events under heavy rx load on the bus.
+                */
+               if (cmd->len == 0) {
+                       pos = round_up(pos, le16_to_cpu
+                                               (dev->bulk_in->wMaxPacketSize));
+                       continue;
+               }
+
+               if (pos + cmd->len > len) {
+                       dev_err_ratelimited(&dev->intf->dev, "Format error\n");
+                       break;
+               }
+
+               kvaser_usb_leaf_handle_command(dev, cmd);
+               pos += cmd->len;
+       }
+}
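+
+/* Worked example of the alignment handling above, assuming
+ * wMaxPacketSize is 64: if a command ends at pos = 60 and the firmware
+ * stored a zero-length placeholder there, round_up(60, 64) advances pos
+ * to 64, where the next real command begins.
+ */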
+
+static int kvaser_usb_leaf_set_opt_mode(const struct kvaser_usb_net_priv *priv)
+{
+       struct kvaser_cmd *cmd;
+       int rc;
+
+       cmd = kmalloc(sizeof(*cmd), GFP_KERNEL);
+       if (!cmd)
+               return -ENOMEM;
+
+       cmd->id = CMD_SET_CTRL_MODE;
+       cmd->len = CMD_HEADER_LEN + sizeof(struct kvaser_cmd_ctrl_mode);
+       cmd->u.ctrl_mode.tid = 0xff;
+       cmd->u.ctrl_mode.channel = priv->channel;
+
+       if (priv->can.ctrlmode & CAN_CTRLMODE_LISTENONLY)
+               cmd->u.ctrl_mode.ctrl_mode = KVASER_CTRL_MODE_SILENT;
+       else
+               cmd->u.ctrl_mode.ctrl_mode = KVASER_CTRL_MODE_NORMAL;
+
+       rc = kvaser_usb_send_cmd(priv->dev, cmd, cmd->len);
+
+       kfree(cmd);
+       return rc;
+}
+
+static int kvaser_usb_leaf_start_chip(struct kvaser_usb_net_priv *priv)
+{
+       int err;
+
+       init_completion(&priv->start_comp);
+
+       err = kvaser_usb_leaf_send_simple_cmd(priv->dev, CMD_START_CHIP,
+                                             priv->channel);
+       if (err)
+               return err;
+
+       if (!wait_for_completion_timeout(&priv->start_comp,
+                                        msecs_to_jiffies(KVASER_USB_TIMEOUT)))
+               return -ETIMEDOUT;
+
+       return 0;
+}
+
+static int kvaser_usb_leaf_stop_chip(struct kvaser_usb_net_priv *priv)
+{
+       int err;
+
+       init_completion(&priv->stop_comp);
+
+       err = kvaser_usb_leaf_send_simple_cmd(priv->dev, CMD_STOP_CHIP,
+                                             priv->channel);
+       if (err)
+               return err;
+
+       if (!wait_for_completion_timeout(&priv->stop_comp,
+                                        msecs_to_jiffies(KVASER_USB_TIMEOUT)))
+               return -ETIMEDOUT;
+
+       return 0;
+}
+
+static int kvaser_usb_leaf_reset_chip(struct kvaser_usb *dev, int channel)
+{
+       return kvaser_usb_leaf_send_simple_cmd(dev, CMD_RESET_CHIP, channel);
+}
+
+static int kvaser_usb_leaf_flush_queue(struct kvaser_usb_net_priv *priv)
+{
+       struct kvaser_cmd *cmd;
+       int rc;
+
+       cmd = kmalloc(sizeof(*cmd), GFP_KERNEL);
+       if (!cmd)
+               return -ENOMEM;
+
+       cmd->id = CMD_FLUSH_QUEUE;
+       cmd->len = CMD_HEADER_LEN + sizeof(struct kvaser_cmd_flush_queue);
+       cmd->u.flush_queue.channel = priv->channel;
+       cmd->u.flush_queue.flags = 0x00;
+
+       rc = kvaser_usb_send_cmd(priv->dev, cmd, cmd->len);
+
+       kfree(cmd);
+       return rc;
+}
+
+static int kvaser_usb_leaf_init_card(struct kvaser_usb *dev)
+{
+       struct kvaser_usb_dev_card_data *card_data = &dev->card_data;
+
+       dev->cfg = &kvaser_usb_leaf_dev_cfg;
+       card_data->ctrlmode_supported |= CAN_CTRLMODE_3_SAMPLES;
+
+       return 0;
+}
+
+static const struct can_bittiming_const kvaser_usb_leaf_bittiming_const = {
+       .name = "kvaser_usb",
+       .tseg1_min = KVASER_USB_TSEG1_MIN,
+       .tseg1_max = KVASER_USB_TSEG1_MAX,
+       .tseg2_min = KVASER_USB_TSEG2_MIN,
+       .tseg2_max = KVASER_USB_TSEG2_MAX,
+       .sjw_max = KVASER_USB_SJW_MAX,
+       .brp_min = KVASER_USB_BRP_MIN,
+       .brp_max = KVASER_USB_BRP_MAX,
+       .brp_inc = KVASER_USB_BRP_INC,
+};
+
+static int kvaser_usb_leaf_set_bittiming(struct net_device *netdev)
+{
+       struct kvaser_usb_net_priv *priv = netdev_priv(netdev);
+       struct can_bittiming *bt = &priv->can.bittiming;
+       struct kvaser_usb *dev = priv->dev;
+       struct kvaser_cmd *cmd;
+       int rc;
+
+       cmd = kmalloc(sizeof(*cmd), GFP_KERNEL);
+       if (!cmd)
+               return -ENOMEM;
+
+       cmd->id = CMD_SET_BUS_PARAMS;
+       cmd->len = CMD_HEADER_LEN + sizeof(struct kvaser_cmd_busparams);
+       cmd->u.busparams.channel = priv->channel;
+       cmd->u.busparams.tid = 0xff;
+       cmd->u.busparams.bitrate = cpu_to_le32(bt->bitrate);
+       cmd->u.busparams.sjw = bt->sjw;
+       cmd->u.busparams.tseg1 = bt->prop_seg + bt->phase_seg1;
+       cmd->u.busparams.tseg2 = bt->phase_seg2;
+
+       if (priv->can.ctrlmode & CAN_CTRLMODE_3_SAMPLES)
+               cmd->u.busparams.no_samp = 3;
+       else
+               cmd->u.busparams.no_samp = 1;
+
+       rc = kvaser_usb_send_cmd(dev, cmd, cmd->len);
+
+       kfree(cmd);
+       return rc;
+}
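+
+/* Example mapping for the bus parameters above: a netlink configuration
+ * of prop_seg = 2, phase_seg1 = 5, phase_seg2 = 2 and sjw = 1 is sent
+ * to the device as tseg1 = 2 + 5 = 7 and tseg2 = 2, i.e. 1 + 7 + 2 = 10
+ * time quanta per bit including the sync segment.
+ */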
+
+static int kvaser_usb_leaf_set_mode(struct net_device *netdev,
+                                   enum can_mode mode)
+{
+       struct kvaser_usb_net_priv *priv = netdev_priv(netdev);
+       int err;
+
+       switch (mode) {
+       case CAN_MODE_START:
+               err = kvaser_usb_leaf_simple_cmd_async(priv, CMD_START_CHIP);
+               if (err)
+                       return err;
+               break;
+       default:
+               return -EOPNOTSUPP;
+       }
+
+       return 0;
+}
+
+static int kvaser_usb_leaf_get_berr_counter(const struct net_device *netdev,
+                                           struct can_berr_counter *bec)
+{
+       struct kvaser_usb_net_priv *priv = netdev_priv(netdev);
+
+       *bec = priv->bec;
+
+       return 0;
+}
+
+static int kvaser_usb_leaf_setup_endpoints(struct kvaser_usb *dev)
+{
+       const struct usb_host_interface *iface_desc;
+       struct usb_endpoint_descriptor *endpoint;
+       int i;
+
+       iface_desc = &dev->intf->altsetting[0];
+
+       for (i = 0; i < iface_desc->desc.bNumEndpoints; ++i) {
+               endpoint = &iface_desc->endpoint[i].desc;
+
+               if (!dev->bulk_in && usb_endpoint_is_bulk_in(endpoint))
+                       dev->bulk_in = endpoint;
+
+               if (!dev->bulk_out && usb_endpoint_is_bulk_out(endpoint))
+                       dev->bulk_out = endpoint;
+
+               /* use the first bulk-in and bulk-out endpoints found */
+               if (dev->bulk_in && dev->bulk_out)
+                       return 0;
+       }
+
+       return -ENODEV;
+}
+
+const struct kvaser_usb_dev_ops kvaser_usb_leaf_dev_ops = {
+       .dev_set_mode = kvaser_usb_leaf_set_mode,
+       .dev_set_bittiming = kvaser_usb_leaf_set_bittiming,
+       .dev_set_data_bittiming = NULL,
+       .dev_get_berr_counter = kvaser_usb_leaf_get_berr_counter,
+       .dev_setup_endpoints = kvaser_usb_leaf_setup_endpoints,
+       .dev_init_card = kvaser_usb_leaf_init_card,
+       .dev_get_software_info = kvaser_usb_leaf_get_software_info,
+       .dev_get_software_details = NULL,
+       .dev_get_card_info = kvaser_usb_leaf_get_card_info,
+       .dev_get_capabilities = NULL,
+       .dev_set_opt_mode = kvaser_usb_leaf_set_opt_mode,
+       .dev_start_chip = kvaser_usb_leaf_start_chip,
+       .dev_stop_chip = kvaser_usb_leaf_stop_chip,
+       .dev_reset_chip = kvaser_usb_leaf_reset_chip,
+       .dev_flush_queue = kvaser_usb_leaf_flush_queue,
+       .dev_read_bulk_callback = kvaser_usb_leaf_read_bulk_callback,
+       .dev_frame_to_cmd = kvaser_usb_leaf_frame_to_cmd,
+};
+
+static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_dev_cfg = {
+       .clock = {
+               .freq = CAN_USB_CLOCK,
+       },
+       .timestamp_freq = 1,
+       .bittiming_const = &kvaser_usb_leaf_bittiming_const,
+};
index f530a80f505115675e01cb60223e06016e5e4a9b..13238a72a33862a7c2acfc966c03f93d206d82be 100644 (file)
@@ -423,6 +423,7 @@ static int pcan_usb_decode_error(struct pcan_usb_msg_context *mc, u8 n,
                        new_state = CAN_STATE_ERROR_WARNING;
                        break;
                }
+               /* else: fall through */
 
        case CAN_STATE_ERROR_WARNING:
                if (n & PCAN_USB_ERROR_BUS_HEAVY) {
index 50e9114286380258843fb582e4d1a100a41c743d..611f9d31be5d0370612fe8d4f9771b88dd9f3d37 100644 (file)
@@ -353,6 +353,7 @@ static netdev_tx_t peak_usb_ndo_start_xmit(struct sk_buff *skb,
                default:
                        netdev_warn(netdev, "tx urb submitting failed err=%d\n",
                                    err);
+                       /* fall through */
                case -ENOENT:
                        /* cable unplugged */
                        stats->tx_dropped++;
index 0105fbfea273ad4c874006d72e5c0e94dbfb8b95..d516def846abec6c661dc185da1d64b73ca22441 100644 (file)
@@ -141,8 +141,10 @@ static int pcan_msg_add_rec(struct pcan_usb_pro_msg *pm, u8 id, ...)
        switch (id) {
        case PCAN_USBPRO_TXMSG8:
                i += 4;
+               /* fall through */
        case PCAN_USBPRO_TXMSG4:
                i += 4;
+               /* fall through */
        case PCAN_USBPRO_TXMSG0:
                *pc++ = va_arg(ap, int);
                *pc++ = va_arg(ap, int);
diff --git a/drivers/net/can/usb/ucan.c b/drivers/net/can/usb/ucan.c
new file mode 100644 (file)
index 0000000..0678a38
--- /dev/null
@@ -0,0 +1,1613 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Driver for Theobroma Systems UCAN devices, Protocol Version 3
+ *
+ * Copyright (C) 2018 Theobroma Systems Design und Consulting GmbH
+ *
+ *
+ * General Description:
+ *
+ * The USB device uses three endpoints:
+ *
+ *   CONTROL Endpoint: used to set up the device (start, stop,
+ *   info, configure).
+ *
+ *   IN Endpoint: the device sends CAN frame messages and device
+ *   information on the IN endpoint.
+ *
+ *   OUT Endpoint: the driver sends configuration requests and CAN
+ *   frames on the OUT endpoint.
+ *
+ * Error Handling:
+ *
+ *   If error reporting is turned on, the device encodes errors into CAN
+ *   error frames (see uapi/linux/can/error.h) and sends them on the
+ *   IN endpoint. The driver updates statistics and forwards them.
+ */
+
+#include <linux/can.h>
+#include <linux/can/dev.h>
+#include <linux/can/error.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/signal.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/usb.h>
+
+#define UCAN_DRIVER_NAME "ucan"
+#define UCAN_MAX_RX_URBS 8
+/* the CAN controller needs a while to enable/disable the bus (timeout in ms) */
+#define UCAN_USB_CTL_PIPE_TIMEOUT 1000
+/* this driver currently supports protocol version 3 only */
+#define UCAN_PROTOCOL_VERSION_MIN 3
+#define UCAN_PROTOCOL_VERSION_MAX 3
+
+/* UCAN Message Definitions
+ * ------------------------
+ *
+ *  struct ucan_message_out and struct ucan_message_in define the
+ *  messages transmitted on the OUT and IN endpoint.
+ *
+ *  Multibyte fields are transmitted in little-endian byte order.
+ *
+ *  INTR Endpoint: a single uint32_t storing the current space in the fifo
+ *
+ *  OUT Endpoint: a single message of type struct ucan_message_out is
+ *    transmitted on the out endpoint
+ *
+ *  IN Endpoint: multiple messages of type struct ucan_message_in are
+ *    concatenated in the following way:
+ *
+ *     m[n].len is the length of message n (including the header), in bytes
+ *     m[n] is aligned to a 4 byte boundary, hence
+ *       offset(m[0])   := 0;
+ *       offset(m[n+1]) := offset(m[n]) + ((m[n].len + 3) & ~3)
+ *
+ *     this implies that
+ *       offset(m[n]) % 4 == 0
+ */
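+
+/* Worked example (illustrative only, not part of the protocol spec):
+ * for two messages on the IN endpoint with m[0].len == 12 and
+ * m[1].len == 13:
+ *   offset(m[0]) = 0
+ *   offset(m[1]) = 0  + ((12 + 3) & ~3) = 12
+ *   offset(m[2]) = 12 + ((13 + 3) & ~3) = 12 + 16 = 28
+ * so every message header stays 4-byte aligned.
+ */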
+
+/* Device Global Commands */
+enum {
+       UCAN_DEVICE_GET_FW_STRING = 0,
+};
+
+/* UCAN Commands */
+enum {
+       /* start the can transceiver - val defines the operation mode */
+       UCAN_COMMAND_START = 0,
+       /* cancel pending transmissions and stop the can transceiver */
+       UCAN_COMMAND_STOP = 1,
+       /* send can transceiver into low-power sleep mode */
+       UCAN_COMMAND_SLEEP = 2,
+       /* wake up can transceiver from low-power sleep mode */
+       UCAN_COMMAND_WAKEUP = 3,
+       /* reset the can transceiver */
+       UCAN_COMMAND_RESET = 4,
+       /* get piece of info from the can transceiver - subcmd defines what
+        * piece
+        */
+       UCAN_COMMAND_GET = 5,
+       /* clear or disable hardware filter - subcmd defines which of the two */
+       UCAN_COMMAND_FILTER = 6,
+       /* Setup bittiming */
+       UCAN_COMMAND_SET_BITTIMING = 7,
+       /* recover from bus-off state */
+       UCAN_COMMAND_RESTART = 8,
+};
+
+/* UCAN_COMMAND_START and UCAN_COMMAND_GET_INFO operation modes (bitmap).
+ * Undefined bits must be set to 0.
+ */
+enum {
+       UCAN_MODE_LOOPBACK = BIT(0),
+       UCAN_MODE_SILENT = BIT(1),
+       UCAN_MODE_3_SAMPLES = BIT(2),
+       UCAN_MODE_ONE_SHOT = BIT(3),
+       UCAN_MODE_BERR_REPORT = BIT(4),
+};
+
+/* UCAN_COMMAND_GET subcommands */
+enum {
+       UCAN_COMMAND_GET_INFO = 0,
+       UCAN_COMMAND_GET_PROTOCOL_VERSION = 1,
+};
+
+/* UCAN_COMMAND_FILTER subcommands */
+enum {
+       UCAN_FILTER_CLEAR = 0,
+       UCAN_FILTER_DISABLE = 1,
+       UCAN_FILTER_ENABLE = 2,
+};
+
+/* OUT endpoint message types */
+enum {
+       UCAN_OUT_TX = 2,     /* transmit a CAN frame */
+};
+
+/* IN endpoint message types */
+enum {
+       UCAN_IN_TX_COMPLETE = 1,  /* CAN frame transmission completed */
+       UCAN_IN_RX = 2,           /* CAN frame received */
+};
+
+struct ucan_ctl_cmd_start {
+       __le16 mode;         /* OR-ing any of UCAN_MODE_* */
+} __packed;
+
+struct ucan_ctl_cmd_set_bittiming {
+       __le32 tq;           /* Time quanta (TQ) in nanoseconds */
+       __le16 brp;          /* TQ Prescaler */
+       __le16 sample_point; /* Sample point in tenths of a percent */
+       u8 prop_seg;         /* Propagation segment in TQs */
+       u8 phase_seg1;       /* Phase buffer segment 1 in TQs */
+       u8 phase_seg2;       /* Phase buffer segment 2 in TQs */
+       u8 sjw;              /* Synchronisation jump width in TQs */
+} __packed;
+
+struct ucan_ctl_cmd_device_info {
+       __le32 freq;         /* Clock Frequency for tq generation */
+       u8 tx_fifo;          /* Size of the transmission fifo */
+       u8 sjw_max;          /* can_bittiming fields... */
+       u8 tseg1_min;
+       u8 tseg1_max;
+       u8 tseg2_min;
+       u8 tseg2_max;
+       __le16 brp_inc;
+       __le32 brp_min;
+       __le32 brp_max;      /* ...can_bittiming fields */
+       __le16 ctrlmodes;    /* supported control modes */
+       __le16 hwfilter;     /* Number of HW filter banks */
+       __le16 rxmboxes;     /* Number of receive Mailboxes */
+} __packed;
+
+struct ucan_ctl_cmd_get_protocol_version {
+       __le32 version;
+} __packed;
+
+union ucan_ctl_payload {
+       /* Start the controller
+        * bmRequest == UCAN_COMMAND_START
+        */
+       struct ucan_ctl_cmd_start cmd_start;
+       /* Setup Bittiming
+        * bmRequest == UCAN_COMMAND_SET_BITTIMING
+        */
+       struct ucan_ctl_cmd_set_bittiming cmd_set_bittiming;
+       /* Get Device Information
+        * bmRequest == UCAN_COMMAND_GET; wValue = UCAN_COMMAND_GET_INFO
+        */
+       struct ucan_ctl_cmd_device_info cmd_get_device_info;
+       /* Get Protocol Version
+        * bmRequest == UCAN_COMMAND_GET;
+        * wValue = UCAN_COMMAND_GET_PROTOCOL_VERSION
+        */
+       struct ucan_ctl_cmd_get_protocol_version cmd_get_protocol_version;
+
+       u8 raw[128];
+} __packed;
+
+enum {
+       UCAN_TX_COMPLETE_SUCCESS = BIT(0),
+};
+
+/* Transmission Complete within ucan_message_in */
+struct ucan_tx_complete_entry_t {
+       u8 echo_index;
+       u8 flags;
+} __packed __aligned(0x2);
+
+/* CAN Data message format within ucan_message_in/out */
+struct ucan_can_msg {
+       /* note DLC is computed by
+        *    msg.len - sizeof (msg.len)
+        *            - sizeof (msg.type)
+        *            - sizeof (msg.subtype)
+        *            - sizeof (msg.can_msg.id)
+        */
+       __le32 id;
+
+       union {
+               u8 data[CAN_MAX_DLEN];  /* Data of CAN frames */
+               u8 dlc;                 /* RTR dlc */
+       };
+} __packed;
+
+/* OUT Endpoint, outbound messages */
+struct ucan_message_out {
+       __le16 len; /* Length of the message, including this header */
+       u8 type;    /* UCAN_OUT_TX and friends */
+       u8 subtype; /* command sub type */
+
+       union {
+               /* Transmit CAN frame
+                * (type == UCAN_TX) && ((msg.can_msg.id & CAN_RTR_FLAG) == 0)
+                * subtype stores the echo id
+                */
+               struct ucan_can_msg can_msg;
+       } msg;
+} __packed __aligned(0x4);
+
+/* IN Endpoint, inbound messages */
+struct ucan_message_in {
+       __le16 len; /* Length of the message, including this header */
+       u8 type;    /* UCAN_IN_RX and friends */
+       u8 subtype; /* command sub type */
+
+       union {
+               /* CAN Frame received
+                * (type == UCAN_IN_RX)
+                * && ((msg.can_msg.id & CAN_RTR_FLAG) == 0)
+                */
+               struct ucan_can_msg can_msg;
+
+               /* CAN transmission complete
+                * (type == UCAN_IN_TX_COMPLETE)
+                */
+               struct ucan_tx_complete_entry_t can_tx_complete_msg[0];
+       } __aligned(0x4) msg;
+} __packed;
+
+/* Macros to calculate message lengths */
+#define UCAN_OUT_HDR_SIZE offsetof(struct ucan_message_out, msg)
+
+#define UCAN_IN_HDR_SIZE offsetof(struct ucan_message_in, msg)
+#define UCAN_IN_LEN(member) (UCAN_IN_HDR_SIZE + sizeof(member))
+
+struct ucan_priv;
+
+/* Context Information for transmission URBs */
+struct ucan_urb_context {
+       struct ucan_priv *up;
+       u8 dlc;
+       bool allocated;
+};
+
+/* Information reported by the USB device */
+struct ucan_device_info {
+       struct can_bittiming_const bittiming_const;
+       u8 tx_fifo;
+};
+
+/* Driver private data */
+struct ucan_priv {
+       /* must be the first member */
+       struct can_priv can;
+
+       /* linux USB device structures */
+       struct usb_device *udev;
+       struct usb_interface *intf;
+       struct net_device *netdev;
+
+       /* lock for can->echo_skb (used around
+        * can_put/get/free_echo_skb)
+        */
+       spinlock_t echo_skb_lock;
+
+       /* usb device information */
+       u8 intf_index;
+       u8 in_ep_addr;
+       u8 out_ep_addr;
+       u16 in_ep_size;
+
+       /* transmission and reception buffers */
+       struct usb_anchor rx_urbs;
+       struct usb_anchor tx_urbs;
+
+       union ucan_ctl_payload *ctl_msg_buffer;
+       struct ucan_device_info device_info;
+
+       /* transmission control information and locks */
+       spinlock_t context_lock;
+       unsigned int available_tx_urbs;
+       struct ucan_urb_context *context_array;
+};
+
+static u8 ucan_get_can_dlc(struct ucan_can_msg *msg, u16 len)
+{
+       if (le32_to_cpu(msg->id) & CAN_RTR_FLAG)
+               return get_can_dlc(msg->dlc);
+       else
+               return get_can_dlc(len - (UCAN_IN_HDR_SIZE + sizeof(msg->id)));
+}
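+
+/* Worked example (illustrative only): a data frame message with
+ * len == 12 carries 12 - (UCAN_IN_HDR_SIZE + sizeof(msg->id)) =
+ * 12 - 8 = 4 data bytes, i.e. DLC 4; for RTR frames the DLC is
+ * taken from msg->dlc directly.
+ */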
+
+static void ucan_release_context_array(struct ucan_priv *up)
+{
+       if (!up->context_array)
+               return;
+
+       /* lock is not needed because the driver is currently opening or closing */
+       up->available_tx_urbs = 0;
+
+       kfree(up->context_array);
+       up->context_array = NULL;
+}
+
+static int ucan_alloc_context_array(struct ucan_priv *up)
+{
+       int i;
+
+       /* release contexts if any */
+       ucan_release_context_array(up);
+
+       up->context_array = kcalloc(up->device_info.tx_fifo,
+                                   sizeof(*up->context_array),
+                                   GFP_KERNEL);
+       if (!up->context_array) {
+               netdev_err(up->netdev,
+                          "Not enough memory to allocate tx contexts\n");
+               return -ENOMEM;
+       }
+
+       for (i = 0; i < up->device_info.tx_fifo; i++) {
+               up->context_array[i].allocated = false;
+               up->context_array[i].up = up;
+       }
+
+       /* lock is not needed because the driver is currently opening */
+       up->available_tx_urbs = up->device_info.tx_fifo;
+
+       return 0;
+}
+
+static struct ucan_urb_context *ucan_alloc_context(struct ucan_priv *up)
+{
+       int i;
+       unsigned long flags;
+       struct ucan_urb_context *ret = NULL;
+
+       if (WARN_ON_ONCE(!up->context_array))
+               return NULL;
+
+       /* execute context operation atomically */
+       spin_lock_irqsave(&up->context_lock, flags);
+
+       for (i = 0; i < up->device_info.tx_fifo; i++) {
+               if (!up->context_array[i].allocated) {
+                       /* update context */
+                       ret = &up->context_array[i];
+                       up->context_array[i].allocated = true;
+
+                       /* stop queue if necessary */
+                       up->available_tx_urbs--;
+                       if (!up->available_tx_urbs)
+                               netif_stop_queue(up->netdev);
+
+                       break;
+               }
+       }
+
+       spin_unlock_irqrestore(&up->context_lock, flags);
+       return ret;
+}
+
+static bool ucan_release_context(struct ucan_priv *up,
+                                struct ucan_urb_context *ctx)
+{
+       unsigned long flags;
+       bool ret = false;
+
+       if (WARN_ON_ONCE(!up->context_array))
+               return false;
+
+       /* execute context operation atomically */
+       spin_lock_irqsave(&up->context_lock, flags);
+
+       /* if the context was not allocated, the device may have sent garbage */
+       if (ctx->allocated) {
+               ctx->allocated = false;
+
+               /* check if the queue needs to be woken */
+               if (!up->available_tx_urbs)
+                       netif_wake_queue(up->netdev);
+               up->available_tx_urbs++;
+
+               ret = true;
+       }
+
+       spin_unlock_irqrestore(&up->context_lock, flags);
+       return ret;
+}
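+
+/* Illustrative note on the allocator above: available_tx_urbs counts
+ * the free contexts; e.g. with device_info.tx_fifo == 4, the fourth
+ * ucan_alloc_context() call drops it to 0 and stops the queue, and
+ * the next ucan_release_context() wakes the queue again.
+ */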
+
+static int ucan_ctrl_command_out(struct ucan_priv *up,
+                                u8 cmd, u16 subcmd, u16 datalen)
+{
+       return usb_control_msg(up->udev,
+                              usb_sndctrlpipe(up->udev, 0),
+                              cmd,
+                              USB_DIR_OUT | USB_TYPE_VENDOR |
+                                               USB_RECIP_INTERFACE,
+                              subcmd,
+                              up->intf_index,
+                              up->ctl_msg_buffer,
+                              datalen,
+                              UCAN_USB_CTL_PIPE_TIMEOUT);
+}
+
+static int ucan_device_request_in(struct ucan_priv *up,
+                                 u8 cmd, u16 subcmd, u16 datalen)
+{
+       return usb_control_msg(up->udev,
+                              usb_rcvctrlpipe(up->udev, 0),
+                              cmd,
+                              USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
+                              subcmd,
+                              0,
+                              up->ctl_msg_buffer,
+                              datalen,
+                              UCAN_USB_CTL_PIPE_TIMEOUT);
+}
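+
+/* Illustrative mapping of a command onto the USB setup packet (see
+ * ucan_open() for the real call site): starting the controller uses
+ * bRequest = UCAN_COMMAND_START, wValue = 0 (subcmd), wIndex = the
+ * interface index, with a 2-byte ucan_ctl_cmd_start payload:
+ *
+ *   up->ctl_msg_buffer->cmd_start.mode = cpu_to_le16(UCAN_MODE_BERR_REPORT);
+ *   ucan_ctrl_command_out(up, UCAN_COMMAND_START, 0, 2);
+ */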
+
+/* Parse the device information structure reported by the device and
+ * setup private variables accordingly
+ */
+static void ucan_parse_device_info(struct ucan_priv *up,
+                                  struct ucan_ctl_cmd_device_info *device_info)
+{
+       struct can_bittiming_const *bittiming =
+               &up->device_info.bittiming_const;
+       u16 ctrlmodes;
+
+       /* store the data */
+       up->can.clock.freq = le32_to_cpu(device_info->freq);
+       up->device_info.tx_fifo = device_info->tx_fifo;
+       strcpy(bittiming->name, "ucan");
+       bittiming->tseg1_min = device_info->tseg1_min;
+       bittiming->tseg1_max = device_info->tseg1_max;
+       bittiming->tseg2_min = device_info->tseg2_min;
+       bittiming->tseg2_max = device_info->tseg2_max;
+       bittiming->sjw_max = device_info->sjw_max;
+       bittiming->brp_min = le32_to_cpu(device_info->brp_min);
+       bittiming->brp_max = le32_to_cpu(device_info->brp_max);
+       bittiming->brp_inc = le16_to_cpu(device_info->brp_inc);
+
+       ctrlmodes = le16_to_cpu(device_info->ctrlmodes);
+
+       up->can.ctrlmode_supported = 0;
+
+       if (ctrlmodes & UCAN_MODE_LOOPBACK)
+               up->can.ctrlmode_supported |= CAN_CTRLMODE_LOOPBACK;
+       if (ctrlmodes & UCAN_MODE_SILENT)
+               up->can.ctrlmode_supported |= CAN_CTRLMODE_LISTENONLY;
+       if (ctrlmodes & UCAN_MODE_3_SAMPLES)
+               up->can.ctrlmode_supported |= CAN_CTRLMODE_3_SAMPLES;
+       if (ctrlmodes & UCAN_MODE_ONE_SHOT)
+               up->can.ctrlmode_supported |= CAN_CTRLMODE_ONE_SHOT;
+       if (ctrlmodes & UCAN_MODE_BERR_REPORT)
+               up->can.ctrlmode_supported |= CAN_CTRLMODE_BERR_REPORTING;
+}
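+
+/* Worked example (illustrative only): a device reporting
+ * ctrlmodes == (UCAN_MODE_LOOPBACK | UCAN_MODE_BERR_REPORT) yields
+ * ctrlmode_supported == (CAN_CTRLMODE_LOOPBACK |
+ * CAN_CTRLMODE_BERR_REPORTING).
+ */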
+
+/* Handle a CAN error frame that we have received from the device.
+ * Returns true if the can state has changed.
+ */
+static bool ucan_handle_error_frame(struct ucan_priv *up,
+                                   struct ucan_message_in *m,
+                                   canid_t canid)
+{
+       enum can_state new_state = up->can.state;
+       struct net_device_stats *net_stats = &up->netdev->stats;
+       struct can_device_stats *can_stats = &up->can.can_stats;
+
+       if (canid & CAN_ERR_LOSTARB)
+               can_stats->arbitration_lost++;
+
+       if (canid & CAN_ERR_BUSERROR)
+               can_stats->bus_error++;
+
+       if (canid & CAN_ERR_ACK)
+               net_stats->tx_errors++;
+
+       if (canid & CAN_ERR_BUSOFF)
+               new_state = CAN_STATE_BUS_OFF;
+
+       /* controller problems, details in data[1] */
+       if (canid & CAN_ERR_CRTL) {
+               u8 d1 = m->msg.can_msg.data[1];
+
+               if (d1 & CAN_ERR_CRTL_RX_OVERFLOW)
+                       net_stats->rx_over_errors++;
+
+               /* controller state bits: if multiple are set the worst wins */
+               if (d1 & CAN_ERR_CRTL_ACTIVE)
+                       new_state = CAN_STATE_ERROR_ACTIVE;
+
+               if (d1 & (CAN_ERR_CRTL_RX_WARNING | CAN_ERR_CRTL_TX_WARNING))
+                       new_state = CAN_STATE_ERROR_WARNING;
+
+               if (d1 & (CAN_ERR_CRTL_RX_PASSIVE | CAN_ERR_CRTL_TX_PASSIVE))
+                       new_state = CAN_STATE_ERROR_PASSIVE;
+       }
+
+       /* protocol error, details in data[2] */
+       if (canid & CAN_ERR_PROT) {
+               u8 d2 = m->msg.can_msg.data[2];
+
+               if (d2 & CAN_ERR_PROT_TX)
+                       net_stats->tx_errors++;
+               else
+                       net_stats->rx_errors++;
+       }
+
+       /* no state change - we are done */
+       if (up->can.state == new_state)
+               return false;
+
+       /* we switched into a better state */
+       if (up->can.state > new_state) {
+               up->can.state = new_state;
+               return true;
+       }
+
+       /* we switched into a worse state */
+       up->can.state = new_state;
+       switch (new_state) {
+       case CAN_STATE_BUS_OFF:
+               can_stats->bus_off++;
+               can_bus_off(up->netdev);
+               break;
+       case CAN_STATE_ERROR_PASSIVE:
+               can_stats->error_passive++;
+               break;
+       case CAN_STATE_ERROR_WARNING:
+               can_stats->error_warning++;
+               break;
+       default:
+               break;
+       }
+       return true;
+}
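+
+/* Worked example (illustrative only): if data[1] has both
+ * CAN_ERR_CRTL_RX_WARNING and CAN_ERR_CRTL_RX_PASSIVE set, the
+ * passive state wins because it is checked last ("worst wins"),
+ * so new_state becomes CAN_STATE_ERROR_PASSIVE.
+ */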
+
+/* Callback on reception of a can frame via the IN endpoint
+ *
+ * This function allocates an skb and transfers it to the Linux
+ * network stack
+ */
+static void ucan_rx_can_msg(struct ucan_priv *up, struct ucan_message_in *m)
+{
+       int len;
+       canid_t canid;
+       struct can_frame *cf;
+       struct sk_buff *skb;
+       struct net_device_stats *stats = &up->netdev->stats;
+
+       /* get the contents of the length field */
+       len = le16_to_cpu(m->len);
+
+       /* check sanity */
+       if (len < UCAN_IN_HDR_SIZE + sizeof(m->msg.can_msg.id)) {
+               netdev_warn(up->netdev, "invalid input message len: %d\n", len);
+               return;
+       }
+
+       /* handle error frames */
+       canid = le32_to_cpu(m->msg.can_msg.id);
+       if (canid & CAN_ERR_FLAG) {
+               bool busstate_changed = ucan_handle_error_frame(up, m, canid);
+
+               /* if berr-reporting is off only state changes get through */
+               if (!(up->can.ctrlmode & CAN_CTRLMODE_BERR_REPORTING) &&
+                   !busstate_changed)
+                       return;
+       } else {
+               canid_t canid_mask;
+               /* compute the mask for canid */
+               canid_mask = CAN_RTR_FLAG;
+               if (canid & CAN_EFF_FLAG)
+                       canid_mask |= CAN_EFF_MASK | CAN_EFF_FLAG;
+               else
+                       canid_mask |= CAN_SFF_MASK;
+
+               if (canid & ~canid_mask)
+                       netdev_warn(up->netdev,
+                                   "unexpected bits set (canid %x, mask %x)",
+                                   canid, canid_mask);
+
+               canid &= canid_mask;
+       }
+
+       /* allocate skb */
+       skb = alloc_can_skb(up->netdev, &cf);
+       if (!skb)
+               return;
+
+       /* fill the can frame */
+       cf->can_id = canid;
+
+       /* compute DLC taking RTR_FLAG into account */
+       cf->can_dlc = ucan_get_can_dlc(&m->msg.can_msg, len);
+
+       /* copy the payload of non-RTR frames and of error frames */
+       if (!(cf->can_id & CAN_RTR_FLAG) || (cf->can_id & CAN_ERR_FLAG))
+               memcpy(cf->data, m->msg.can_msg.data, cf->can_dlc);
+
+       /* update rx statistics */
+       stats->rx_packets++;
+       stats->rx_bytes += cf->can_dlc;
+
+       /* pass it to Linux */
+       netif_rx(skb);
+}
+
+/* callback indicating completed transmission */
+static void ucan_tx_complete_msg(struct ucan_priv *up,
+                                struct ucan_message_in *m)
+{
+       unsigned long flags;
+       u16 count, i;
+       u8 echo_index, dlc;
+       u16 len = le16_to_cpu(m->len);
+
+       struct ucan_urb_context *context;
+
+       if (len < UCAN_IN_HDR_SIZE || (len % 2 != 0)) {
+               netdev_err(up->netdev, "invalid tx complete length\n");
+               return;
+       }
+
+       count = (len - UCAN_IN_HDR_SIZE) / 2;
+       for (i = 0; i < count; i++) {
+               /* reject echo ids that we never submitted */
+               echo_index = m->msg.can_tx_complete_msg[i].echo_index;
+               if (echo_index >= up->device_info.tx_fifo) {
+                       up->netdev->stats.tx_errors++;
+                       netdev_err(up->netdev,
+                                  "invalid echo_index %d received\n",
+                                  echo_index);
+                       continue;
+               }
+
+               /* gather information from the context */
+               context = &up->context_array[echo_index];
+               dlc = READ_ONCE(context->dlc);
+
+               /* Release context and restart queue if necessary.
+                * Also check if the context was allocated
+                */
+               if (!ucan_release_context(up, context))
+                       continue;
+
+               spin_lock_irqsave(&up->echo_skb_lock, flags);
+               if (m->msg.can_tx_complete_msg[i].flags &
+                   UCAN_TX_COMPLETE_SUCCESS) {
+                       /* update statistics */
+                       up->netdev->stats.tx_packets++;
+                       up->netdev->stats.tx_bytes += dlc;
+                       can_get_echo_skb(up->netdev, echo_index);
+               } else {
+                       up->netdev->stats.tx_dropped++;
+                       can_free_echo_skb(up->netdev, echo_index);
+               }
+               spin_unlock_irqrestore(&up->echo_skb_lock, flags);
+       }
+}
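+
+/* Worked example (illustrative only): a tx-complete message with
+ * len == 8 carries (8 - UCAN_IN_HDR_SIZE) / 2 = 2 entries of
+ * struct ucan_tx_complete_entry_t (echo_index, flags).
+ */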
+
+/* callback on reception of a USB message */
+static void ucan_read_bulk_callback(struct urb *urb)
+{
+       int ret;
+       int pos;
+       struct ucan_priv *up = urb->context;
+       struct net_device *netdev = up->netdev;
+       struct ucan_message_in *m;
+
+       /* the device is not up and the driver should not receive any
+        * data on the bulk in pipe
+        */
+       if (WARN_ON(!up->context_array)) {
+               usb_free_coherent(up->udev,
+                                 up->in_ep_size,
+                                 urb->transfer_buffer,
+                                 urb->transfer_dma);
+               return;
+       }
+
+       /* check URB status */
+       switch (urb->status) {
+       case 0:
+               break;
+       case -ENOENT:
+       case -EPIPE:
+       case -EPROTO:
+       case -ESHUTDOWN:
+       case -ETIME:
+               /* urb is not resubmitted -> free dma data */
+               usb_free_coherent(up->udev,
+                                 up->in_ep_size,
+                                 urb->transfer_buffer,
+                                 urb->transfer_dma);
+               netdev_dbg(up->netdev, "not resubmitting urb; status: %d\n",
+                          urb->status);
+               return;
+       default:
+               goto resubmit;
+       }
+
+       /* sanity check */
+       if (!netif_device_present(netdev))
+               return;
+
+       /* iterate over input */
+       pos = 0;
+       while (pos < urb->actual_length) {
+               int len;
+
+               /* check sanity (length of header) */
+               if ((urb->actual_length - pos) < UCAN_IN_HDR_SIZE) {
+                       netdev_warn(up->netdev,
+                                   "invalid message (short; no hdr; l:%d)\n",
+                                   urb->actual_length);
+                       goto resubmit;
+               }
+
+               /* setup the message address */
+               m = (struct ucan_message_in *)
+                       ((u8 *)urb->transfer_buffer + pos);
+               len = le16_to_cpu(m->len);
+
+               /* check sanity (length of content) */
+               if (urb->actual_length - pos < len) {
+                       netdev_warn(up->netdev,
+                                   "invalid message (short; no data; l:%d)\n",
+                                   urb->actual_length);
+                       print_hex_dump(KERN_WARNING,
+                                      "raw data: ",
+                                      DUMP_PREFIX_ADDRESS,
+                                      16,
+                                      1,
+                                      urb->transfer_buffer,
+                                      urb->actual_length,
+                                      true);
+
+                       goto resubmit;
+               }
+
+               switch (m->type) {
+               case UCAN_IN_RX:
+                       ucan_rx_can_msg(up, m);
+                       break;
+               case UCAN_IN_TX_COMPLETE:
+                       ucan_tx_complete_msg(up, m);
+                       break;
+               default:
+                       netdev_warn(up->netdev,
+                                   "invalid message (type; t:%d)\n",
+                                   m->type);
+                       break;
+               }
+
+               /* proceed to next message */
+               pos += len;
+               /* align to 4 byte boundary */
+               pos = round_up(pos, 4);
+       }
+
+resubmit:
+       /* resubmit urb when done */
+       usb_fill_bulk_urb(urb, up->udev,
+                         usb_rcvbulkpipe(up->udev,
+                                         up->in_ep_addr),
+                         urb->transfer_buffer,
+                         up->in_ep_size,
+                         ucan_read_bulk_callback,
+                         up);
+
+       usb_anchor_urb(urb, &up->rx_urbs);
+       /* completion callbacks run in atomic context, so no GFP_KERNEL here */
+       ret = usb_submit_urb(urb, GFP_ATOMIC);
+
+       if (ret < 0) {
+               netdev_err(up->netdev,
+                          "failed resubmitting read bulk urb: %d\n",
+                          ret);
+
+               usb_unanchor_urb(urb);
+               usb_free_coherent(up->udev,
+                                 up->in_ep_size,
+                                 urb->transfer_buffer,
+                                 urb->transfer_dma);
+
+               if (ret == -ENODEV)
+                       netif_device_detach(netdev);
+       }
+}
+
+/* callback after transmission of a USB message */
+static void ucan_write_bulk_callback(struct urb *urb)
+{
+       unsigned long flags;
+       struct ucan_priv *up;
+       struct ucan_urb_context *context = urb->context;
+
+       /* get the urb context */
+       if (WARN_ON_ONCE(!context))
+               return;
+
+       /* free up our allocated buffer */
+       usb_free_coherent(urb->dev,
+                         sizeof(struct ucan_message_out),
+                         urb->transfer_buffer,
+                         urb->transfer_dma);
+
+       up = context->up;
+       if (WARN_ON_ONCE(!up))
+               return;
+
+       /* sanity check */
+       if (!netif_device_present(up->netdev))
+               return;
+
+       /* transmission failed (USB - the device will not send a TX complete) */
+       if (urb->status) {
+               netdev_warn(up->netdev,
+                           "failed to transmit USB message to device: %d\n",
+                            urb->status);
+
+               /* update counters and clean up */
+               spin_lock_irqsave(&up->echo_skb_lock, flags);
+               can_free_echo_skb(up->netdev, context - up->context_array);
+               spin_unlock_irqrestore(&up->echo_skb_lock, flags);
+
+               up->netdev->stats.tx_dropped++;
+
+               /* release context and restart the queue if necessary */
+               if (!ucan_release_context(up, context))
+                       netdev_err(up->netdev,
+                                  "urb failed, failed to release context\n");
+       }
+}
+
+static void ucan_cleanup_rx_urbs(struct ucan_priv *up, struct urb **urbs)
+{
+       int i;
+
+       for (i = 0; i < UCAN_MAX_RX_URBS; i++) {
+               if (urbs[i]) {
+                       usb_unanchor_urb(urbs[i]);
+                       usb_free_coherent(up->udev,
+                                         up->in_ep_size,
+                                         urbs[i]->transfer_buffer,
+                                         urbs[i]->transfer_dma);
+                       usb_free_urb(urbs[i]);
+               }
+       }
+
+       memset(urbs, 0, sizeof(*urbs) * UCAN_MAX_RX_URBS);
+}
+
+static int ucan_prepare_and_anchor_rx_urbs(struct ucan_priv *up,
+                                          struct urb **urbs)
+{
+       int i;
+
+       memset(urbs, 0, sizeof(*urbs) * UCAN_MAX_RX_URBS);
+
+       for (i = 0; i < UCAN_MAX_RX_URBS; i++) {
+               void *buf;
+
+               urbs[i] = usb_alloc_urb(0, GFP_KERNEL);
+               if (!urbs[i])
+                       goto err;
+
+               buf = usb_alloc_coherent(up->udev,
+                                        up->in_ep_size,
+                                        GFP_KERNEL, &urbs[i]->transfer_dma);
+               if (!buf) {
+                       /* cleanup this urb */
+                       usb_free_urb(urbs[i]);
+                       urbs[i] = NULL;
+                       goto err;
+               }
+
+               usb_fill_bulk_urb(urbs[i], up->udev,
+                                 usb_rcvbulkpipe(up->udev,
+                                                 up->in_ep_addr),
+                                 buf,
+                                 up->in_ep_size,
+                                 ucan_read_bulk_callback,
+                                 up);
+
+               urbs[i]->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+
+               usb_anchor_urb(urbs[i], &up->rx_urbs);
+       }
+       return 0;
+
+err:
+       /* cleanup other unsubmitted urbs */
+       ucan_cleanup_rx_urbs(up, urbs);
+       return -ENOMEM;
+}
+
+/* Submits rx urbs with all-or-nothing semantics: either all urbs are
+ * submitted, or everything is cleaned up. In case of errors, submitted
+ * urbs are killed and all urbs in the array are freed. In case of no
+ * errors, every entry in the urb array is set to NULL.
+ */
+static int ucan_submit_rx_urbs(struct ucan_priv *up, struct urb **urbs)
+{
+       int i, ret;
+
+       /* Iterate over all urbs to submit. On success remove the urb
+        * from the list.
+        */
+       for (i = 0; i < UCAN_MAX_RX_URBS; i++) {
+               ret = usb_submit_urb(urbs[i], GFP_KERNEL);
+               if (ret) {
+                       netdev_err(up->netdev,
+                                  "could not submit urb; code: %d\n",
+                                  ret);
+                       goto err;
+               }
+
+               /* Drop our reference to the (already anchored) URB;
+                * the USB core will take care of freeing it
+                */
+               usb_free_urb(urbs[i]);
+               urbs[i] = NULL;
+       }
+       return 0;
+
+err:
+       /* Cleanup unsubmitted urbs */
+       ucan_cleanup_rx_urbs(up, urbs);
+
+       /* Kill urbs that are already submitted */
+       usb_kill_anchored_urbs(&up->rx_urbs);
+
+       return ret;
+}
+
+/* Open the network device */
+static int ucan_open(struct net_device *netdev)
+{
+       int ret, ret_cleanup;
+       u16 ctrlmode;
+       struct urb *urbs[UCAN_MAX_RX_URBS];
+       struct ucan_priv *up = netdev_priv(netdev);
+
+       ret = ucan_alloc_context_array(up);
+       if (ret)
+               return ret;
+
+       /* Allocate and prepare IN URBS - allocated and anchored
+        * urbs are stored in urbs[] for cleanup
+        */
+       ret = ucan_prepare_and_anchor_rx_urbs(up, urbs);
+       if (ret)
+               goto err_contexts;
+
+       /* Check the control mode */
+       ctrlmode = 0;
+       if (up->can.ctrlmode & CAN_CTRLMODE_LOOPBACK)
+               ctrlmode |= UCAN_MODE_LOOPBACK;
+       if (up->can.ctrlmode & CAN_CTRLMODE_LISTENONLY)
+               ctrlmode |= UCAN_MODE_SILENT;
+       if (up->can.ctrlmode & CAN_CTRLMODE_3_SAMPLES)
+               ctrlmode |= UCAN_MODE_3_SAMPLES;
+       if (up->can.ctrlmode & CAN_CTRLMODE_ONE_SHOT)
+               ctrlmode |= UCAN_MODE_ONE_SHOT;
+
+       /* Enable this in any case - filtering is done within the
+        * receive path
+        */
+       ctrlmode |= UCAN_MODE_BERR_REPORT;
+       up->ctl_msg_buffer->cmd_start.mode = cpu_to_le16(ctrlmode);
+
+       /* Driver is ready to receive data - start the USB device */
+       ret = ucan_ctrl_command_out(up, UCAN_COMMAND_START, 0, 2);
+       if (ret < 0) {
+               netdev_err(up->netdev,
+                          "could not start device, code: %d\n",
+                          ret);
+               goto err_reset;
+       }
+
+       /* Call CAN layer open */
+       ret = open_candev(netdev);
+       if (ret)
+               goto err_stop;
+
+       /* Driver is ready to receive data. Submit RX URBS */
+       ret = ucan_submit_rx_urbs(up, urbs);
+       if (ret)
+               goto err_stop;
+
+       up->can.state = CAN_STATE_ERROR_ACTIVE;
+
+       /* Start the network queue */
+       netif_start_queue(netdev);
+
+       return 0;
+
+err_stop:
+       /* The device has already been started; stop it */
+       ret_cleanup = ucan_ctrl_command_out(up, UCAN_COMMAND_STOP, 0, 0);
+       if (ret_cleanup < 0)
+               netdev_err(up->netdev,
+                          "could not stop device, code: %d\n",
+                          ret_cleanup);
+
+err_reset:
+       /* The device might have received data, reset it for
+        * a consistent state
+        */
+       ret_cleanup = ucan_ctrl_command_out(up, UCAN_COMMAND_RESET, 0, 0);
+       if (ret_cleanup < 0)
+               netdev_err(up->netdev,
+                          "could not reset device, code: %d\n",
+                          ret_cleanup);
+
+       /* clean up unsubmitted urbs */
+       ucan_cleanup_rx_urbs(up, urbs);
+
+err_contexts:
+       ucan_release_context_array(up);
+       return ret;
+}
+
+static struct urb *ucan_prepare_tx_urb(struct ucan_priv *up,
+                                      struct ucan_urb_context *context,
+                                      struct can_frame *cf,
+                                      u8 echo_index)
+{
+       int mlen;
+       struct urb *urb;
+       struct ucan_message_out *m;
+
+       /* create a URB, and a buffer for it, and copy the data to the URB */
+       urb = usb_alloc_urb(0, GFP_ATOMIC);
+       if (!urb) {
+               netdev_err(up->netdev, "no memory left for URBs\n");
+               return NULL;
+       }
+
+       m = usb_alloc_coherent(up->udev,
+                              sizeof(struct ucan_message_out),
+                              GFP_ATOMIC,
+                              &urb->transfer_dma);
+       if (!m) {
+               netdev_err(up->netdev, "no memory left for USB buffer\n");
+               usb_free_urb(urb);
+               return NULL;
+       }
+
+       /* build the USB message */
+       m->type = UCAN_OUT_TX;
+       m->msg.can_msg.id = cpu_to_le32(cf->can_id);
+
+       if (cf->can_id & CAN_RTR_FLAG) {
+               mlen = UCAN_OUT_HDR_SIZE +
+                       offsetof(struct ucan_can_msg, dlc) +
+                       sizeof(m->msg.can_msg.dlc);
+               m->msg.can_msg.dlc = cf->can_dlc;
+       } else {
+               mlen = UCAN_OUT_HDR_SIZE +
+                       sizeof(m->msg.can_msg.id) + cf->can_dlc;
+               memcpy(m->msg.can_msg.data, cf->data, cf->can_dlc);
+       }
+       m->len = cpu_to_le16(mlen);
+
+       context->dlc = cf->can_dlc;
+
+       m->subtype = echo_index;
+
+       /* build the urb */
+       usb_fill_bulk_urb(urb, up->udev,
+                         usb_sndbulkpipe(up->udev,
+                                         up->out_ep_addr),
+                         m, mlen, ucan_write_bulk_callback, context);
+       urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+
+       return urb;
+}
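+
+/* Worked example (illustrative only): for a data frame with DLC 8,
+ * mlen = UCAN_OUT_HDR_SIZE + sizeof(id) + dlc = 4 + 4 + 8 = 16; for
+ * an RTR frame, mlen = 4 + offsetof(struct ucan_can_msg, dlc) +
+ * sizeof(dlc) = 4 + 4 + 1 = 9.
+ */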
+
+static void ucan_clean_up_tx_urb(struct ucan_priv *up, struct urb *urb)
+{
+       usb_free_coherent(up->udev, sizeof(struct ucan_message_out),
+                         urb->transfer_buffer, urb->transfer_dma);
+       usb_free_urb(urb);
+}
+
+/* callback when Linux needs to send a can frame */
+static netdev_tx_t ucan_start_xmit(struct sk_buff *skb,
+                                  struct net_device *netdev)
+{
+       unsigned long flags;
+       int ret;
+       u8 echo_index;
+       struct urb *urb;
+       struct ucan_urb_context *context;
+       struct ucan_priv *up = netdev_priv(netdev);
+       struct can_frame *cf = (struct can_frame *)skb->data;
+
+       /* check skb */
+       if (can_dropped_invalid_skb(netdev, skb))
+               return NETDEV_TX_OK;
+
+       /* allocate a context and slow down tx path, if fifo state is low */
+       context = ucan_alloc_context(up);
+       if (WARN_ON_ONCE(!context))
+               return NETDEV_TX_BUSY;
+
+       /* compute the echo id from the context's position in the array */
+       echo_index = context - up->context_array;
+
+       /* prepare urb for transmission */
+       urb = ucan_prepare_tx_urb(up, context, cf, echo_index);
+       if (!urb)
+               goto drop;
+
+       /* put the skb on can loopback stack */
+       spin_lock_irqsave(&up->echo_skb_lock, flags);
+       can_put_echo_skb(skb, up->netdev, echo_index);
+       spin_unlock_irqrestore(&up->echo_skb_lock, flags);
+
+       /* transmit it */
+       usb_anchor_urb(urb, &up->tx_urbs);
+       ret = usb_submit_urb(urb, GFP_ATOMIC);
+
+       /* cleanup urb */
+       if (ret) {
+               /* on error, clean up */
+               usb_unanchor_urb(urb);
+               ucan_clean_up_tx_urb(up, urb);
+               if (!ucan_release_context(up, context))
+                       netdev_err(up->netdev,
+                                  "xmit err: failed to release context\n");
+
+               /* remove the skb from the echo stack - this also
+                * frees the skb
+                */
+               spin_lock_irqsave(&up->echo_skb_lock, flags);
+               can_free_echo_skb(up->netdev, echo_index);
+               spin_unlock_irqrestore(&up->echo_skb_lock, flags);
+
+               if (ret == -ENODEV) {
+                       netif_device_detach(up->netdev);
+               } else {
+                       netdev_warn(up->netdev,
+                                   "xmit err: failed to submit urb %d\n",
+                                   ret);
+                       up->netdev->stats.tx_dropped++;
+               }
+               return NETDEV_TX_OK;
+       }
+
+       netif_trans_update(netdev);
+
+       /* release ref, as we do not need the urb anymore */
+       usb_free_urb(urb);
+
+       return NETDEV_TX_OK;
+
+drop:
+       if (!ucan_release_context(up, context))
+               netdev_err(up->netdev,
+                          "xmit drop: failed to release context\n");
+       dev_kfree_skb(skb);
+       up->netdev->stats.tx_dropped++;
+
+       return NETDEV_TX_OK;
+}
+
+/* Device goes down
+ *
+ * Clean up used resources
+ */
+static int ucan_close(struct net_device *netdev)
+{
+       int ret;
+       struct ucan_priv *up = netdev_priv(netdev);
+
+       up->can.state = CAN_STATE_STOPPED;
+
+       /* stop sending data */
+       usb_kill_anchored_urbs(&up->tx_urbs);
+
+       /* stop receiving data */
+       usb_kill_anchored_urbs(&up->rx_urbs);
+
+       /* stop and reset can device */
+       ret = ucan_ctrl_command_out(up, UCAN_COMMAND_STOP, 0, 0);
+       if (ret < 0)
+               netdev_err(up->netdev,
+                          "could not stop device, code: %d\n",
+                          ret);
+
+       ret = ucan_ctrl_command_out(up, UCAN_COMMAND_RESET, 0, 0);
+       if (ret < 0)
+               netdev_err(up->netdev,
+                          "could not reset device, code: %d\n",
+                          ret);
+
+       netif_stop_queue(netdev);
+
+       ucan_release_context_array(up);
+
+       close_candev(up->netdev);
+       return 0;
+}
+
+/* CAN driver callbacks */
+static const struct net_device_ops ucan_netdev_ops = {
+       .ndo_open = ucan_open,
+       .ndo_stop = ucan_close,
+       .ndo_start_xmit = ucan_start_xmit,
+       .ndo_change_mtu = can_change_mtu,
+};
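+
+/* Typical userspace interaction (illustrative, standard SocketCAN
+ * tooling; not specific to this driver):
+ *
+ *   ip link set can0 type can bitrate 500000
+ *   ip link set can0 up
+ *
+ * "up" ends up in ucan_open() via ndo_open, "down" in ucan_close().
+ */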
+
+/* Request to set bittiming
+ *
+ * This function generates an USB set bittiming message and transmits
+ * it to the device
+ */
+static int ucan_set_bittiming(struct net_device *netdev)
+{
+       int ret;
+       struct ucan_priv *up = netdev_priv(netdev);
+       struct ucan_ctl_cmd_set_bittiming *cmd_set_bittiming;
+
+       cmd_set_bittiming = &up->ctl_msg_buffer->cmd_set_bittiming;
+       cmd_set_bittiming->tq = cpu_to_le32(up->can.bittiming.tq);
+       cmd_set_bittiming->brp = cpu_to_le16(up->can.bittiming.brp);
+       cmd_set_bittiming->sample_point =
+           cpu_to_le16(up->can.bittiming.sample_point);
+       cmd_set_bittiming->prop_seg = up->can.bittiming.prop_seg;
+       cmd_set_bittiming->phase_seg1 = up->can.bittiming.phase_seg1;
+       cmd_set_bittiming->phase_seg2 = up->can.bittiming.phase_seg2;
+       cmd_set_bittiming->sjw = up->can.bittiming.sjw;
+
+       ret = ucan_ctrl_command_out(up, UCAN_COMMAND_SET_BITTIMING, 0,
+                                   sizeof(*cmd_set_bittiming));
+       return (ret < 0) ? ret : 0;
+}
+
+/* Restart the device to get it out of BUS-OFF state.
+ * Called when the user runs "ip link set can1 type can restart".
+ */
+static int ucan_set_mode(struct net_device *netdev, enum can_mode mode)
+{
+       int ret;
+       unsigned long flags;
+       struct ucan_priv *up = netdev_priv(netdev);
+
+       switch (mode) {
+       case CAN_MODE_START:
+               netdev_dbg(up->netdev, "restarting device\n");
+
+               ret = ucan_ctrl_command_out(up, UCAN_COMMAND_RESTART, 0, 0);
+               up->can.state = CAN_STATE_ERROR_ACTIVE;
+
+               /* check if the queue can be restarted;
+                * up->available_tx_urbs must be protected by the
+                * lock
+                */
+               spin_lock_irqsave(&up->context_lock, flags);
+
+               if (up->available_tx_urbs > 0)
+                       netif_wake_queue(up->netdev);
+
+               spin_unlock_irqrestore(&up->context_lock, flags);
+
+               return ret;
+       default:
+               return -EOPNOTSUPP;
+       }
+}
+
+/* Probe the device, reset it and gather general device information */
+static int ucan_probe(struct usb_interface *intf,
+                     const struct usb_device_id *id)
+{
+       int ret;
+       int i;
+       u32 protocol_version;
+       struct usb_device *udev;
+       struct net_device *netdev;
+       struct usb_host_interface *iface_desc;
+       struct ucan_priv *up;
+       struct usb_endpoint_descriptor *ep;
+       u16 in_ep_size;
+       u16 out_ep_size;
+       u8 in_ep_addr;
+       u8 out_ep_addr;
+       union ucan_ctl_payload *ctl_msg_buffer;
+       char firmware_str[sizeof(union ucan_ctl_payload) + 1];
+
+       udev = interface_to_usbdev(intf);
+
+       /* Stage 1 - Interface Parsing
+        * ---------------------------
+        *
+        * Identify the device's USB interface descriptor and its
+        * endpoints. Probing is aborted on errors.
+        */
+
+       /* check if the interface is sane */
+       iface_desc = intf->cur_altsetting;
+       if (!iface_desc)
+               return -ENODEV;
+
+       dev_info(&udev->dev,
+                "%s: probing device on interface #%d\n",
+                UCAN_DRIVER_NAME,
+                iface_desc->desc.bInterfaceNumber);
+
+       /* interface sanity check */
+       if (iface_desc->desc.bNumEndpoints != 2) {
+               dev_err(&udev->dev,
+                       "%s: invalid EP count (%d)",
+                       UCAN_DRIVER_NAME, iface_desc->desc.bNumEndpoints);
+               goto err_firmware_needs_update;
+       }
+
+       /* check interface endpoints */
+       in_ep_addr = 0;
+       out_ep_addr = 0;
+       in_ep_size = 0;
+       out_ep_size = 0;
+       for (i = 0; i < iface_desc->desc.bNumEndpoints; i++) {
+               ep = &iface_desc->endpoint[i].desc;
+
+               if (((ep->bEndpointAddress & USB_ENDPOINT_DIR_MASK) != 0) &&
+                   ((ep->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) ==
+                    USB_ENDPOINT_XFER_BULK)) {
+                       /* In Endpoint */
+                       in_ep_addr = ep->bEndpointAddress;
+                       in_ep_addr &= USB_ENDPOINT_NUMBER_MASK;
+                       in_ep_size = le16_to_cpu(ep->wMaxPacketSize);
+               } else if (((ep->bEndpointAddress & USB_ENDPOINT_DIR_MASK) ==
+                           0) &&
+                          ((ep->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) ==
+                           USB_ENDPOINT_XFER_BULK)) {
+                       /* Out Endpoint */
+                       out_ep_addr = ep->bEndpointAddress;
+                       out_ep_addr &= USB_ENDPOINT_NUMBER_MASK;
+                       out_ep_size = le16_to_cpu(ep->wMaxPacketSize);
+               }
+       }
+
+       /* check if interface is sane */
+       if (!in_ep_addr || !out_ep_addr) {
+               dev_err(&udev->dev, "%s: invalid endpoint configuration\n",
+                       UCAN_DRIVER_NAME);
+               goto err_firmware_needs_update;
+       }
+       if (in_ep_size < sizeof(struct ucan_message_in)) {
+               dev_err(&udev->dev, "%s: invalid in_ep MaxPacketSize\n",
+                       UCAN_DRIVER_NAME);
+               goto err_firmware_needs_update;
+       }
+       if (out_ep_size < sizeof(struct ucan_message_out)) {
+               dev_err(&udev->dev, "%s: invalid out_ep MaxPacketSize\n",
+                       UCAN_DRIVER_NAME);
+               goto err_firmware_needs_update;
+       }
+
+       /* Stage 2 - Device Identification
+        * -------------------------------
+        *
+        * The device interface seems to be a ucan device. Do further
+        * compatibility checks. On error probing is aborted, on
+        * success this stage leaves the ctl_msg_buffer with the
+        * reported contents of a GET_INFO command (supported
+        * bittimings, tx_fifo depth). This information is used in
+        * Stage 3 for the final driver initialisation.
+        */
+
+       /* Prepare memory for control transfers */
+       ctl_msg_buffer = devm_kzalloc(&udev->dev,
+                                     sizeof(union ucan_ctl_payload),
+                                     GFP_KERNEL);
+       if (!ctl_msg_buffer) {
+               dev_err(&udev->dev,
+                       "%s: failed to allocate control pipe memory\n",
+                       UCAN_DRIVER_NAME);
+               return -ENOMEM;
+       }
+
+       /* get protocol version
+        *
+        * note: ucan_ctrl_command_* wrappers cannot be used yet
+        * because `up` is initialised in Stage 3
+        */
+       ret = usb_control_msg(udev,
+                             usb_rcvctrlpipe(udev, 0),
+                             UCAN_COMMAND_GET,
+                             USB_DIR_IN | USB_TYPE_VENDOR |
+                                       USB_RECIP_INTERFACE,
+                             UCAN_COMMAND_GET_PROTOCOL_VERSION,
+                             iface_desc->desc.bInterfaceNumber,
+                             ctl_msg_buffer,
+                             sizeof(union ucan_ctl_payload),
+                             UCAN_USB_CTL_PIPE_TIMEOUT);
+
+       /* older firmware versions do not support this command - such
+        * devices are not supported by this driver
+        */
+       if (ret != 4) {
+               dev_err(&udev->dev,
+                       "%s: could not read protocol version, ret=%d\n",
+                       UCAN_DRIVER_NAME, ret);
+               if (ret >= 0)
+                       ret = -EINVAL;
+               goto err_firmware_needs_update;
+       }
+
+       /* this driver currently supports protocol version 3 only */
+       protocol_version =
+               le32_to_cpu(ctl_msg_buffer->cmd_get_protocol_version.version);
+       if (protocol_version < UCAN_PROTOCOL_VERSION_MIN ||
+           protocol_version > UCAN_PROTOCOL_VERSION_MAX) {
+               dev_err(&udev->dev,
+                       "%s: device protocol version %d is not supported\n",
+                       UCAN_DRIVER_NAME, protocol_version);
+               goto err_firmware_needs_update;
+       }
+
+       /* request the device information and store it in ctl_msg_buffer
+        *
+        * note: ucan_ctrl_command_* wrappers cannot be used yet
+        * because `up` is initialised in Stage 3
+        */
+       ret = usb_control_msg(udev,
+                             usb_rcvctrlpipe(udev, 0),
+                             UCAN_COMMAND_GET,
+                             USB_DIR_IN | USB_TYPE_VENDOR |
+                                       USB_RECIP_INTERFACE,
+                             UCAN_COMMAND_GET_INFO,
+                             iface_desc->desc.bInterfaceNumber,
+                             ctl_msg_buffer,
+                             sizeof(ctl_msg_buffer->cmd_get_device_info),
+                             UCAN_USB_CTL_PIPE_TIMEOUT);
+
+       if (ret < 0) {
+               dev_err(&udev->dev, "%s: failed to retrieve device info\n",
+                       UCAN_DRIVER_NAME);
+               goto err_firmware_needs_update;
+       }
+       if (ret < sizeof(ctl_msg_buffer->cmd_get_device_info)) {
+               dev_err(&udev->dev, "%s: device reported invalid device info\n",
+                       UCAN_DRIVER_NAME);
+               goto err_firmware_needs_update;
+       }
+       if (ctl_msg_buffer->cmd_get_device_info.tx_fifo == 0) {
+               dev_err(&udev->dev,
+                       "%s: device reported invalid tx-fifo size\n",
+                       UCAN_DRIVER_NAME);
+               goto err_firmware_needs_update;
+       }
+
+       /* Stage 3 - Driver Initialisation
+        * -------------------------------
+        *
+        * Register device to Linux, prepare private structures and
+        * reset the device.
+        */
+
+       /* allocate driver resources */
+       netdev = alloc_candev(sizeof(struct ucan_priv),
+                             ctl_msg_buffer->cmd_get_device_info.tx_fifo);
+       if (!netdev) {
+               dev_err(&udev->dev,
+                       "%s: cannot allocate candev\n", UCAN_DRIVER_NAME);
+               return -ENOMEM;
+       }
+
+       up = netdev_priv(netdev);
+
+       /* initialize data */
+       up->udev = udev;
+       up->intf = intf;
+       up->netdev = netdev;
+       up->intf_index = iface_desc->desc.bInterfaceNumber;
+       up->in_ep_addr = in_ep_addr;
+       up->out_ep_addr = out_ep_addr;
+       up->in_ep_size = in_ep_size;
+       up->ctl_msg_buffer = ctl_msg_buffer;
+       up->context_array = NULL;
+       up->available_tx_urbs = 0;
+
+       up->can.state = CAN_STATE_STOPPED;
+       up->can.bittiming_const = &up->device_info.bittiming_const;
+       up->can.do_set_bittiming = ucan_set_bittiming;
+       up->can.do_set_mode = ucan_set_mode;
+       spin_lock_init(&up->context_lock);
+       spin_lock_init(&up->echo_skb_lock);
+       netdev->netdev_ops = &ucan_netdev_ops;
+
+       usb_set_intfdata(intf, up);
+       SET_NETDEV_DEV(netdev, &intf->dev);
+
+       /* parse device information
+        * the data retrieved in Stage 2 is still available in
+        * up->ctl_msg_buffer
+        */
+       ucan_parse_device_info(up, &ctl_msg_buffer->cmd_get_device_info);
+
+       /* just print some device information - if available */
+       ret = ucan_device_request_in(up, UCAN_DEVICE_GET_FW_STRING, 0,
+                                    sizeof(union ucan_ctl_payload));
+       if (ret > 0) {
+               /* copy string while ensuring zero termination */
+               strncpy(firmware_str, up->ctl_msg_buffer->raw,
+                       sizeof(union ucan_ctl_payload));
+               firmware_str[sizeof(union ucan_ctl_payload)] = '\0';
+       } else {
+               strcpy(firmware_str, "unknown");
+       }
+
+       /* device is compatible, reset it */
+       ret = ucan_ctrl_command_out(up, UCAN_COMMAND_RESET, 0, 0);
+       if (ret < 0)
+               goto err_free_candev;
+
+       init_usb_anchor(&up->rx_urbs);
+       init_usb_anchor(&up->tx_urbs);
+
+       up->can.state = CAN_STATE_STOPPED;
+
+       /* register the device */
+       ret = register_candev(netdev);
+       if (ret)
+               goto err_free_candev;
+
+       /* initialisation complete, log device info */
+       netdev_info(up->netdev, "registered device\n");
+       netdev_info(up->netdev, "firmware string: %s\n", firmware_str);
+
+       /* success */
+       return 0;
+
+err_free_candev:
+       free_candev(netdev);
+       return ret;
+
+err_firmware_needs_update:
+       dev_err(&udev->dev,
+               "%s: probe failed; try to update the device firmware\n",
+               UCAN_DRIVER_NAME);
+       return -ENODEV;
+}
+
+/* disconnect the device */
+static void ucan_disconnect(struct usb_interface *intf)
+{
+       struct usb_device *udev;
+       struct ucan_priv *up = usb_get_intfdata(intf);
+
+       udev = interface_to_usbdev(intf);
+
+       usb_set_intfdata(intf, NULL);
+
+       if (up) {
+               unregister_netdev(up->netdev);
+               free_candev(up->netdev);
+       }
+}
+
+static struct usb_device_id ucan_table[] = {
+       /* Mule (soldered onto compute modules) */
+       {USB_DEVICE_INTERFACE_NUMBER(0x2294, 0x425a, 0)},
+       /* Seal (standalone USB stick) */
+       {USB_DEVICE_INTERFACE_NUMBER(0x2294, 0x425b, 0)},
+       {} /* Terminating entry */
+};
+
+MODULE_DEVICE_TABLE(usb, ucan_table);
+/* driver callbacks */
+static struct usb_driver ucan_driver = {
+       .name = UCAN_DRIVER_NAME,
+       .probe = ucan_probe,
+       .disconnect = ucan_disconnect,
+       .id_table = ucan_table,
+};
+
+module_usb_driver(ucan_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Martin Elshuber <martin.elshuber@theobroma-systems.com>");
+MODULE_AUTHOR("Jakob Unterwurzacher <jakob.unterwurzacher@theobroma-systems.com>");
+MODULE_DESCRIPTION("Driver for Theobroma Systems UCAN devices");
index 5a24039733efd23255142c4abc0d2b758d188554..045f0845e665e42a0d588cef735d2230c60a7b13 100644 (file)
@@ -2,7 +2,7 @@
  *
  * Copyright (C) 2012 - 2014 Xilinx, Inc.
  * Copyright (C) 2009 PetaLogix. All rights reserved.
- * Copyright (C) 2017 Sandvik Mining and Construction Oy
+ * Copyright (C) 2017 - 2018 Sandvik Mining and Construction Oy
  *
  * Description:
  * This driver is developed for Axi CAN IP and for Zynq CANPS Controller.
@@ -51,16 +51,34 @@ enum xcan_reg {
        XCAN_ISR_OFFSET         = 0x1C, /* Interrupt status */
        XCAN_IER_OFFSET         = 0x20, /* Interrupt enable */
        XCAN_ICR_OFFSET         = 0x24, /* Interrupt clear */
-       XCAN_TXFIFO_ID_OFFSET   = 0x30,/* TX FIFO ID */
-       XCAN_TXFIFO_DLC_OFFSET  = 0x34, /* TX FIFO DLC */
-       XCAN_TXFIFO_DW1_OFFSET  = 0x38, /* TX FIFO Data Word 1 */
-       XCAN_TXFIFO_DW2_OFFSET  = 0x3C, /* TX FIFO Data Word 2 */
-       XCAN_RXFIFO_ID_OFFSET   = 0x50, /* RX FIFO ID */
-       XCAN_RXFIFO_DLC_OFFSET  = 0x54, /* RX FIFO DLC */
-       XCAN_RXFIFO_DW1_OFFSET  = 0x58, /* RX FIFO Data Word 1 */
-       XCAN_RXFIFO_DW2_OFFSET  = 0x5C, /* RX FIFO Data Word 2 */
+
+       /* not on CAN FD cores */
+       XCAN_TXFIFO_OFFSET      = 0x30, /* TX FIFO base */
+       XCAN_RXFIFO_OFFSET      = 0x50, /* RX FIFO base */
+       XCAN_AFR_OFFSET         = 0x60, /* Acceptance Filter */
+
+       /* only on CAN FD cores */
+       XCAN_TRR_OFFSET         = 0x0090, /* TX Buffer Ready Request */
+       XCAN_AFR_EXT_OFFSET     = 0x00E0, /* Acceptance Filter */
+       XCAN_FSR_OFFSET         = 0x00E8, /* RX FIFO Status */
+       XCAN_TXMSG_BASE_OFFSET  = 0x0100, /* TX Message Space */
+       XCAN_RXMSG_BASE_OFFSET  = 0x1100, /* RX Message Space */
 };
 
+#define XCAN_FRAME_ID_OFFSET(frame_base)       ((frame_base) + 0x00)
+#define XCAN_FRAME_DLC_OFFSET(frame_base)      ((frame_base) + 0x04)
+#define XCAN_FRAME_DW1_OFFSET(frame_base)      ((frame_base) + 0x08)
+#define XCAN_FRAME_DW2_OFFSET(frame_base)      ((frame_base) + 0x0C)
+
+#define XCAN_CANFD_FRAME_SIZE          0x48
+#define XCAN_TXMSG_FRAME_OFFSET(n)     (XCAN_TXMSG_BASE_OFFSET + \
+                                        XCAN_CANFD_FRAME_SIZE * (n))
+#define XCAN_RXMSG_FRAME_OFFSET(n)     (XCAN_RXMSG_BASE_OFFSET + \
+                                        XCAN_CANFD_FRAME_SIZE * (n))
+
+/* the single TX mailbox used by this driver on CAN FD HW */
+#define XCAN_TX_MAILBOX_IDX            0
+
 /* CAN register bit masks - XCAN_<REG>_<BIT>_MASK */
 #define XCAN_SRR_CEN_MASK              0x00000002 /* CAN enable */
 #define XCAN_SRR_RESET_MASK            0x00000001 /* Soft Reset the CAN core */
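
The frame-offset macros in the hunk above flatten both layouts (the classic FIFO at fixed registers and the CAN FD per-buffer message space) into one base-plus-offset scheme. A standalone sketch of that arithmetic, using only the constants defined above:

#include <stdio.h>

#define TXMSG_BASE 0x0100 /* XCAN_TXMSG_BASE_OFFSET */
#define RXMSG_BASE 0x1100 /* XCAN_RXMSG_BASE_OFFSET */
#define FRAME_SIZE 0x48   /* XCAN_CANFD_FRAME_SIZE */

int main(void)
{
	for (int n = 0; n < 3; n++)
		printf("TX msg %d: ID at 0x%04x, DLC at 0x%04x\n",
		       n, TXMSG_BASE + FRAME_SIZE * n,
		       TXMSG_BASE + FRAME_SIZE * n + 0x04);
	/* RX message 0 starts at 0x1100, message 1 at 0x1148, ... */
	printf("RX msg 1: 0x%04x\n", RXMSG_BASE + FRAME_SIZE * 1);
	return 0;
}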
@@ -70,6 +88,9 @@ enum xcan_reg {
 #define XCAN_BTR_SJW_MASK              0x00000180 /* Synchronous jump width */
 #define XCAN_BTR_TS2_MASK              0x00000070 /* Time segment 2 */
 #define XCAN_BTR_TS1_MASK              0x0000000F /* Time segment 1 */
+#define XCAN_BTR_SJW_MASK_CANFD                0x000F0000 /* Synchronous jump width */
+#define XCAN_BTR_TS2_MASK_CANFD                0x00000F00 /* Time segment 2 */
+#define XCAN_BTR_TS1_MASK_CANFD                0x0000003F /* Time segment 1 */
 #define XCAN_ECR_REC_MASK              0x0000FF00 /* Receive error counter */
 #define XCAN_ECR_TEC_MASK              0x000000FF /* Transmit error counter */
 #define XCAN_ESR_ACKER_MASK            0x00000010 /* ACK error */
@@ -83,6 +104,7 @@ enum xcan_reg {
 #define XCAN_SR_NORMAL_MASK            0x00000008 /* Normal mode */
 #define XCAN_SR_LBACK_MASK             0x00000002 /* Loop back mode */
 #define XCAN_SR_CONFIG_MASK            0x00000001 /* Configuration mode */
+#define XCAN_IXR_RXMNF_MASK            0x00020000 /* RX match not finished */
 #define XCAN_IXR_TXFEMP_MASK           0x00004000 /* TX FIFO Empty */
 #define XCAN_IXR_WKUP_MASK             0x00000800 /* Wake up interrupt */
 #define XCAN_IXR_SLP_MASK              0x00000400 /* Sleep interrupt */
@@ -100,15 +122,15 @@ enum xcan_reg {
 #define XCAN_IDR_ID2_MASK              0x0007FFFE /* Extended message ident */
 #define XCAN_IDR_RTR_MASK              0x00000001 /* Remote TX request */
 #define XCAN_DLCR_DLC_MASK             0xF0000000 /* Data length code */
-
-#define XCAN_INTR_ALL          (XCAN_IXR_TXOK_MASK | XCAN_IXR_BSOFF_MASK |\
-                                XCAN_IXR_WKUP_MASK | XCAN_IXR_SLP_MASK | \
-                                XCAN_IXR_RXNEMP_MASK | XCAN_IXR_ERROR_MASK | \
-                                XCAN_IXR_RXOFLW_MASK | XCAN_IXR_ARBLST_MASK)
+#define XCAN_FSR_FL_MASK               0x00003F00 /* RX Fill Level */
+#define XCAN_FSR_IRI_MASK              0x00000080 /* RX Increment Read Index */
+#define XCAN_FSR_RI_MASK               0x0000001F /* RX Read Index */
 
 /* CAN register bit shift - XCAN_<REG>_<BIT>_SHIFT */
 #define XCAN_BTR_SJW_SHIFT             7  /* Synchronous jump width */
 #define XCAN_BTR_TS2_SHIFT             4  /* Time segment 2 */
+#define XCAN_BTR_SJW_SHIFT_CANFD       16 /* Synchronous jump width */
+#define XCAN_BTR_TS2_SHIFT_CANFD       8  /* Time segment 2 */
 #define XCAN_IDR_ID1_SHIFT             21 /* Standard Messg Identifier */
 #define XCAN_IDR_ID2_SHIFT             1  /* Extended Message Identifier */
 #define XCAN_DLCR_DLC_SHIFT            28 /* Data length code */
@@ -118,6 +140,27 @@ enum xcan_reg {
 #define XCAN_FRAME_MAX_DATA_LEN                8
 #define XCAN_TIMEOUT                   (1 * HZ)
 
+/* TX-FIFO-empty interrupt available */
+#define XCAN_FLAG_TXFEMP       0x0001
+/* RX Match Not Finished interrupt available */
+#define XCAN_FLAG_RXMNF                0x0002
+/* Extended acceptance filters with control at 0xE0 */
+#define XCAN_FLAG_EXT_FILTERS  0x0004
+/* TX mailboxes instead of TX FIFO */
+#define XCAN_FLAG_TX_MAILBOXES 0x0008
+/* RX FIFO with each buffer in separate registers at 0x1100
+ * instead of the regular FIFO at 0x50
+ */
+#define XCAN_FLAG_RX_FIFO_MULTI        0x0010
+
+struct xcan_devtype_data {
+       unsigned int flags;
+       const struct can_bittiming_const *bittiming_const;
+       const char *bus_clk_name;
+       unsigned int btr_ts2_shift;
+       unsigned int btr_sjw_shift;
+};
+
 /**
  * struct xcan_priv - This definition defines a CAN driver instance
  * @can:                       CAN private data structure.
@@ -133,6 +176,7 @@ enum xcan_reg {
  * @irq_flags:                 For request_irq()
  * @bus_clk:                   Pointer to struct clk
  * @can_clk:                   Pointer to struct clk
+ * @devtype:                   Device type specific constants
  */
 struct xcan_priv {
        struct can_priv can;
@@ -149,6 +193,7 @@ struct xcan_priv {
        unsigned long irq_flags;
        struct clk *bus_clk;
        struct clk *can_clk;
+       struct xcan_devtype_data devtype;
 };
 
 /* CAN Bittiming constants as per Xilinx CAN specs */
@@ -164,9 +209,16 @@ static const struct can_bittiming_const xcan_bittiming_const = {
        .brp_inc = 1,
 };
 
-#define XCAN_CAP_WATERMARK     0x0001
-struct xcan_devtype_data {
-       unsigned int caps;
+static const struct can_bittiming_const xcan_bittiming_const_canfd = {
+       .name = DRIVER_NAME,
+       .tseg1_min = 1,
+       .tseg1_max = 64,
+       .tseg2_min = 1,
+       .tseg2_max = 16,
+       .sjw_max = 16,
+       .brp_min = 1,
+       .brp_max = 256,
+       .brp_inc = 1,
 };
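
Assuming the usual CAN bit-time relation, bitrate = clock / (brp * (1 + tseg1 + tseg2)) where tseg1 covers prop_seg plus phase_seg1, the wider CAN FD limits above simply allow more time quanta per bit at the same clock. A sketch with hypothetical example values inside those limits:

#include <stdio.h>

int main(void)
{
	unsigned int clock = 24000000;              /* example can_clk in Hz */
	unsigned int brp = 3, tseg1 = 5, tseg2 = 2; /* within the limits above */
	unsigned int tq_per_bit = 1 + tseg1 + tseg2; /* sync seg + segments */

	printf("bitrate = %u bit/s\n", clock / (brp * tq_per_bit));
	/* 24 MHz / (3 * 8) = 1000000 bit/s */
	return 0;
}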
 
 /**
@@ -223,6 +275,23 @@ static u32 xcan_read_reg_be(const struct xcan_priv *priv, enum xcan_reg reg)
        return ioread32be(priv->reg_base + reg);
 }
 
+/**
+ * xcan_rx_int_mask - Get the mask for the receive interrupt
+ * @priv:      Driver private data structure
+ *
+ * Return: The receive interrupt mask used by the driver on this HW
+ */
+static u32 xcan_rx_int_mask(const struct xcan_priv *priv)
+{
+       /* RXNEMP is better suited for our use case as it cannot be cleared
+        * while the FIFO is non-empty, but CAN FD HW does not have it
+        */
+       if (priv->devtype.flags & XCAN_FLAG_RX_FIFO_MULTI)
+               return XCAN_IXR_RXOK_MASK;
+       else
+               return XCAN_IXR_RXNEMP_MASK;
+}
+
 /**
  * set_reset_mode - Resets the CAN device mode
  * @ndev:      Pointer to net_device structure
@@ -287,10 +356,10 @@ static int xcan_set_bittiming(struct net_device *ndev)
        btr1 = (bt->prop_seg + bt->phase_seg1 - 1);
 
        /* Setting Time Segment 2 in BTR Register */
-       btr1 |= (bt->phase_seg2 - 1) << XCAN_BTR_TS2_SHIFT;
+       btr1 |= (bt->phase_seg2 - 1) << priv->devtype.btr_ts2_shift;
 
        /* Setting Synchronous jump width in BTR Register */
-       btr1 |= (bt->sjw - 1) << XCAN_BTR_SJW_SHIFT;
+       btr1 |= (bt->sjw - 1) << priv->devtype.btr_sjw_shift;
 
        priv->write_reg(priv, XCAN_BRPR_OFFSET, btr0);
        priv->write_reg(priv, XCAN_BTR_OFFSET, btr1);
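
The fields written here sit at core-specific bit positions, which is why the shifts moved into devtype. A standalone sketch contrasting the two layouts (the example segment values are arbitrary but legal):

#include <stdio.h>

int main(void)
{
	unsigned int prop_seg = 2, phase_seg1 = 3, phase_seg2 = 2, sjw = 1;
	/* FIFO cores: TS2 at bit 4, SJW at bit 7 (XCAN_BTR_*_SHIFT) */
	unsigned int btr_fifo = (prop_seg + phase_seg1 - 1) |
				(phase_seg2 - 1) << 4 | (sjw - 1) << 7;
	/* CAN FD cores: TS2 at bit 8, SJW at bit 16 (*_SHIFT_CANFD) */
	unsigned int btr_fd = (prop_seg + phase_seg1 - 1) |
			      (phase_seg2 - 1) << 8 | (sjw - 1) << 16;

	printf("BTR fifo=0x%08x fd=0x%08x\n", btr_fifo, btr_fd);
	return 0;
}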
@@ -318,6 +387,7 @@ static int xcan_chip_start(struct net_device *ndev)
        u32 reg_msr, reg_sr_mask;
        int err;
        unsigned long timeout;
+       u32 ier;
 
        /* Check if it is in reset mode */
        err = set_reset_mode(ndev);
@@ -329,7 +399,15 @@ static int xcan_chip_start(struct net_device *ndev)
                return err;
 
        /* Enable interrupts */
-       priv->write_reg(priv, XCAN_IER_OFFSET, XCAN_INTR_ALL);
+       ier = XCAN_IXR_TXOK_MASK | XCAN_IXR_BSOFF_MASK |
+               XCAN_IXR_WKUP_MASK | XCAN_IXR_SLP_MASK |
+               XCAN_IXR_ERROR_MASK | XCAN_IXR_RXOFLW_MASK |
+               XCAN_IXR_ARBLST_MASK | xcan_rx_int_mask(priv);
+
+       if (priv->devtype.flags & XCAN_FLAG_RXMNF)
+               ier |= XCAN_IXR_RXMNF_MASK;
+
+       priv->write_reg(priv, XCAN_IER_OFFSET, ier);
 
        /* Check whether it is loopback mode or normal mode  */
        if (priv->can.ctrlmode & CAN_CTRLMODE_LOOPBACK) {
@@ -340,6 +418,12 @@ static int xcan_chip_start(struct net_device *ndev)
                reg_sr_mask = XCAN_SR_NORMAL_MASK;
        }
 
+       /* enable the first extended filter, if any, as cores with extended
+        * filtering default to non-receipt if all filters are disabled
+        */
+       if (priv->devtype.flags & XCAN_FLAG_EXT_FILTERS)
+               priv->write_reg(priv, XCAN_AFR_EXT_OFFSET, 0x00000001);
+
        priv->write_reg(priv, XCAN_MSR_OFFSET, reg_msr);
        priv->write_reg(priv, XCAN_SRR_OFFSET, XCAN_SRR_CEN_MASK);
 
@@ -390,34 +474,15 @@ static int xcan_do_set_mode(struct net_device *ndev, enum can_mode mode)
 }
 
 /**
- * xcan_start_xmit - Starts the transmission
- * @skb:       sk_buff pointer that contains data to be Txed
- * @ndev:      Pointer to net_device structure
- *
- * This function is invoked from upper layers to initiate transmission. This
- * function uses the next available free txbuff and populates their fields to
- * start the transmission.
- *
- * Return: 0 on success and failure value on error
+ * xcan_write_frame - Write a frame to HW
+ * @priv:              Driver private data structure
+ * @skb:               sk_buff pointer that contains data to be Txed
+ * @frame_offset:      Register offset to write the frame to
  */
-static int xcan_start_xmit(struct sk_buff *skb, struct net_device *ndev)
+static void xcan_write_frame(struct xcan_priv *priv, struct sk_buff *skb,
+                            int frame_offset)
 {
-       struct xcan_priv *priv = netdev_priv(ndev);
-       struct net_device_stats *stats = &ndev->stats;
-       struct can_frame *cf = (struct can_frame *)skb->data;
        u32 id, dlc, data[2] = {0, 0};
-       unsigned long flags;
-
-       if (can_dropped_invalid_skb(ndev, skb))
-               return NETDEV_TX_OK;
-
-       /* Check if the TX buffer is full */
-       if (unlikely(priv->read_reg(priv, XCAN_SR_OFFSET) &
-                       XCAN_SR_TXFLL_MASK)) {
-               netif_stop_queue(ndev);
-               netdev_err(ndev, "BUG!, TX FIFO full when queue awake!\n");
-               return NETDEV_TX_BUSY;
-       }
+       struct can_frame *cf = (struct can_frame *)skb->data;
 
        /* Watch carefully on the bit sequence */
        if (cf->can_id & CAN_EFF_FLAG) {
@@ -453,24 +518,44 @@ static int xcan_start_xmit(struct sk_buff *skb, struct net_device *ndev)
        if (cf->can_dlc > 4)
                data[1] = be32_to_cpup((__be32 *)(cf->data + 4));
 
+       priv->write_reg(priv, XCAN_FRAME_ID_OFFSET(frame_offset), id);
+       /* If the CAN frame is RTR frame this write triggers transmission
+        * (not on CAN FD)
+        */
+       priv->write_reg(priv, XCAN_FRAME_DLC_OFFSET(frame_offset), dlc);
+       if (!(cf->can_id & CAN_RTR_FLAG)) {
+               priv->write_reg(priv, XCAN_FRAME_DW1_OFFSET(frame_offset),
+                               data[0]);
+               /* If the CAN frame is Standard/Extended frame this
+                * write triggers transmission (not on CAN FD)
+                */
+               priv->write_reg(priv, XCAN_FRAME_DW2_OFFSET(frame_offset),
+                               data[1]);
+       }
+}
+
+/**
+ * xcan_start_xmit_fifo - Starts the transmission (FIFO mode)
+ * @skb:       sk_buff pointer that contains data to be Txed
+ * @ndev:      Pointer to net_device structure
+ *
+ * Return: 0 on success, -ENOSPC if FIFO is full.
+ */
+static int xcan_start_xmit_fifo(struct sk_buff *skb, struct net_device *ndev)
+{
+       struct xcan_priv *priv = netdev_priv(ndev);
+       unsigned long flags;
+
+       /* Check if the TX buffer is full */
+       if (unlikely(priv->read_reg(priv, XCAN_SR_OFFSET) &
+                       XCAN_SR_TXFLL_MASK))
+               return -ENOSPC;
+
        can_put_echo_skb(skb, ndev, priv->tx_head % priv->tx_max);
 
        spin_lock_irqsave(&priv->tx_lock, flags);
 
        priv->tx_head++;
 
-       /* Write the Frame to Xilinx CAN TX FIFO */
-       priv->write_reg(priv, XCAN_TXFIFO_ID_OFFSET, id);
-       /* If the CAN frame is RTR frame this write triggers tranmission */
-       priv->write_reg(priv, XCAN_TXFIFO_DLC_OFFSET, dlc);
-       if (!(cf->can_id & CAN_RTR_FLAG)) {
-               priv->write_reg(priv, XCAN_TXFIFO_DW1_OFFSET, data[0]);
-               /* If the CAN frame is Standard/Extended frame this
-                * write triggers tranmission
-                */
-               priv->write_reg(priv, XCAN_TXFIFO_DW2_OFFSET, data[1]);
-               stats->tx_bytes += cf->can_dlc;
-       }
+       xcan_write_frame(priv, skb, XCAN_TXFIFO_OFFSET);
 
        /* Clear TX-FIFO-empty interrupt for xcan_tx_interrupt() */
        if (priv->tx_max > 1)
@@ -482,6 +567,70 @@ static int xcan_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 
        spin_unlock_irqrestore(&priv->tx_lock, flags);
 
+       return 0;
+}
+
+/**
+ * xcan_start_xmit_mailbox - Starts the transmission (mailbox mode)
+ * @skb:       sk_buff pointer that contains data to be Txed
+ * @ndev:      Pointer to net_device structure
+ *
+ * Return: 0 on success, -ENOSPC if there is no space
+ */
+static int xcan_start_xmit_mailbox(struct sk_buff *skb, struct net_device *ndev)
+{
+       struct xcan_priv *priv = netdev_priv(ndev);
+       unsigned long flags;
+
+       if (unlikely(priv->read_reg(priv, XCAN_TRR_OFFSET) &
+                    BIT(XCAN_TX_MAILBOX_IDX)))
+               return -ENOSPC;
+
+       can_put_echo_skb(skb, ndev, 0);
+
+       spin_lock_irqsave(&priv->tx_lock, flags);
+
+       priv->tx_head++;
+
+       xcan_write_frame(priv, skb,
+                        XCAN_TXMSG_FRAME_OFFSET(XCAN_TX_MAILBOX_IDX));
+
+       /* Mark buffer as ready for transmit */
+       priv->write_reg(priv, XCAN_TRR_OFFSET, BIT(XCAN_TX_MAILBOX_IDX));
+
+       netif_stop_queue(ndev);
+
+       spin_unlock_irqrestore(&priv->tx_lock, flags);
+
+       return 0;
+}
+
+/**
+ * xcan_start_xmit - Starts the transmission
+ * @skb:       sk_buff pointer that contains data to be Txed
+ * @ndev:      Pointer to net_device structure
+ *
+ * This function is invoked from upper layers to initiate transmission.
+ *
+ * Return: NETDEV_TX_OK on success and NETDEV_TX_BUSY when the tx queue is full
+ */
+static int xcan_start_xmit(struct sk_buff *skb, struct net_device *ndev)
+{
+       struct xcan_priv *priv = netdev_priv(ndev);
+       int ret;
+
+       if (can_dropped_invalid_skb(ndev, skb))
+               return NETDEV_TX_OK;
+
+       if (priv->devtype.flags & XCAN_FLAG_TX_MAILBOXES)
+               ret = xcan_start_xmit_mailbox(skb, ndev);
+       else
+               ret = xcan_start_xmit_fifo(skb, ndev);
+
+       if (ret < 0) {
+               netdev_err(ndev, "BUG!, TX full when queue awake!\n");
+               netif_stop_queue(ndev);
+               return NETDEV_TX_BUSY;
+       }
+
        return NETDEV_TX_OK;
 }
 
@@ -489,13 +638,14 @@ static int xcan_start_xmit(struct sk_buff *skb, struct net_device *ndev)
  * xcan_rx - Called from the CAN ISR to complete received
  *           frame processing
  * @ndev:      Pointer to net_device structure
+ * @frame_base:        Register offset to the frame to be read
  *
  * This function is invoked from the CAN ISR (poll) to process the Rx frames. It
  * does minimal processing and invokes "netif_receive_skb" to complete further
  * processing.
  * Return: 1 on success and 0 on failure.
  */
-static int xcan_rx(struct net_device *ndev)
+static int xcan_rx(struct net_device *ndev, int frame_base)
 {
        struct xcan_priv *priv = netdev_priv(ndev);
        struct net_device_stats *stats = &ndev->stats;
@@ -510,9 +660,9 @@ static int xcan_rx(struct net_device *ndev)
        }
 
        /* Read a frame from Xilinx zynq CANPS */
-       id_xcan = priv->read_reg(priv, XCAN_RXFIFO_ID_OFFSET);
-       dlc = priv->read_reg(priv, XCAN_RXFIFO_DLC_OFFSET) >>
-                               XCAN_DLCR_DLC_SHIFT;
+       id_xcan = priv->read_reg(priv, XCAN_FRAME_ID_OFFSET(frame_base));
+       dlc = priv->read_reg(priv, XCAN_FRAME_DLC_OFFSET(frame_base)) >>
+                                  XCAN_DLCR_DLC_SHIFT;
 
        /* Change Xilinx CAN data length format to socketCAN data format */
        cf->can_dlc = get_can_dlc(dlc);
@@ -535,8 +685,8 @@ static int xcan_rx(struct net_device *ndev)
        }
 
        /* DW1/DW2 must always be read to remove message from RXFIFO */
-       data[0] = priv->read_reg(priv, XCAN_RXFIFO_DW1_OFFSET);
-       data[1] = priv->read_reg(priv, XCAN_RXFIFO_DW2_OFFSET);
+       data[0] = priv->read_reg(priv, XCAN_FRAME_DW1_OFFSET(frame_base));
+       data[1] = priv->read_reg(priv, XCAN_FRAME_DW2_OFFSET(frame_base));
 
        if (!(cf->can_id & CAN_RTR_FLAG)) {
                /* Change Xilinx CAN data format to socketCAN data format */
@@ -594,39 +744,19 @@ static void xcan_set_error_state(struct net_device *ndev,
        u32 ecr = priv->read_reg(priv, XCAN_ECR_OFFSET);
        u32 txerr = ecr & XCAN_ECR_TEC_MASK;
        u32 rxerr = (ecr & XCAN_ECR_REC_MASK) >> XCAN_ESR_REC_SHIFT;
+       enum can_state tx_state = txerr >= rxerr ? new_state : 0;
+       enum can_state rx_state = txerr <= rxerr ? new_state : 0;
+
+       /* non-ERROR states are handled elsewhere */
+       if (WARN_ON(new_state > CAN_STATE_ERROR_PASSIVE))
+               return;
 
-       priv->can.state = new_state;
+       can_change_state(ndev, cf, tx_state, rx_state);
 
        if (cf) {
-               cf->can_id |= CAN_ERR_CRTL;
                cf->data[6] = txerr;
                cf->data[7] = rxerr;
        }
-
-       switch (new_state) {
-       case CAN_STATE_ERROR_PASSIVE:
-               priv->can.can_stats.error_passive++;
-               if (cf)
-                       cf->data[1] = (rxerr > 127) ?
-                                       CAN_ERR_CRTL_RX_PASSIVE :
-                                       CAN_ERR_CRTL_TX_PASSIVE;
-               break;
-       case CAN_STATE_ERROR_WARNING:
-               priv->can.can_stats.error_warning++;
-               if (cf)
-                       cf->data[1] |= (txerr > rxerr) ?
-                                       CAN_ERR_CRTL_TX_WARNING :
-                                       CAN_ERR_CRTL_RX_WARNING;
-               break;
-       case CAN_STATE_ERROR_ACTIVE:
-               if (cf)
-                       cf->data[1] |= CAN_ERR_CRTL_ACTIVE;
-               break;
-       default:
-               /* non-ERROR states are handled elsewhere */
-               WARN_ON(1);
-               break;
-       }
 }
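
can_change_state() takes separate TX and RX states, and the two ternaries above attribute the new state to whichever error counter is worse, or to both on a tie. The selection logic in isolation (can_state enum values replaced by plain ints so the sketch runs standalone):

#include <stdio.h>

int main(void)
{
	unsigned int txerr = 130, rxerr = 5;   /* example error counters */
	int passive = 2, keep = 0;             /* stand-ins for can_state */
	int tx_state = txerr >= rxerr ? passive : keep;
	int rx_state = txerr <= rxerr ? passive : keep;

	/* only the TX side moves to error-passive here */
	printf("tx_state=%d rx_state=%d\n", tx_state, rx_state);
	return 0;
}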
 
 /**
@@ -703,7 +833,8 @@ static void xcan_err_interrupt(struct net_device *ndev, u32 isr)
        } else {
                enum can_state new_state = xcan_current_error_state(ndev);
 
-               xcan_set_error_state(ndev, new_state, skb ? cf : NULL);
+               if (new_state != priv->can.state)
+                       xcan_set_error_state(ndev, new_state, skb ? cf : NULL);
        }
 
        /* Check for Arbitration lost interrupt */
@@ -725,6 +856,17 @@ static void xcan_err_interrupt(struct net_device *ndev, u32 isr)
                }
        }
 
+       /* Check for RX Match Not Finished interrupt */
+       if (isr & XCAN_IXR_RXMNF_MASK) {
+               stats->rx_dropped++;
+               stats->rx_errors++;
+               netdev_err(ndev, "RX match not finished, frame discarded\n");
+               if (skb) {
+                       cf->can_id |= CAN_ERR_CRTL;
+                       cf->data[1] |= CAN_ERR_CRTL_UNSPEC;
+               }
+       }
+
        /* Check for error interrupt */
        if (isr & XCAN_IXR_ERROR_MASK) {
                if (skb)
@@ -808,6 +950,44 @@ static void xcan_state_interrupt(struct net_device *ndev, u32 isr)
                priv->can.state = CAN_STATE_ERROR_ACTIVE;
 }
 
+/**
+ * xcan_rx_fifo_get_next_frame - Get register offset of next RX frame
+ * @priv:      Driver private data structure
+ *
+ * Return: Register offset of the next frame in the RX FIFO,
+ *         or -ENOENT if the FIFO is empty.
+ */
+static int xcan_rx_fifo_get_next_frame(struct xcan_priv *priv)
+{
+       int offset;
+
+       if (priv->devtype.flags & XCAN_FLAG_RX_FIFO_MULTI) {
+               u32 fsr;
+
+               /* clear RXOK before the is-empty check so that any newly
+                * received frame will reassert it without a race
+                */
+               priv->write_reg(priv, XCAN_ICR_OFFSET, XCAN_IXR_RXOK_MASK);
+
+               fsr = priv->read_reg(priv, XCAN_FSR_OFFSET);
+
+               /* check if RX FIFO is empty */
+               if (!(fsr & XCAN_FSR_FL_MASK))
+                       return -ENOENT;
+
+               offset = XCAN_RXMSG_FRAME_OFFSET(fsr & XCAN_FSR_RI_MASK);
+
+       } else {
+               /* check if RX FIFO is empty */
+               if (!(priv->read_reg(priv, XCAN_ISR_OFFSET) &
+                     XCAN_IXR_RXNEMP_MASK))
+                       return -ENOENT;
+
+               /* frames are read from a static offset */
+               offset = XCAN_RXFIFO_OFFSET;
+       }
+
+       return offset;
+}
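
On RX-multi hardware the fill level and read index share one status register. Decoding a hypothetical FSR value with the masks defined earlier (0x48-byte frames starting at 0x1100, per the defines above):

#include <stdio.h>

#define FSR_FL_MASK 0x00003F00 /* XCAN_FSR_FL_MASK: fill level */
#define FSR_RI_MASK 0x0000001F /* XCAN_FSR_RI_MASK: read index */

int main(void)
{
	unsigned int fsr = 0x00000203; /* example: 2 frames queued, index 3 */
	unsigned int fill = (fsr & FSR_FL_MASK) >> 8;
	unsigned int ri = fsr & FSR_RI_MASK;

	if (fill)
		printf("next frame in buffer %u at 0x%04x\n",
		       ri, 0x1100 + 0x48 * ri);
	return 0;
}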
+
 /**
  * xcan_rx_poll - Poll routine for rx packets (NAPI)
  * @napi:      napi structure pointer
@@ -822,14 +1002,24 @@ static int xcan_rx_poll(struct napi_struct *napi, int quota)
 {
        struct net_device *ndev = napi->dev;
        struct xcan_priv *priv = netdev_priv(ndev);
-       u32 isr, ier;
+       u32 ier;
        int work_done = 0;
-
-       isr = priv->read_reg(priv, XCAN_ISR_OFFSET);
-       while ((isr & XCAN_IXR_RXNEMP_MASK) && (work_done < quota)) {
-               work_done += xcan_rx(ndev);
-               priv->write_reg(priv, XCAN_ICR_OFFSET, XCAN_IXR_RXNEMP_MASK);
-               isr = priv->read_reg(priv, XCAN_ISR_OFFSET);
+       int frame_offset;
+
+       while ((frame_offset = xcan_rx_fifo_get_next_frame(priv)) >= 0 &&
+              (work_done < quota)) {
+               work_done += xcan_rx(ndev, frame_offset);
+
+               if (priv->devtype.flags & XCAN_FLAG_RX_FIFO_MULTI)
+                       /* increment read index */
+                       priv->write_reg(priv, XCAN_FSR_OFFSET,
+                                       XCAN_FSR_IRI_MASK);
+               else
+                       /* clear rx-not-empty (will actually clear only if
+                        * empty)
+                        */
+                       priv->write_reg(priv, XCAN_ICR_OFFSET,
+                                       XCAN_IXR_RXNEMP_MASK);
        }
 
        if (work_done) {
@@ -840,7 +1030,7 @@ static int xcan_rx_poll(struct napi_struct *napi, int quota)
        if (work_done < quota) {
                napi_complete_done(napi, work_done);
                ier = priv->read_reg(priv, XCAN_IER_OFFSET);
-               ier |= XCAN_IXR_RXNEMP_MASK;
+               ier |= xcan_rx_int_mask(priv);
                priv->write_reg(priv, XCAN_IER_OFFSET, ier);
        }
        return work_done;
@@ -908,8 +1098,8 @@ static void xcan_tx_interrupt(struct net_device *ndev, u32 isr)
        }
 
        while (frames_sent--) {
-               can_get_echo_skb(ndev, priv->tx_tail %
-                                       priv->tx_max);
+               stats->tx_bytes += can_get_echo_skb(ndev, priv->tx_tail %
+                                                   priv->tx_max);
                priv->tx_tail++;
                stats->tx_packets++;
        }
@@ -939,6 +1129,7 @@ static irqreturn_t xcan_interrupt(int irq, void *dev_id)
        struct xcan_priv *priv = netdev_priv(ndev);
        u32 isr, ier;
        u32 isr_errors;
+       u32 rx_int_mask = xcan_rx_int_mask(priv);
 
        /* Get the interrupt status from Xilinx CAN */
        isr = priv->read_reg(priv, XCAN_ISR_OFFSET);
@@ -958,16 +1149,17 @@ static irqreturn_t xcan_interrupt(int irq, void *dev_id)
 
        /* Check for the type of error interrupt and process it */
        isr_errors = isr & (XCAN_IXR_ERROR_MASK | XCAN_IXR_RXOFLW_MASK |
-                           XCAN_IXR_BSOFF_MASK | XCAN_IXR_ARBLST_MASK);
+                           XCAN_IXR_BSOFF_MASK | XCAN_IXR_ARBLST_MASK |
+                           XCAN_IXR_RXMNF_MASK);
        if (isr_errors) {
                priv->write_reg(priv, XCAN_ICR_OFFSET, isr_errors);
                xcan_err_interrupt(ndev, isr);
        }
 
        /* Check for the type of receive interrupt and process it */
-       if (isr & XCAN_IXR_RXNEMP_MASK) {
+       if (isr & rx_int_mask) {
                ier = priv->read_reg(priv, XCAN_IER_OFFSET);
-               ier &= ~XCAN_IXR_RXNEMP_MASK;
+               ier &= ~rx_int_mask;
                priv->write_reg(priv, XCAN_IER_OFFSET, ier);
                napi_schedule(&priv->napi);
        }
@@ -1214,13 +1406,35 @@ static const struct dev_pm_ops xcan_dev_pm_ops = {
 };
 
 static const struct xcan_devtype_data xcan_zynq_data = {
-       .caps = XCAN_CAP_WATERMARK,
+       .flags = XCAN_FLAG_TXFEMP,
+       .bittiming_const = &xcan_bittiming_const,
+       .btr_ts2_shift = XCAN_BTR_TS2_SHIFT,
+       .btr_sjw_shift = XCAN_BTR_SJW_SHIFT,
+       .bus_clk_name = "pclk",
+};
+
+static const struct xcan_devtype_data xcan_axi_data = {
+       .bittiming_const = &xcan_bittiming_const,
+       .btr_ts2_shift = XCAN_BTR_TS2_SHIFT,
+       .btr_sjw_shift = XCAN_BTR_SJW_SHIFT,
+       .bus_clk_name = "s_axi_aclk",
+};
+
+static const struct xcan_devtype_data xcan_canfd_data = {
+       .flags = XCAN_FLAG_EXT_FILTERS |
+                XCAN_FLAG_RXMNF |
+                XCAN_FLAG_TX_MAILBOXES |
+                XCAN_FLAG_RX_FIFO_MULTI,
+       .bittiming_const = &xcan_bittiming_const,
+       .btr_ts2_shift = XCAN_BTR_TS2_SHIFT_CANFD,
+       .btr_sjw_shift = XCAN_BTR_SJW_SHIFT_CANFD,
+       .bus_clk_name = "s_axi_aclk",
 };
 
 /* Match table for OF platform binding */
 static const struct of_device_id xcan_of_match[] = {
        { .compatible = "xlnx,zynq-can-1.0", .data = &xcan_zynq_data },
-       { .compatible = "xlnx,axi-can-1.00.a", },
+       { .compatible = "xlnx,axi-can-1.00.a", .data = &xcan_axi_data },
+       { .compatible = "xlnx,canfd-1.0", .data = &xcan_canfd_data },
        { /* end of list */ },
 };
 MODULE_DEVICE_TABLE(of, xcan_of_match);
@@ -1240,9 +1454,12 @@ static int xcan_probe(struct platform_device *pdev)
        struct net_device *ndev;
        struct xcan_priv *priv;
        const struct of_device_id *of_id;
-       int caps = 0;
+       const struct xcan_devtype_data *devtype = &xcan_axi_data;
        void __iomem *addr;
-       int ret, rx_max, tx_max, tx_fifo_depth;
+       int ret;
+       int rx_max, tx_max;
+       int hw_tx_max, hw_rx_max;
+       const char *hw_tx_max_property;
 
        /* Get the virtual base address for the device */
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -1252,25 +1469,33 @@ static int xcan_probe(struct platform_device *pdev)
                goto err;
        }
 
-       ret = of_property_read_u32(pdev->dev.of_node, "tx-fifo-depth",
-                                  &tx_fifo_depth);
-       if (ret < 0)
-               goto err;
+       of_id = of_match_device(xcan_of_match, &pdev->dev);
+       if (of_id && of_id->data)
+               devtype = of_id->data;
 
-       ret = of_property_read_u32(pdev->dev.of_node, "rx-fifo-depth", &rx_max);
-       if (ret < 0)
-               goto err;
+       hw_tx_max_property = devtype->flags & XCAN_FLAG_TX_MAILBOXES ?
+                            "tx-mailbox-count" : "tx-fifo-depth";
 
-       of_id = of_match_device(xcan_of_match, &pdev->dev);
-       if (of_id) {
-               const struct xcan_devtype_data *devtype_data = of_id->data;
+       ret = of_property_read_u32(pdev->dev.of_node, hw_tx_max_property,
+                                  &hw_tx_max);
+       if (ret < 0) {
+               dev_err(&pdev->dev, "missing %s property\n",
+                       hw_tx_max_property);
+               goto err;
+       }
 
-               if (devtype_data)
-                       caps = devtype_data->caps;
+       ret = of_property_read_u32(pdev->dev.of_node, "rx-fifo-depth",
+                                  &hw_rx_max);
+       if (ret < 0) {
+               dev_err(&pdev->dev,
+                       "missing rx-fifo-depth property (mailbox mode is not supported)\n");
+               goto err;
        }
 
-       /* There is no way to directly figure out how many frames have been
-        * sent when the TXOK interrupt is processed. If watermark programming
+       /* With TX FIFO:
+        *
+        * There is no way to directly figure out how many frames have been
+        * sent when the TXOK interrupt is processed. If TXFEMP
         * is supported, we can have 2 frames in the FIFO and use TXFEMP
         * to determine if 1 or 2 frames have been sent.
         * Theoretically we should be able to use TXFWMEMP to determine up
@@ -1279,12 +1504,20 @@ static int xcan_probe(struct platform_device *pdev)
         * than 2 frames in FIFO) is set anyway with no TXOK (a frame was
         * sent), which is not a sensible state - possibly TXFWMEMP is not
         * completely synchronized with the rest of the bits?
+        *
+        * With TX mailboxes:
+        *
+        * HW sends frames in CAN ID priority order. To preserve FIFO ordering
+        * we submit frames one at a time.
         */
-       if (caps & XCAN_CAP_WATERMARK)
-               tx_max = min(tx_fifo_depth, 2);
+       if (!(devtype->flags & XCAN_FLAG_TX_MAILBOXES) &&
+           (devtype->flags & XCAN_FLAG_TXFEMP))
+               tx_max = min(hw_tx_max, 2);
        else
                tx_max = 1;
 
+       rx_max = hw_rx_max;
+
        /* Create a CAN device instance */
        ndev = alloc_candev(sizeof(struct xcan_priv), tx_max);
        if (!ndev)
@@ -1292,13 +1525,14 @@ static int xcan_probe(struct platform_device *pdev)
 
        priv = netdev_priv(ndev);
        priv->dev = &pdev->dev;
-       priv->can.bittiming_const = &xcan_bittiming_const;
+       priv->can.bittiming_const = devtype->bittiming_const;
        priv->can.do_set_mode = xcan_do_set_mode;
        priv->can.do_get_berr_counter = xcan_get_berr_counter;
        priv->can.ctrlmode_supported = CAN_CTRLMODE_LOOPBACK |
                                        CAN_CTRLMODE_BERR_REPORTING;
        priv->reg_base = addr;
        priv->tx_max = tx_max;
+       priv->devtype = *devtype;
        spin_lock_init(&priv->tx_lock);
 
        /* Get IRQ for the device */
@@ -1316,22 +1550,12 @@ static int xcan_probe(struct platform_device *pdev)
                ret = PTR_ERR(priv->can_clk);
                goto err_free;
        }
-       /* Check for type of CAN device */
-       if (of_device_is_compatible(pdev->dev.of_node,
-                                   "xlnx,zynq-can-1.0")) {
-               priv->bus_clk = devm_clk_get(&pdev->dev, "pclk");
-               if (IS_ERR(priv->bus_clk)) {
-                       dev_err(&pdev->dev, "bus clock not found\n");
-                       ret = PTR_ERR(priv->bus_clk);
-                       goto err_free;
-               }
-       } else {
-               priv->bus_clk = devm_clk_get(&pdev->dev, "s_axi_aclk");
-               if (IS_ERR(priv->bus_clk)) {
-                       dev_err(&pdev->dev, "bus clock not found\n");
-                       ret = PTR_ERR(priv->bus_clk);
-                       goto err_free;
-               }
+
+       priv->bus_clk = devm_clk_get(&pdev->dev, devtype->bus_clk_name);
+       if (IS_ERR(priv->bus_clk)) {
+               dev_err(&pdev->dev, "bus clock not found\n");
+               ret = PTR_ERR(priv->bus_clk);
+               goto err_free;
        }
 
        priv->write_reg = xcan_write_reg_le;
@@ -1364,9 +1588,9 @@ static int xcan_probe(struct platform_device *pdev)
 
        pm_runtime_put(&pdev->dev);
 
-       netdev_dbg(ndev, "reg_base=0x%p irq=%d clock=%d, tx fifo depth: actual %d, using %d\n",
-                       priv->reg_base, ndev->irq, priv->can.clock.freq,
-                       tx_fifo_depth, priv->tx_max);
+       netdev_dbg(ndev, "reg_base=0x%p irq=%d clock=%d, tx buffers: actual %d, using %d\n",
+                  priv->reg_base, ndev->irq, priv->can.clock.freq,
+                  hw_tx_max, priv->tx_max);
 
        return 0;
 
index 2b81b97e994f1743c67ad1aff565542a1321a07c..d3ce1e4cb4d3cdc9126cef772d12df479d6048b8 100644 (file)
@@ -5,7 +5,7 @@ source "drivers/net/dsa/b53/Kconfig"
 
 config NET_DSA_BCM_SF2
        tristate "Broadcom Starfighter 2 Ethernet switch support"
-       depends on HAS_IOMEM && NET_DSA && OF_MDIO
+       depends on HAS_IOMEM && NET_DSA
        select NET_DSA_TAG_BRCM
        select FIXED_PHY
        select BCM7XXX_PHY
@@ -52,6 +52,17 @@ config NET_DSA_QCA8K
          This enables support for the Qualcomm Atheros QCA8K Ethernet
          switch chips.
 
+config NET_DSA_REALTEK_SMI
+       tristate "Realtek SMI Ethernet switch family support"
+       depends on NET_DSA
+       select FIXED_PHY
+       select IRQ_DOMAIN
+       select REALTEK_PHY
+       select REGMAP
+       ---help---
+         This enables support for the Realtek SMI-based switch
+         chips, currently only RTL8366RB.
+
 config NET_DSA_SMSC_LAN9303
        tristate
        select NET_DSA_TAG_LAN9303
@@ -76,4 +87,15 @@ config NET_DSA_SMSC_LAN9303_MDIO
          Enable access functions if the SMSC/Microchip LAN9303 is configured
          for MDIO managed mode.
 
+config NET_DSA_VITESSE_VSC73XX
+       tristate "Vitesse VSC7385/7388/7395/7398 support"
+       depends on OF && SPI
+       depends on NET_DSA
+       select FIXED_PHY
+       select VITESSE_PHY
+       select GPIOLIB
+       ---help---
+         This enables support for the Vitesse VSC7385, VSC7388,
          VSC7395 and VSC7398 SparX integrated Ethernet switches.
+
 endmenu
index 15c2a831edf192b2678901c9a4c6fce7e9df62cd..46c1cba91ffebfdd35a475fcb7cc66d76f249909 100644 (file)
@@ -8,9 +8,12 @@ endif
 obj-$(CONFIG_NET_DSA_MT7530)   += mt7530.o
 obj-$(CONFIG_NET_DSA_MV88E6060) += mv88e6060.o
 obj-$(CONFIG_NET_DSA_QCA8K)    += qca8k.o
+obj-$(CONFIG_NET_DSA_REALTEK_SMI) += realtek.o
+realtek-objs                   := realtek-smi.o rtl8366.o rtl8366rb.o
 obj-$(CONFIG_NET_DSA_SMSC_LAN9303) += lan9303-core.o
 obj-$(CONFIG_NET_DSA_SMSC_LAN9303_I2C) += lan9303_i2c.o
 obj-$(CONFIG_NET_DSA_SMSC_LAN9303_MDIO) += lan9303_mdio.o
+obj-$(CONFIG_NET_DSA_VITESSE_VSC73XX) += vitesse-vsc73xx.o
 obj-y                          += b53/
 obj-y                          += microchip/
 obj-y                          += mv88e6xxx/
index 02e8982519cebcfad46f21dbc6c53d605e6b6f8a..e0066adcd2f3d6ffd8e8ea6baba94337df0cd457 100644 (file)
@@ -166,6 +166,11 @@ static int bcm_sf2_port_setup(struct dsa_switch *ds, int port,
        reg &= ~P_TXQ_PSM_VDD(port);
        core_writel(priv, reg, CORE_MEM_PSM_VDD_CTRL);
 
+       /* Enable learning */
+       reg = core_readl(priv, CORE_DIS_LEARN);
+       reg &= ~BIT(port);
+       core_writel(priv, reg, CORE_DIS_LEARN);
+
        /* Enable Broadcom tags for that port if requested */
        if (priv->brcm_tag_mask & BIT(port))
                b53_brcm_hdr_setup(ds, port);
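
Enabling learning clears the port's bit in CORE_DIS_LEARN; the WoL path below sets it. The read-modify-write pattern by itself, against a stand-in variable rather than the real switch register:

#include <stdio.h>

static unsigned int core_dis_learn; /* stand-in for the switch register */

static void set_learning(int port, int enable)
{
	if (enable)
		core_dis_learn &= ~(1u << port); /* clear bit = learning on */
	else
		core_dis_learn |= 1u << port;    /* set bit = learning off */
}

int main(void)
{
	set_learning(0, 1);
	set_learning(3, 0); /* e.g. a port left in WoL mode */
	printf("CORE_DIS_LEARN = 0x%08x\n", core_dis_learn); /* 0x00000008 */
	return 0;
}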
@@ -220,10 +225,15 @@ static void bcm_sf2_port_disable(struct dsa_switch *ds, int port,
                                 struct phy_device *phy)
 {
        struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
-       u32 off, reg;
+       u32 reg;
 
-       if (priv->wol_ports_mask & (1 << port))
+       /* Disable learning while in WoL mode */
+       if (priv->wol_ports_mask & (1 << port)) {
+               reg = core_readl(priv, CORE_DIS_LEARN);
+               reg |= BIT(port);
+               core_writel(priv, reg, CORE_DIS_LEARN);
                return;
+       }
 
        if (port == priv->moca_port)
                bcm_sf2_port_intr_disable(priv, port);
@@ -231,11 +241,6 @@ static void bcm_sf2_port_disable(struct dsa_switch *ds, int port,
        if (priv->int_phy_mask & 1 << port && priv->hw_params.num_gphy == 1)
                bcm_sf2_gphy_enable_set(ds, false);
 
-       if (dsa_is_cpu_port(ds, port))
-               off = CORE_IMP_CTL;
-       else
-               off = CORE_G_PCTL_PORT(port);
-
        b53_disable_port(ds, port, phy);
 
        /* Power down the port memory */
index b89acaee12d4364247a694ad989d2ae06ca640aa..1e37b65aab931a39d97cffbcfa49f541d748d260 100644 (file)
@@ -755,7 +755,8 @@ static int bcm_sf2_cfp_rule_set(struct dsa_switch *ds, int port,
        port_num = fs->ring_cookie / SF2_NUM_EGRESS_QUEUES;
 
        if (fs->ring_cookie == RX_CLS_FLOW_DISC ||
-           !dsa_is_user_port(ds, port_num) ||
+           !(dsa_is_user_port(ds, port_num) ||
+             dsa_is_cpu_port(ds, port_num)) ||
            port_num >= priv->hw_params.num_ports)
                return -EINVAL;
        /*
index 3ccd5a865dcba5f8615082ed6162bc24c97c0d4e..0a1e530d52b727a522626a1107138c853e44b011 100644 (file)
@@ -168,6 +168,8 @@ enum bcm_sf2_reg_offs {
 #define CORE_SWITCH_CTRL               0x00088
 #define  MII_DUMB_FWDG_EN              (1 << 6)
 
+#define CORE_DIS_LEARN                 0x000f0
+
 #define CORE_SFT_LRN_CTRL              0x000f8
 #define  SW_LEARN_CNTL(x)              (1 << (x))
 
index bb28c701381a6117d81837c0bc322f1bde31f4ee..0b5a2c31f3951c6b3b10bb2dda008f5f5ad1a04a 100644 (file)
@@ -2819,6 +2819,8 @@ static const struct mv88e6xxx_ops mv88e6161_ops = {
        .reset = mv88e6352_g1_reset,
        .vtu_getnext = mv88e6352_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
+       .avb_ops = &mv88e6165_avb_ops,
+       .ptp_ops = &mv88e6165_ptp_ops,
 };
 
 static const struct mv88e6xxx_ops mv88e6165_ops = {
@@ -2847,6 +2849,8 @@ static const struct mv88e6xxx_ops mv88e6165_ops = {
        .reset = mv88e6352_g1_reset,
        .vtu_getnext = mv88e6352_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
+       .avb_ops = &mv88e6165_avb_ops,
+       .ptp_ops = &mv88e6165_ptp_ops,
 };
 
 static const struct mv88e6xxx_ops mv88e6171_ops = {
@@ -3142,6 +3146,8 @@ static const struct mv88e6xxx_ops mv88e6191_ops = {
        .vtu_getnext = mv88e6390_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
        .serdes_power = mv88e6390_serdes_power,
+       .avb_ops = &mv88e6390_avb_ops,
+       .ptp_ops = &mv88e6352_ptp_ops,
 };
 
 static const struct mv88e6xxx_ops mv88e6240_ops = {
@@ -3184,6 +3190,7 @@ static const struct mv88e6xxx_ops mv88e6240_ops = {
        .serdes_power = mv88e6352_serdes_power,
        .gpio_ops = &mv88e6352_gpio_ops,
        .avb_ops = &mv88e6352_avb_ops,
+       .ptp_ops = &mv88e6352_ptp_ops,
 };
 
 static const struct mv88e6xxx_ops mv88e6290_ops = {
@@ -3223,6 +3230,7 @@ static const struct mv88e6xxx_ops mv88e6290_ops = {
        .serdes_power = mv88e6390_serdes_power,
        .gpio_ops = &mv88e6352_gpio_ops,
        .avb_ops = &mv88e6390_avb_ops,
+       .ptp_ops = &mv88e6352_ptp_ops,
 };
 
 static const struct mv88e6xxx_ops mv88e6320_ops = {
@@ -3261,6 +3269,7 @@ static const struct mv88e6xxx_ops mv88e6320_ops = {
        .vtu_loadpurge = mv88e6185_g1_vtu_loadpurge,
        .gpio_ops = &mv88e6352_gpio_ops,
        .avb_ops = &mv88e6352_avb_ops,
+       .ptp_ops = &mv88e6352_ptp_ops,
 };
 
 static const struct mv88e6xxx_ops mv88e6321_ops = {
@@ -3297,6 +3306,7 @@ static const struct mv88e6xxx_ops mv88e6321_ops = {
        .vtu_loadpurge = mv88e6185_g1_vtu_loadpurge,
        .gpio_ops = &mv88e6352_gpio_ops,
        .avb_ops = &mv88e6352_avb_ops,
+       .ptp_ops = &mv88e6352_ptp_ops,
 };
 
 static const struct mv88e6xxx_ops mv88e6341_ops = {
@@ -3338,6 +3348,7 @@ static const struct mv88e6xxx_ops mv88e6341_ops = {
        .serdes_power = mv88e6341_serdes_power,
        .gpio_ops = &mv88e6352_gpio_ops,
        .avb_ops = &mv88e6390_avb_ops,
+       .ptp_ops = &mv88e6352_ptp_ops,
 };
 
 static const struct mv88e6xxx_ops mv88e6350_ops = {
@@ -3411,6 +3422,7 @@ static const struct mv88e6xxx_ops mv88e6351_ops = {
        .vtu_getnext = mv88e6352_g1_vtu_getnext,
        .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
        .avb_ops = &mv88e6352_avb_ops,
+       .ptp_ops = &mv88e6352_ptp_ops,
 };
 
 static const struct mv88e6xxx_ops mv88e6352_ops = {
@@ -3453,6 +3465,7 @@ static const struct mv88e6xxx_ops mv88e6352_ops = {
        .serdes_power = mv88e6352_serdes_power,
        .gpio_ops = &mv88e6352_gpio_ops,
        .avb_ops = &mv88e6352_avb_ops,
+       .ptp_ops = &mv88e6352_ptp_ops,
        .serdes_get_sset_count = mv88e6352_serdes_get_sset_count,
        .serdes_get_strings = mv88e6352_serdes_get_strings,
        .serdes_get_stats = mv88e6352_serdes_get_stats,
@@ -3497,6 +3510,7 @@ static const struct mv88e6xxx_ops mv88e6390_ops = {
        .serdes_power = mv88e6390_serdes_power,
        .gpio_ops = &mv88e6352_gpio_ops,
        .avb_ops = &mv88e6390_avb_ops,
+       .ptp_ops = &mv88e6352_ptp_ops,
 };
 
 static const struct mv88e6xxx_ops mv88e6390x_ops = {
@@ -3538,6 +3552,7 @@ static const struct mv88e6xxx_ops mv88e6390x_ops = {
        .serdes_power = mv88e6390_serdes_power,
        .gpio_ops = &mv88e6352_gpio_ops,
        .avb_ops = &mv88e6390_avb_ops,
+       .ptp_ops = &mv88e6352_ptp_ops,
 };
 
 static const struct mv88e6xxx_info mv88e6xxx_table[] = {
@@ -3689,6 +3704,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .pvt = true,
                .multi_chip = true,
                .tag_protocol = DSA_TAG_PROTO_EDSA,
+               .ptp_support = true,
                .ops = &mv88e6161_ops,
        },
 
@@ -3711,6 +3727,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .pvt = true,
                .multi_chip = true,
                .tag_protocol = DSA_TAG_PROTO_DSA,
+               .ptp_support = true,
                .ops = &mv88e6165_ops,
        },
 
index 8ac3fbb15352d85f3f38ee4ffa30d551d34bc337..6aa6197ddc109f3979e0ecb5fc2de3d92340dbb9 100644 (file)
@@ -155,6 +155,7 @@ struct mv88e6xxx_bus_ops;
 struct mv88e6xxx_irq_ops;
 struct mv88e6xxx_gpio_ops;
 struct mv88e6xxx_avb_ops;
+struct mv88e6xxx_ptp_ops;
 
 struct mv88e6xxx_irq {
        u16 masked;
@@ -273,6 +274,7 @@ struct mv88e6xxx_chip {
        struct ptp_pin_desc     pin_config[MV88E6XXX_MAX_GPIO];
        u16 trig_config;
        u16 evcap_config;
+       u16 enable_count;
 
        /* Per-port timestamping resources. */
        struct mv88e6xxx_port_hwtstamp port_hwtstamp[DSA_MAX_PORTS];
@@ -439,6 +441,9 @@ struct mv88e6xxx_ops {
 
        /* Remote Management Unit operations */
        int (*rmu_disable)(struct mv88e6xxx_chip *chip);
+
+       /* Precision Time Protocol operations */
+       const struct mv88e6xxx_ptp_ops *ptp_ops;
 };
 
 struct mv88e6xxx_irq_ops {
@@ -486,6 +491,24 @@ struct mv88e6xxx_avb_ops {
        int (*tai_write)(struct mv88e6xxx_chip *chip, int addr, u16 data);
 };
 
+struct mv88e6xxx_ptp_ops {
+       u64 (*clock_read)(const struct cyclecounter *cc);
+       int (*ptp_enable)(struct ptp_clock_info *ptp,
+                         struct ptp_clock_request *rq, int on);
+       int (*ptp_verify)(struct ptp_clock_info *ptp, unsigned int pin,
+                         enum ptp_pin_function func, unsigned int chan);
+       void (*event_work)(struct work_struct *ugly);
+       int (*port_enable)(struct mv88e6xxx_chip *chip, int port);
+       int (*port_disable)(struct mv88e6xxx_chip *chip, int port);
+       int (*global_enable)(struct mv88e6xxx_chip *chip);
+       int (*global_disable)(struct mv88e6xxx_chip *chip);
+       int n_ext_ts;
+       int arr0_sts_reg;
+       int arr1_sts_reg;
+       int dep_sts_reg;
+       u32 rx_filters;
+};
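
Each switch family can now supply its own PTP register map and callbacks through this ops struct. A much-reduced sketch of the dispatch idea (miniature hypothetical types and values, not the kernel's):

#include <stdio.h>

struct ptp_ops {
	int (*global_enable)(void);
	int n_ext_ts;
	int dep_sts_reg;
};

static int enable_6165(void) { puts("6165: clear DISABLE_PTP"); return 0; }

static const struct ptp_ops ops_6165 = {
	.global_enable = enable_6165,
	.n_ext_ts = 0,
	.dep_sts_reg = 0x05, /* illustrative value only */
};

int main(void)
{
	const struct ptp_ops *ops = &ops_6165; /* chosen per chip family */

	if (ops->global_enable)
		ops->global_enable();
	printf("departure status register: 0x%02x\n", ops->dep_sts_reg);
	return 0;
}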
+
 #define STATS_TYPE_PORT                BIT(0)
 #define STATS_TYPE_BANK0       BIT(1)
 #define STATS_TYPE_BANK1       BIT(2)
index 37e8ce2c72a0d3bd293f53bd87ec12c8a270e190..194660d8c7834a3953c96d2dcce15f467fee5c3b 100644 (file)
 #define MV88E6390_G2_AVB_CMD_OP_WRITE          0x6000
 #define MV88E6352_G2_AVB_CMD_PORT_MASK         0x0f00
 #define MV88E6352_G2_AVB_CMD_PORT_TAIGLOBAL    0xe
+#define MV88E6165_G2_AVB_CMD_PORT_PTPGLOBAL    0xf
 #define MV88E6352_G2_AVB_CMD_PORT_PTPGLOBAL    0xf
 #define MV88E6390_G2_AVB_CMD_PORT_MASK         0x1f00
 #define MV88E6390_G2_AVB_CMD_PORT_TAIGLOBAL    0x1e
@@ -335,6 +336,7 @@ int mv88e6xxx_g2_device_mapping_write(struct mv88e6xxx_chip *chip, int target,
 extern const struct mv88e6xxx_irq_ops mv88e6097_watchdog_ops;
 extern const struct mv88e6xxx_irq_ops mv88e6390_watchdog_ops;
 
+extern const struct mv88e6xxx_avb_ops mv88e6165_avb_ops;
 extern const struct mv88e6xxx_avb_ops mv88e6352_avb_ops;
 extern const struct mv88e6xxx_avb_ops mv88e6390_avb_ops;
 
@@ -484,6 +486,7 @@ static inline int mv88e6xxx_g2_pot_clear(struct mv88e6xxx_chip *chip)
 static const struct mv88e6xxx_irq_ops mv88e6097_watchdog_ops = {};
 static const struct mv88e6xxx_irq_ops mv88e6390_watchdog_ops = {};
 
+static const struct mv88e6xxx_avb_ops mv88e6165_avb_ops = {};
 static const struct mv88e6xxx_avb_ops mv88e6352_avb_ops = {};
 static const struct mv88e6xxx_avb_ops mv88e6390_avb_ops = {};
 
index 2e398ccb88cae27a97e68095421d1b979bbe859b..672b503a67e1828cb62e730891ec5c5b8719ead4 100644 (file)
@@ -130,6 +130,31 @@ const struct mv88e6xxx_avb_ops mv88e6352_avb_ops = {
        .tai_write              = mv88e6352_g2_avb_tai_write,
 };
 
+static int mv88e6165_g2_avb_tai_read(struct mv88e6xxx_chip *chip, int addr,
+                                    u16 *data, int len)
+{
+       return mv88e6352_g2_avb_port_ptp_read(chip,
+                                       MV88E6165_G2_AVB_CMD_PORT_PTPGLOBAL,
+                                       addr, data, len);
+}
+
+static int mv88e6165_g2_avb_tai_write(struct mv88e6xxx_chip *chip, int addr,
+                                     u16 data)
+{
+       return mv88e6352_g2_avb_port_ptp_write(chip,
+                                       MV88E6165_G2_AVB_CMD_PORT_PTPGLOBAL,
+                                       addr, data);
+}
+
+const struct mv88e6xxx_avb_ops mv88e6165_avb_ops = {
+       .port_ptp_read          = mv88e6352_g2_avb_port_ptp_read,
+       .port_ptp_write         = mv88e6352_g2_avb_port_ptp_write,
+       .ptp_read               = mv88e6352_g2_avb_ptp_read,
+       .ptp_write              = mv88e6352_g2_avb_ptp_write,
+       .tai_read               = mv88e6165_g2_avb_tai_read,
+       .tai_write              = mv88e6165_g2_avb_tai_write,
+};
+
 static int mv88e6390_g2_avb_port_ptp_read(struct mv88e6xxx_chip *chip,
                                          int port, int addr, u16 *data,
                                          int len)
index a036c490b7ce0662f0c29f8fe0e6609344639455..a17c16a2ab789b4c036d11efdad0c99d0bbdc15c 100644 (file)
@@ -51,17 +51,30 @@ static int mv88e6xxx_ptp_write(struct mv88e6xxx_chip *chip, int addr,
        return chip->info->ops->avb_ops->ptp_write(chip, addr, data);
 }
 
+static int mv88e6xxx_ptp_read(struct mv88e6xxx_chip *chip, int addr,
+                             u16 *data)
+{
+       if (!chip->info->ops->avb_ops->ptp_read)
+               return -EOPNOTSUPP;
+
+       return chip->info->ops->avb_ops->ptp_read(chip, addr, data, 1);
+}
+
 /* TX_TSTAMP_TIMEOUT: This limits the time spent polling for a TX
  * timestamp. When working properly, hardware will produce a timestamp
  * within 1ms. Software may encounter delays due to MDIO contention, so
  * the timeout is set accordingly.
  */
-#define TX_TSTAMP_TIMEOUT      msecs_to_jiffies(20)
+#define TX_TSTAMP_TIMEOUT      msecs_to_jiffies(40)
 
 int mv88e6xxx_get_ts_info(struct dsa_switch *ds, int port,
                          struct ethtool_ts_info *info)
 {
-       struct mv88e6xxx_chip *chip = ds->priv;
+       const struct mv88e6xxx_ptp_ops *ptp_ops;
+       struct mv88e6xxx_chip *chip;
+
+       chip = ds->priv;
+       ptp_ops = chip->info->ops->ptp_ops;
 
        if (!chip->info->ptp_support)
                return -EOPNOTSUPP;
@@ -74,17 +87,7 @@ int mv88e6xxx_get_ts_info(struct dsa_switch *ds, int port,
        info->tx_types =
                (1 << HWTSTAMP_TX_OFF) |
                (1 << HWTSTAMP_TX_ON);
-       info->rx_filters =
-               (1 << HWTSTAMP_FILTER_NONE) |
-               (1 << HWTSTAMP_FILTER_PTP_V2_L4_EVENT) |
-               (1 << HWTSTAMP_FILTER_PTP_V2_L4_SYNC) |
-               (1 << HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ) |
-               (1 << HWTSTAMP_FILTER_PTP_V2_L2_EVENT) |
-               (1 << HWTSTAMP_FILTER_PTP_V2_L2_SYNC) |
-               (1 << HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ) |
-               (1 << HWTSTAMP_FILTER_PTP_V2_EVENT) |
-               (1 << HWTSTAMP_FILTER_PTP_V2_SYNC) |
-               (1 << HWTSTAMP_FILTER_PTP_V2_DELAY_REQ);
+       info->rx_filters = ptp_ops->rx_filters;
 
        return 0;
 }
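
Publishing the supported filters as a bitmask lets the same value drive both ethtool reporting and request validation (see the set_hwtstamp_config hunk below). The check in isolation, with illustrative filter numbers standing in for HWTSTAMP_FILTER_*:

#include <stdio.h>

#define BIT(n) (1u << (n))

/* hypothetical filter numbers for the sketch */
enum { FILTER_NONE = 0, FILTER_PTP_V2_EVENT = 12 };

int main(void)
{
	unsigned int rx_filters = BIT(FILTER_NONE) | BIT(FILTER_PTP_V2_EVENT);
	int requested = FILTER_PTP_V2_EVENT;

	if (!(BIT(requested) & rx_filters))
		puts("unsupported: fall back to FILTER_NONE, return -ERANGE");
	else
		puts("supported");
	return 0;
}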
@@ -92,10 +95,9 @@ int mv88e6xxx_get_ts_info(struct dsa_switch *ds, int port,
 static int mv88e6xxx_set_hwtstamp_config(struct mv88e6xxx_chip *chip, int port,
                                         struct hwtstamp_config *config)
 {
+       const struct mv88e6xxx_ptp_ops *ptp_ops = chip->info->ops->ptp_ops;
        struct mv88e6xxx_port_hwtstamp *ps = &chip->port_hwtstamp[port];
        bool tstamp_enable = false;
-       u16 port_config0;
-       int err;
 
        /* Prevent the TX/RX paths from trying to interact with the
         * timestamp hardware while we reconfigure it.
@@ -120,6 +122,14 @@ static int mv88e6xxx_set_hwtstamp_config(struct mv88e6xxx_chip *chip, int port,
        /* The switch supports timestamping both L2 and L4; one cannot be
         * disabled independently of the other.
         */
+
+       if (!(BIT(config->rx_filter) & ptp_ops->rx_filters)) {
+               config->rx_filter = HWTSTAMP_FILTER_NONE;
+               dev_dbg(chip->dev, "Unsupported rx_filter %d\n",
+                       config->rx_filter);
+               return -ERANGE;
+       }
+
        switch (config->rx_filter) {
        case HWTSTAMP_FILTER_NONE:
                tstamp_enable = false;
@@ -141,24 +151,22 @@ static int mv88e6xxx_set_hwtstamp_config(struct mv88e6xxx_chip *chip, int port,
                return -ERANGE;
        }
 
+       mutex_lock(&chip->reg_lock);
        if (tstamp_enable) {
-               /* Disable transportSpecific value matching, so that packets
-                * with either 1588 (0) and 802.1AS (1) will be timestamped.
-                */
-               port_config0 = MV88E6XXX_PORT_PTP_CFG0_DISABLE_TSPEC_MATCH;
+               chip->enable_count += 1;
+               if (chip->enable_count == 1 && ptp_ops->global_enable)
+                       ptp_ops->global_enable(chip);
+               if (ptp_ops->port_enable)
+                       ptp_ops->port_enable(chip, port);
        } else {
-               /* Disable PTP. This disables both RX and TX timestamping. */
-               port_config0 = MV88E6XXX_PORT_PTP_CFG0_DISABLE_PTP;
+               if (ptp_ops->port_disable)
+                       ptp_ops->port_disable(chip, port);
+               chip->enable_count -= 1;
+               if (chip->enable_count == 0 && ptp_ops->global_disable)
+                       ptp_ops->global_disable(chip);
        }
-
-       mutex_lock(&chip->reg_lock);
-       err = mv88e6xxx_port_ptp_write(chip, port, MV88E6XXX_PORT_PTP_CFG0,
-                                      port_config0);
        mutex_unlock(&chip->reg_lock);
 
-       if (err < 0)
-               return err;
-
        /* Once hardware has been configured, enable timestamp checks
         * in the RX/TX paths.
         */
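
The new enable_count makes the global PTP enable refcounted: the first port to enable timestamping powers the block up, and the last one to disable powers it down. The counting pattern by itself (locking, which the hunk above takes via reg_lock, is omitted from this sketch):

#include <stdio.h>

static int enable_count;

static void port_tstamp(int on)
{
	if (on) {
		if (++enable_count == 1)
			puts("global_enable()");  /* first user */
	} else {
		if (--enable_count == 0)
			puts("global_disable()"); /* last user */
	}
}

int main(void)
{
	port_tstamp(1); /* port A on  -> global_enable */
	port_tstamp(1); /* port B on  -> no-op */
	port_tstamp(0); /* port A off -> no-op */
	port_tstamp(0); /* port B off -> global_disable */
	return 0;
}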
@@ -338,17 +346,18 @@ static void mv88e6xxx_get_rxts(struct mv88e6xxx_chip *chip,
 static void mv88e6xxx_rxtstamp_work(struct mv88e6xxx_chip *chip,
                                    struct mv88e6xxx_port_hwtstamp *ps)
 {
+       const struct mv88e6xxx_ptp_ops *ptp_ops = chip->info->ops->ptp_ops;
        struct sk_buff *skb;
 
        skb = skb_dequeue(&ps->rx_queue);
 
        if (skb)
-               mv88e6xxx_get_rxts(chip, ps, skb, MV88E6XXX_PORT_PTP_ARR0_STS,
+               mv88e6xxx_get_rxts(chip, ps, skb, ptp_ops->arr0_sts_reg,
                                   &ps->rx_queue);
 
        skb = skb_dequeue(&ps->rx_queue2);
        if (skb)
-               mv88e6xxx_get_rxts(chip, ps, skb, MV88E6XXX_PORT_PTP_ARR1_STS,
+               mv88e6xxx_get_rxts(chip, ps, skb, ptp_ops->arr1_sts_reg,
                                   &ps->rx_queue2);
 }
 
@@ -389,6 +398,7 @@ bool mv88e6xxx_port_rxtstamp(struct dsa_switch *ds, int port,
 static int mv88e6xxx_txtstamp_work(struct mv88e6xxx_chip *chip,
                                   struct mv88e6xxx_port_hwtstamp *ps)
 {
+       const struct mv88e6xxx_ptp_ops *ptp_ops = chip->info->ops->ptp_ops;
        struct skb_shared_hwtstamps shhwtstamps;
        u16 departure_block[4], status;
        struct sk_buff *tmp_skb;
@@ -401,7 +411,7 @@ static int mv88e6xxx_txtstamp_work(struct mv88e6xxx_chip *chip,
 
        mutex_lock(&chip->reg_lock);
        err = mv88e6xxx_port_ptp_read(chip, ps->port_id,
-                                     MV88E6XXX_PORT_PTP_DEP_STS,
+                                     ptp_ops->dep_sts_reg,
                                      departure_block,
                                      ARRAY_SIZE(departure_block));
        mutex_unlock(&chip->reg_lock);
@@ -425,8 +435,7 @@ static int mv88e6xxx_txtstamp_work(struct mv88e6xxx_chip *chip,
 
        /* We have the timestamp; go ahead and clear valid now */
        mutex_lock(&chip->reg_lock);
-       mv88e6xxx_port_ptp_write(chip, ps->port_id,
-                                MV88E6XXX_PORT_PTP_DEP_STS, 0);
+       mv88e6xxx_port_ptp_write(chip, ps->port_id, ptp_ops->dep_sts_reg, 0);
        mutex_unlock(&chip->reg_lock);
 
        status = departure_block[0] & MV88E6XXX_PTP_TS_STATUS_MASK;
@@ -522,8 +531,48 @@ bool mv88e6xxx_port_txtstamp(struct dsa_switch *ds, int port,
        return true;
 }
 
+int mv88e6165_global_disable(struct mv88e6xxx_chip *chip)
+{
+       u16 val;
+       int err;
+
+       err = mv88e6xxx_ptp_read(chip, MV88E6165_PTP_CFG, &val);
+       if (err)
+               return err;
+       val |= MV88E6165_PTP_CFG_DISABLE_PTP;
+
+       return mv88e6xxx_ptp_write(chip, MV88E6165_PTP_CFG, val);
+}
+
+int mv88e6165_global_enable(struct mv88e6xxx_chip *chip)
+{
+       u16 val;
+       int err;
+
+       err = mv88e6xxx_ptp_read(chip, MV88E6165_PTP_CFG, &val);
+       if (err)
+               return err;
+
+       val &= ~(MV88E6165_PTP_CFG_DISABLE_PTP | MV88E6165_PTP_CFG_TSPEC_MASK);
+
+       return mv88e6xxx_ptp_write(chip, MV88E6165_PTP_CFG, val);
+}
+
+int mv88e6352_hwtstamp_port_disable(struct mv88e6xxx_chip *chip, int port)
+{
+       return mv88e6xxx_port_ptp_write(chip, port, MV88E6XXX_PORT_PTP_CFG0,
+                                       MV88E6XXX_PORT_PTP_CFG0_DISABLE_PTP);
+}
+
+int mv88e6352_hwtstamp_port_enable(struct mv88e6xxx_chip *chip, int port)
+{
+       return mv88e6xxx_port_ptp_write(chip, port, MV88E6XXX_PORT_PTP_CFG0,
+                                       MV88E6XXX_PORT_PTP_CFG0_DISABLE_TSPEC_MATCH);
+}
+
 static int mv88e6xxx_hwtstamp_port_setup(struct mv88e6xxx_chip *chip, int port)
 {
+       const struct mv88e6xxx_ptp_ops *ptp_ops = chip->info->ops->ptp_ops;
        struct mv88e6xxx_port_hwtstamp *ps = &chip->port_hwtstamp[port];
 
        ps->port_id = port;
@@ -531,12 +580,15 @@ static int mv88e6xxx_hwtstamp_port_setup(struct mv88e6xxx_chip *chip, int port)
        skb_queue_head_init(&ps->rx_queue);
        skb_queue_head_init(&ps->rx_queue2);
 
-       return mv88e6xxx_port_ptp_write(chip, port, MV88E6XXX_PORT_PTP_CFG0,
-                                       MV88E6XXX_PORT_PTP_CFG0_DISABLE_PTP);
+       if (ptp_ops->port_disable)
+               return ptp_ops->port_disable(chip, port);
+
+       return 0;
 }
 
 int mv88e6xxx_hwtstamp_setup(struct mv88e6xxx_chip *chip)
 {
+       const struct mv88e6xxx_ptp_ops *ptp_ops = chip->info->ops->ptp_ops;
        int err;
        int i;
 
@@ -547,6 +599,18 @@ int mv88e6xxx_hwtstamp_setup(struct mv88e6xxx_chip *chip)
                        return err;
        }
 
+       /* Disable PTP globally */
+       if (ptp_ops->global_disable) {
+               err = ptp_ops->global_disable(chip);
+               if (err)
+                       return err;
+       }
+
+       /* Set the ethertype of L2 PTP messages */
+       err = mv88e6xxx_ptp_write(chip, MV88E6XXX_PTP_GC_ETYPE, ETH_P_1588);
+       if (err)
+               return err;
+
        /* MV88E6XXX_PTP_MSG_TYPE is a mask of PTP message types to
         * timestamp. This affects all ports that have timestamping enabled,
         * but the timestamp config is per-port; thus we configure all events
index bc71c9212a0813adbe0519a9a0a27ab812bfe08a..b9a72661bcc4607f7c0a4c8ac0b429319b1f3dca 100644 (file)
@@ -19,7 +19,7 @@
 
 #include "chip.h"
 
-/* Global PTP registers */
+/* Global 6352 PTP registers */
 /* Offset 0x00: PTP EtherType */
 #define MV88E6XXX_PTP_ETHERTYPE        0x00
 
 /* Offset 0x02: Timestamp Arrival Capture Pointers */
 #define MV88E6XXX_PTP_TS_ARRIVAL_PTR   0x02
 
+/* Offset 0x05: PTP Global Configuration */
+#define MV88E6165_PTP_CFG                      0x05
+#define MV88E6165_PTP_CFG_TSPEC_MASK           0xf000
+#define MV88E6165_PTP_CFG_DISABLE_TS_OVERWRITE BIT(1)
+#define MV88E6165_PTP_CFG_DISABLE_PTP          BIT(0)
+
 /* Offset 0x07: PTP Global Configuration */
 #define MV88E6341_PTP_CFG                      0x07
 #define MV88E6341_PTP_CFG_UPDATE               0x8000
@@ -46,7 +52,7 @@
 /* Offset 0x08: PTP Interrupt Status */
 #define MV88E6XXX_PTP_IRQ_STATUS       0x08
 
-/* Per-Port PTP Registers */
+/* Per-Port 6352 PTP Registers */
 /* Offset 0x00: PTP Configuration 0 */
 #define MV88E6XXX_PORT_PTP_CFG0                                0x00
 #define MV88E6XXX_PORT_PTP_CFG0_TSPEC_SHIFT            12
@@ -123,6 +129,10 @@ int mv88e6xxx_get_ts_info(struct dsa_switch *ds, int port,
 
 int mv88e6xxx_hwtstamp_setup(struct mv88e6xxx_chip *chip);
 void mv88e6xxx_hwtstamp_free(struct mv88e6xxx_chip *chip);
+int mv88e6352_hwtstamp_port_enable(struct mv88e6xxx_chip *chip, int port);
+int mv88e6352_hwtstamp_port_disable(struct mv88e6xxx_chip *chip, int port);
+int mv88e6165_global_enable(struct mv88e6xxx_chip *chip);
+int mv88e6165_global_disable(struct mv88e6xxx_chip *chip);
 
 #else /* !CONFIG_NET_DSA_MV88E6XXX_PTP */
 
index bd85e2c390e148ba15c83677ea2d7486d1218dd6..4b336d8d4c6743dc0f1bb63e5a68f9c19e626000 100644 (file)
@@ -16,6 +16,7 @@
 
 #include "chip.h"
 #include "global2.h"
+#include "hwtstamp.h"
 #include "ptp.h"
 
 /* Raw timestamps are in units of 8-ns clock periods. */
@@ -50,7 +51,7 @@ static int mv88e6xxx_tai_write(struct mv88e6xxx_chip *chip, int addr, u16 data)
 }
 
 /* TODO: places where this is called should be using pinctrl */
-static int mv88e6xxx_set_gpio_func(struct mv88e6xxx_chip *chip, int pin,
+static int mv88e6352_set_gpio_func(struct mv88e6xxx_chip *chip, int pin,
                                   int func, int input)
 {
        int err;
@@ -65,7 +66,7 @@ static int mv88e6xxx_set_gpio_func(struct mv88e6xxx_chip *chip, int pin,
        return chip->info->ops->gpio_ops->set_pctl(chip, pin, func);
 }
 
-static u64 mv88e6xxx_ptp_clock_read(const struct cyclecounter *cc)
+static u64 mv88e6352_ptp_clock_read(const struct cyclecounter *cc)
 {
        struct mv88e6xxx_chip *chip = cc_to_chip(cc);
        u16 phc_time[2];
@@ -79,13 +80,27 @@ static u64 mv88e6xxx_ptp_clock_read(const struct cyclecounter *cc)
                return ((u32)phc_time[1] << 16) | phc_time[0];
 }
 
-/* mv88e6xxx_config_eventcap - configure TAI event capture
+static u64 mv88e6165_ptp_clock_read(const struct cyclecounter *cc)
+{
+       struct mv88e6xxx_chip *chip = cc_to_chip(cc);
+       u16 phc_time[2];
+       int err;
+
+       err = mv88e6xxx_tai_read(chip, MV88E6XXX_PTP_GC_TIME_LO, phc_time,
+                                ARRAY_SIZE(phc_time));
+       if (err)
+               return 0;
+       else
+               return ((u32)phc_time[1] << 16) | phc_time[0];
+}
+
+/* mv88e6352_config_eventcap - configure TAI event capture
  * @event: PTP_CLOCK_PPS (internal) or PTP_CLOCK_EXTTS (external)
  * @rising: zero for falling-edge trigger, else rising-edge trigger
  *
  * This will also reset the capture sequence counter.
  */
-static int mv88e6xxx_config_eventcap(struct mv88e6xxx_chip *chip, int event,
+static int mv88e6352_config_eventcap(struct mv88e6xxx_chip *chip, int event,
                                     int rising)
 {
        u16 global_config;
@@ -118,7 +133,7 @@ static int mv88e6xxx_config_eventcap(struct mv88e6xxx_chip *chip, int event,
        return err;
 }
 
-static void mv88e6xxx_tai_event_work(struct work_struct *ugly)
+static void mv88e6352_tai_event_work(struct work_struct *ugly)
 {
        struct delayed_work *dw = to_delayed_work(ugly);
        struct mv88e6xxx_chip *chip = dw_tai_event_to_chip(dw);
@@ -232,7 +247,7 @@ static int mv88e6xxx_ptp_settime(struct ptp_clock_info *ptp,
        return 0;
 }
 
-static int mv88e6xxx_ptp_enable_extts(struct mv88e6xxx_chip *chip,
+static int mv88e6352_ptp_enable_extts(struct mv88e6xxx_chip *chip,
                                      struct ptp_clock_request *rq, int on)
 {
        int rising = (rq->extts.flags & PTP_RISING_EDGE);
@@ -250,18 +265,18 @@ static int mv88e6xxx_ptp_enable_extts(struct mv88e6xxx_chip *chip,
        if (on) {
                func = MV88E6352_G2_SCRATCH_GPIO_PCTL_EVREQ;
 
-               err = mv88e6xxx_set_gpio_func(chip, pin, func, true);
+               err = mv88e6352_set_gpio_func(chip, pin, func, true);
                if (err)
                        goto out;
 
                schedule_delayed_work(&chip->tai_event_work,
                                      TAI_EVENT_WORK_INTERVAL);
 
-               err = mv88e6xxx_config_eventcap(chip, PTP_CLOCK_EXTTS, rising);
+               err = mv88e6352_config_eventcap(chip, PTP_CLOCK_EXTTS, rising);
        } else {
                func = MV88E6352_G2_SCRATCH_GPIO_PCTL_GPIO;
 
-               err = mv88e6xxx_set_gpio_func(chip, pin, func, true);
+               err = mv88e6352_set_gpio_func(chip, pin, func, true);
 
                cancel_delayed_work_sync(&chip->tai_event_work);
        }
@@ -272,20 +287,20 @@ out:
        return err;
 }
 
-static int mv88e6xxx_ptp_enable(struct ptp_clock_info *ptp,
+static int mv88e6352_ptp_enable(struct ptp_clock_info *ptp,
                                struct ptp_clock_request *rq, int on)
 {
        struct mv88e6xxx_chip *chip = ptp_to_chip(ptp);
 
        switch (rq->type) {
        case PTP_CLK_REQ_EXTTS:
-               return mv88e6xxx_ptp_enable_extts(chip, rq, on);
+               return mv88e6352_ptp_enable_extts(chip, rq, on);
        default:
                return -EOPNOTSUPP;
        }
 }
 
-static int mv88e6xxx_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin,
+static int mv88e6352_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin,
                                enum ptp_pin_function func, unsigned int chan)
 {
        switch (func) {
@@ -299,6 +314,55 @@ static int mv88e6xxx_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin,
        return 0;
 }
 
+const struct mv88e6xxx_ptp_ops mv88e6352_ptp_ops = {
+       .clock_read = mv88e6352_ptp_clock_read,
+       .ptp_enable = mv88e6352_ptp_enable,
+       .ptp_verify = mv88e6352_ptp_verify,
+       .event_work = mv88e6352_tai_event_work,
+       .port_enable = mv88e6352_hwtstamp_port_enable,
+       .port_disable = mv88e6352_hwtstamp_port_disable,
+       .n_ext_ts = 1,
+       .arr0_sts_reg = MV88E6XXX_PORT_PTP_ARR0_STS,
+       .arr1_sts_reg = MV88E6XXX_PORT_PTP_ARR1_STS,
+       .dep_sts_reg = MV88E6XXX_PORT_PTP_DEP_STS,
+       .rx_filters = (1 << HWTSTAMP_FILTER_NONE) |
+               (1 << HWTSTAMP_FILTER_PTP_V2_L4_EVENT) |
+               (1 << HWTSTAMP_FILTER_PTP_V2_L4_SYNC) |
+               (1 << HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ) |
+               (1 << HWTSTAMP_FILTER_PTP_V2_L2_EVENT) |
+               (1 << HWTSTAMP_FILTER_PTP_V2_L2_SYNC) |
+               (1 << HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ) |
+               (1 << HWTSTAMP_FILTER_PTP_V2_EVENT) |
+               (1 << HWTSTAMP_FILTER_PTP_V2_SYNC) |
+               (1 << HWTSTAMP_FILTER_PTP_V2_DELAY_REQ),
+};
+
+const struct mv88e6xxx_ptp_ops mv88e6165_ptp_ops = {
+       .clock_read = mv88e6165_ptp_clock_read,
+       .global_enable = mv88e6165_global_enable,
+       .global_disable = mv88e6165_global_disable,
+       .arr0_sts_reg = MV88E6165_PORT_PTP_ARR0_STS,
+       .arr1_sts_reg = MV88E6165_PORT_PTP_ARR1_STS,
+       .dep_sts_reg = MV88E6165_PORT_PTP_DEP_STS,
+       .rx_filters = (1 << HWTSTAMP_FILTER_NONE) |
+               (1 << HWTSTAMP_FILTER_PTP_V2_L2_EVENT) |
+               (1 << HWTSTAMP_FILTER_PTP_V2_L2_SYNC) |
+               (1 << HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ) |
+               (1 << HWTSTAMP_FILTER_PTP_V2_EVENT) |
+               (1 << HWTSTAMP_FILTER_PTP_V2_SYNC) |
+               (1 << HWTSTAMP_FILTER_PTP_V2_DELAY_REQ),
+};
+
+static u64 mv88e6xxx_ptp_clock_read(const struct cyclecounter *cc)
+{
+       struct mv88e6xxx_chip *chip = cc_to_chip(cc);
+
+       if (chip->info->ops->ptp_ops->clock_read)
+               return chip->info->ops->ptp_ops->clock_read(cc);
+
+       return 0;
+}
+
 /* With a 125MHz input clock, each count is 8 ns, so the 32-bit timestamp
  * counter overflows in 2^32 * 8 ns, i.e. ~34.3 seconds; this task forces
  * periodic reads so that we don't miss any.
  */
@@ -317,6 +381,7 @@ static void mv88e6xxx_ptp_overflow_check(struct work_struct *work)
 
 int mv88e6xxx_ptp_setup(struct mv88e6xxx_chip *chip)
 {
+       const struct mv88e6xxx_ptp_ops *ptp_ops = chip->info->ops->ptp_ops;
        int i;
 
        /* Set up the cycle counter */
@@ -330,14 +395,15 @@ int mv88e6xxx_ptp_setup(struct mv88e6xxx_chip *chip)
                         ktime_to_ns(ktime_get_real()));
 
        INIT_DELAYED_WORK(&chip->overflow_work, mv88e6xxx_ptp_overflow_check);
-       INIT_DELAYED_WORK(&chip->tai_event_work, mv88e6xxx_tai_event_work);
+       if (ptp_ops->event_work)
+               INIT_DELAYED_WORK(&chip->tai_event_work, ptp_ops->event_work);
 
        chip->ptp_clock_info.owner = THIS_MODULE;
        snprintf(chip->ptp_clock_info.name, sizeof(chip->ptp_clock_info.name),
                 "%s", dev_name(chip->dev));
        chip->ptp_clock_info.max_adj    = 1000000;
 
-       chip->ptp_clock_info.n_ext_ts   = 1;
+       chip->ptp_clock_info.n_ext_ts   = ptp_ops->n_ext_ts;
        chip->ptp_clock_info.n_per_out  = 0;
        chip->ptp_clock_info.n_pins     = mv88e6xxx_num_gpio(chip);
        chip->ptp_clock_info.pps        = 0;
@@ -355,8 +421,8 @@ int mv88e6xxx_ptp_setup(struct mv88e6xxx_chip *chip)
        chip->ptp_clock_info.adjtime    = mv88e6xxx_ptp_adjtime;
        chip->ptp_clock_info.gettime64  = mv88e6xxx_ptp_gettime;
        chip->ptp_clock_info.settime64  = mv88e6xxx_ptp_settime;
-       chip->ptp_clock_info.enable     = mv88e6xxx_ptp_enable;
-       chip->ptp_clock_info.verify     = mv88e6xxx_ptp_verify;
+       chip->ptp_clock_info.enable     = ptp_ops->ptp_enable;
+       chip->ptp_clock_info.verify     = ptp_ops->ptp_verify;
        chip->ptp_clock_info.do_aux_work = mv88e6xxx_hwtstamp_work;
 
        chip->ptp_clock = ptp_clock_register(&chip->ptp_clock_info, chip->dev);
@@ -373,7 +439,8 @@ void mv88e6xxx_ptp_free(struct mv88e6xxx_chip *chip)
 {
        if (chip->ptp_clock) {
                cancel_delayed_work_sync(&chip->overflow_work);
-               cancel_delayed_work_sync(&chip->tai_event_work);
+               if (chip->info->ops->ptp_ops->event_work)
+                       cancel_delayed_work_sync(&chip->tai_event_work);
 
                ptp_clock_unregister(chip->ptp_clock);
                chip->ptp_clock = NULL;
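The two ops tables added above are selected per chip family: each entry in the driver's chip info is expected to carry a pointer to one of them, which is what the chip->info->ops->ptp_ops lookups throughout this file dereference. A minimal sketch, assuming the ops-table wiring done elsewhere in chip.c (not part of this diff):

	static const struct mv88e6xxx_ops mv88e6165_ops = {
		/* ...MAC, port and stats ops elided... */
		.ptp_ops = &mv88e6165_ptp_ops,
	};

	static const struct mv88e6xxx_ops mv88e6352_ops = {
		/* ... */
		.ptp_ops = &mv88e6352_ptp_ops,
	};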
index 10f271ab650daa82b994acb6e761297418a1e2c9..28a030840517b9c34e107836d0bdee03ef929b71 100644 (file)
 /* Offset 0x12: Lock Status */
 #define MV88E6XXX_TAI_LOCK_STATUS              0x12
 
+/* 6165 Global Control Registers */
+/* Offset 0x00: Ether Type */
+#define MV88E6XXX_PTP_GC_ETYPE                 0x00
+
+/* Offset 0x01: Message ID */
+#define MV88E6XXX_PTP_GC_MESSAGE_ID            0x01
+
+/* Offset 0x02: Time Stamp Arrive Time */
+#define MV88E6XXX_PTP_GC_TS_ARR_PTR            0x02
+
+/* Offset 0x03: Port Arrival Interrupt Enable */
+#define MV88E6XXX_PTP_GC_PORT_ARR_INT_EN       0x03
+
+/* Offset 0x04: Port Departure Interrupt Enable */
+#define MV88E6XXX_PTP_GC_PORT_DEP_INT_EN       0x04
+
+/* Offset 0x05: Configuration */
+#define MV88E6XXX_PTP_GC_CONFIG                        0x05
+#define MV88E6XXX_PTP_GC_CONFIG_DIS_OVERWRITE  BIT(1)
+#define MV88E6XXX_PTP_GC_CONFIG_DIS_TS         BIT(0)
+
+/* Offset 0x08: Interrupt Status */
+#define MV88E6XXX_PTP_GC_INT_STATUS            0x08
+
+/* Offset 0x09/0x0a: Global Time */
+#define MV88E6XXX_PTP_GC_TIME_LO               0x09
+#define MV88E6XXX_PTP_GC_TIME_HI               0x0A
+
+/* 6165 Per Port Registers */
+/* Offset 0x00: Arrival Time 0 Status */
+#define MV88E6165_PORT_PTP_ARR0_STS    0x00
+
+/* Offset 0x01/0x02: PTP Arrival 0 Time */
+#define MV88E6165_PORT_PTP_ARR0_TIME_LO        0x01
+#define MV88E6165_PORT_PTP_ARR0_TIME_HI        0x02
+
+/* Offset 0x03: PTP Arrival 0 Sequence ID */
+#define MV88E6165_PORT_PTP_ARR0_SEQID  0x03
+
+/* Offset 0x04: PTP Arrival 1 Status */
+#define MV88E6165_PORT_PTP_ARR1_STS    0x04
+
+/* Offset 0x05/0x06: PTP Arrival 1 Time */
+#define MV88E6165_PORT_PTP_ARR1_TIME_LO        0x05
+#define MV88E6165_PORT_PTP_ARR1_TIME_HI        0x06
+
+/* Offset 0x07: PTP Arrival 1 Sequence ID */
+#define MV88E6165_PORT_PTP_ARR1_SEQID  0x07
+
+/* Offset 0x08: PTP Departure Status */
+#define MV88E6165_PORT_PTP_DEP_STS     0x08
+
+/* Offset 0x09/0x0a: PTP Departure Time */
+#define MV88E6165_PORT_PTP_DEP_TIME_LO 0x09
+#define MV88E6165_PORT_PTP_DEP_TIME_HI 0x0a
+
+/* Offset 0x0b: PTP Departure Sequence ID */
+#define MV88E6165_PORT_PTP_DEP_SEQID   0x0b
+
+/* Offset 0x0d: Port Status */
+#define MV88E6165_PORT_STATUS          0x0d
+
 #ifdef CONFIG_NET_DSA_MV88E6XXX_PTP
 
 long mv88e6xxx_hwtstamp_work(struct ptp_clock_info *ptp);
@@ -87,6 +152,9 @@ void mv88e6xxx_ptp_free(struct mv88e6xxx_chip *chip);
 #define ptp_to_chip(ptp) container_of(ptp, struct mv88e6xxx_chip,      \
                                      ptp_clock_info)
 
+extern const struct mv88e6xxx_ptp_ops mv88e6352_ptp_ops;
+extern const struct mv88e6xxx_ptp_ops mv88e6165_ptp_ops;
+
 #else /* !CONFIG_NET_DSA_MV88E6XXX_PTP */
 
 static inline long mv88e6xxx_hwtstamp_work(struct ptp_clock_info *ptp)
@@ -103,6 +171,9 @@ static inline void mv88e6xxx_ptp_free(struct mv88e6xxx_chip *chip)
 {
 }
 
+static const struct mv88e6xxx_ptp_ops mv88e6352_ptp_ops = {};
+static const struct mv88e6xxx_ptp_ops mv88e6165_ptp_ops = {};
+
 #endif /* CONFIG_NET_DSA_MV88E6XXX_PTP */
 
 #endif /* _MV88E6XXX_PTP_H */
diff --git a/drivers/net/dsa/realtek-smi.c b/drivers/net/dsa/realtek-smi.c
new file mode 100644 (file)
index 0000000..b4b839a
--- /dev/null
@@ -0,0 +1,489 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Realtek Simple Management Interface (SMI) driver
+ * It can be discussed how "simple" this interface is.
+ *
+ * The SMI protocol piggy-backs on the MDIO MDC and MDIO signal levels,
+ * but the protocol is not MDIO at all. Instead it is a Realtek
+ * peculiarity that requires bit-banging the lines in a special way to
+ * communicate with the switch.
+ *
+ * ASICs we intend to support with this driver:
+ *
+ * RTL8366   - The original version, apparently
+ * RTL8369   - Similar enough to have the same datasheet as RTL8366
+ * RTL8366RB - Probably reads out "RTL8366 revision B", has a quite
+ *             different register layout from the other two
+ * RTL8366S  - Is this "RTL8366 super"?
+ * RTL8367   - Has an OpenWRT driver as well
+ * RTL8368S  - Seems to be an alternative name for RTL8366RB
+ * RTL8370   - Also uses SMI
+ *
+ * Copyright (C) 2017 Linus Walleij <linus.walleij@linaro.org>
+ * Copyright (C) 2010 Antti Seppälä <a.seppala@gmail.com>
+ * Copyright (C) 2010 Roman Yeryomin <roman@advem.lv>
+ * Copyright (C) 2011 Colin Leitner <colin.leitner@googlemail.com>
+ * Copyright (C) 2009-2010 Gabor Juhos <juhosg@openwrt.org>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/spinlock.h>
+#include <linux/skbuff.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_mdio.h>
+#include <linux/delay.h>
+#include <linux/gpio/consumer.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/bitops.h>
+#include <linux/if_bridge.h>
+
+#include "realtek-smi.h"
+
+#define REALTEK_SMI_ACK_RETRY_COUNT            5
+#define REALTEK_SMI_HW_STOP_DELAY              25      /* msecs */
+#define REALTEK_SMI_HW_START_DELAY             100     /* msecs */
+
+static inline void realtek_smi_clk_delay(struct realtek_smi *smi)
+{
+       ndelay(smi->clk_delay);
+}
+
+static void realtek_smi_start(struct realtek_smi *smi)
+{
+       /* Set GPIO pins to output mode, with initial state:
+        * SCK = 0, SDA = 1
+        */
+       gpiod_direction_output(smi->mdc, 0);
+       gpiod_direction_output(smi->mdio, 1);
+       realtek_smi_clk_delay(smi);
+
+       /* CLK 1: 0 -> 1, 1 -> 0 */
+       gpiod_set_value(smi->mdc, 1);
+       realtek_smi_clk_delay(smi);
+       gpiod_set_value(smi->mdc, 0);
+       realtek_smi_clk_delay(smi);
+
+       /* CLK 2: */
+       gpiod_set_value(smi->mdc, 1);
+       realtek_smi_clk_delay(smi);
+       gpiod_set_value(smi->mdio, 0);
+       realtek_smi_clk_delay(smi);
+       gpiod_set_value(smi->mdc, 0);
+       realtek_smi_clk_delay(smi);
+       gpiod_set_value(smi->mdio, 1);
+}
+
+static void realtek_smi_stop(struct realtek_smi *smi)
+{
+       realtek_smi_clk_delay(smi);
+       gpiod_set_value(smi->mdio, 0);
+       gpiod_set_value(smi->mdc, 1);
+       realtek_smi_clk_delay(smi);
+       gpiod_set_value(smi->mdio, 1);
+       realtek_smi_clk_delay(smi);
+       gpiod_set_value(smi->mdc, 1);
+       realtek_smi_clk_delay(smi);
+       gpiod_set_value(smi->mdc, 0);
+       realtek_smi_clk_delay(smi);
+       gpiod_set_value(smi->mdc, 1);
+
+       /* Add a click */
+       realtek_smi_clk_delay(smi);
+       gpiod_set_value(smi->mdc, 0);
+       realtek_smi_clk_delay(smi);
+       gpiod_set_value(smi->mdc, 1);
+
+       /* Set GPIO pins to input mode */
+       gpiod_direction_input(smi->mdio);
+       gpiod_direction_input(smi->mdc);
+}
+
+static void realtek_smi_write_bits(struct realtek_smi *smi, u32 data, u32 len)
+{
+       for (; len > 0; len--) {
+               realtek_smi_clk_delay(smi);
+
+               /* Prepare data */
+               gpiod_set_value(smi->mdio, !!(data & (1 << (len - 1))));
+               realtek_smi_clk_delay(smi);
+
+               /* Clocking */
+               gpiod_set_value(smi->mdc, 1);
+               realtek_smi_clk_delay(smi);
+               gpiod_set_value(smi->mdc, 0);
+       }
+}
+
+static void realtek_smi_read_bits(struct realtek_smi *smi, u32 len, u32 *data)
+{
+       gpiod_direction_input(smi->mdio);
+
+       for (*data = 0; len > 0; len--) {
+               u32 u;
+
+               realtek_smi_clk_delay(smi);
+
+               /* Clocking */
+               gpiod_set_value(smi->mdc, 1);
+               realtek_smi_clk_delay(smi);
+               u = !!gpiod_get_value(smi->mdio);
+               gpiod_set_value(smi->mdc, 0);
+
+               *data |= (u << (len - 1));
+       }
+
+       gpiod_direction_output(smi->mdio, 0);
+}
+
+static int realtek_smi_wait_for_ack(struct realtek_smi *smi)
+{
+       int retry_cnt;
+
+       retry_cnt = 0;
+       do {
+               u32 ack;
+
+               realtek_smi_read_bits(smi, 1, &ack);
+               if (ack == 0)
+                       break;
+
+               if (++retry_cnt > REALTEK_SMI_ACK_RETRY_COUNT) {
+                       dev_err(smi->dev, "ACK timeout\n");
+                       return -ETIMEDOUT;
+               }
+       } while (1);
+
+       return 0;
+}
+
+static int realtek_smi_write_byte(struct realtek_smi *smi, u8 data)
+{
+       realtek_smi_write_bits(smi, data, 8);
+       return realtek_smi_wait_for_ack(smi);
+}
+
+static int realtek_smi_write_byte_noack(struct realtek_smi *smi, u8 data)
+{
+       realtek_smi_write_bits(smi, data, 8);
+       return 0;
+}
+
+static int realtek_smi_read_byte0(struct realtek_smi *smi, u8 *data)
+{
+       u32 t;
+
+       /* Read data */
+       realtek_smi_read_bits(smi, 8, &t);
+       *data = (t & 0xff);
+
+       /* Send an ACK */
+       realtek_smi_write_bits(smi, 0x00, 1);
+
+       return 0;
+}
+
+static int realtek_smi_read_byte1(struct realtek_smi *smi, u8 *data)
+{
+       u32 t;
+
+       /* Read data */
+       realtek_smi_read_bits(smi, 8, &t);
+       *data = (t & 0xff);
+
+       /* Send an ACK */
+       realtek_smi_write_bits(smi, 0x01, 1);
+
+       return 0;
+}
+
+static int realtek_smi_read_reg(struct realtek_smi *smi, u32 addr, u32 *data)
+{
+       unsigned long flags;
+       u8 lo = 0;
+       u8 hi = 0;
+       int ret;
+
+       spin_lock_irqsave(&smi->lock, flags);
+
+       realtek_smi_start(smi);
+
+       /* Send READ command */
+       ret = realtek_smi_write_byte(smi, smi->cmd_read);
+       if (ret)
+               goto out;
+
+       /* Set ADDR[7:0] */
+       ret = realtek_smi_write_byte(smi, addr & 0xff);
+       if (ret)
+               goto out;
+
+       /* Set ADDR[15:8] */
+       ret = realtek_smi_write_byte(smi, addr >> 8);
+       if (ret)
+               goto out;
+
+       /* Read DATA[7:0] */
+       realtek_smi_read_byte0(smi, &lo);
+       /* Read DATA[15:8] */
+       realtek_smi_read_byte1(smi, &hi);
+
+       *data = ((u32)lo) | (((u32)hi) << 8);
+
+       ret = 0;
+
+ out:
+       realtek_smi_stop(smi);
+       spin_unlock_irqrestore(&smi->lock, flags);
+
+       return ret;
+}
+
+static int realtek_smi_write_reg(struct realtek_smi *smi,
+                                u32 addr, u32 data, bool ack)
+{
+       unsigned long flags;
+       int ret;
+
+       spin_lock_irqsave(&smi->lock, flags);
+
+       realtek_smi_start(smi);
+
+       /* Send WRITE command */
+       ret = realtek_smi_write_byte(smi, smi->cmd_write);
+       if (ret)
+               goto out;
+
+       /* Set ADDR[7:0] */
+       ret = realtek_smi_write_byte(smi, addr & 0xff);
+       if (ret)
+               goto out;
+
+       /* Set ADDR[15:8] */
+       ret = realtek_smi_write_byte(smi, addr >> 8);
+       if (ret)
+               goto out;
+
+       /* Write DATA[7:0] */
+       ret = realtek_smi_write_byte(smi, data & 0xff);
+       if (ret)
+               goto out;
+
+       /* Write DATA[15:8] */
+       if (ack)
+               ret = realtek_smi_write_byte(smi, data >> 8);
+       else
+               ret = realtek_smi_write_byte_noack(smi, data >> 8);
+       if (ret)
+               goto out;
+
+       ret = 0;
+
+ out:
+       realtek_smi_stop(smi);
+       spin_unlock_irqrestore(&smi->lock, flags);
+
+       return ret;
+}
+
+/* There is one single case when we need to use this accessor and that
+ * is when issuing a soft reset. Since the device resets as soon as we write
+ * that bit, no ACK will come back for natural reasons.
+ */
+int realtek_smi_write_reg_noack(struct realtek_smi *smi, u32 addr,
+                               u32 data)
+{
+       return realtek_smi_write_reg(smi, addr, data, false);
+}
+EXPORT_SYMBOL_GPL(realtek_smi_write_reg_noack);
+
+/* Regmap accessors */
+
+static int realtek_smi_write(void *ctx, u32 reg, u32 val)
+{
+       struct realtek_smi *smi = ctx;
+
+       return realtek_smi_write_reg(smi, reg, val, true);
+}
+
+static int realtek_smi_read(void *ctx, u32 reg, u32 *val)
+{
+       struct realtek_smi *smi = ctx;
+
+       return realtek_smi_read_reg(smi, reg, val);
+}
+
+static const struct regmap_config realtek_smi_mdio_regmap_config = {
+       .reg_bits = 10, /* A4..A0 R4..R0 */
+       .val_bits = 16,
+       .reg_stride = 1,
+       /* PHY regs are at 0x8000 */
+       .max_register = 0xffff,
+       .reg_format_endian = REGMAP_ENDIAN_BIG,
+       .reg_read = realtek_smi_read,
+       .reg_write = realtek_smi_write,
+       .cache_type = REGCACHE_NONE,
+};
+
+static int realtek_smi_mdio_read(struct mii_bus *bus, int addr, int regnum)
+{
+       struct realtek_smi *smi = bus->priv;
+
+       return smi->ops->phy_read(smi, addr, regnum);
+}
+
+static int realtek_smi_mdio_write(struct mii_bus *bus, int addr, int regnum,
+                                 u16 val)
+{
+       struct realtek_smi *smi = bus->priv;
+
+       return smi->ops->phy_write(smi, addr, regnum, val);
+}
+
+int realtek_smi_setup_mdio(struct realtek_smi *smi)
+{
+       struct device_node *mdio_np;
+       int ret;
+
+       mdio_np = of_find_compatible_node(smi->dev->of_node, NULL,
+                                         "realtek,smi-mdio");
+       if (!mdio_np) {
+               dev_err(smi->dev, "no MDIO bus node\n");
+               return -ENODEV;
+       }
+
+       smi->slave_mii_bus = devm_mdiobus_alloc(smi->dev);
+       if (!smi->slave_mii_bus)
+               return -ENOMEM;
+       smi->slave_mii_bus->priv = smi;
+       smi->slave_mii_bus->name = "SMI slave MII";
+       smi->slave_mii_bus->read = realtek_smi_mdio_read;
+       smi->slave_mii_bus->write = realtek_smi_mdio_write;
+       snprintf(smi->slave_mii_bus->id, MII_BUS_ID_SIZE, "SMI-%d",
+                smi->ds->index);
+       smi->slave_mii_bus->dev.of_node = mdio_np;
+       smi->slave_mii_bus->parent = smi->dev;
+       smi->ds->slave_mii_bus = smi->slave_mii_bus;
+
+       ret = of_mdiobus_register(smi->slave_mii_bus, mdio_np);
+       if (ret) {
+               dev_err(smi->dev, "unable to register MDIO bus %s\n",
+                       smi->slave_mii_bus->id);
+               of_node_put(mdio_np);
+               return ret;
+       }
+
+       return 0;
+}
+
+static int realtek_smi_probe(struct platform_device *pdev)
+{
+       const struct realtek_smi_variant *var;
+       struct device *dev = &pdev->dev;
+       struct realtek_smi *smi;
+       struct device_node *np;
+       int ret;
+
+       var = of_device_get_match_data(dev);
+       /* Guard against match entries without variant data (e.g. rtl8366s) */
+       if (!var)
+               return -EINVAL;
+       np = dev->of_node;
+
+       smi = devm_kzalloc(dev, sizeof(*smi), GFP_KERNEL);
+       if (!smi)
+               return -ENOMEM;
+       smi->map = devm_regmap_init(dev, NULL, smi,
+                                   &realtek_smi_mdio_regmap_config);
+       if (IS_ERR(smi->map)) {
+               ret = PTR_ERR(smi->map);
+               dev_err(dev, "regmap init failed: %d\n", ret);
+               return ret;
+       }
+
+       /* Link forward and backward */
+       smi->dev = dev;
+       smi->clk_delay = var->clk_delay;
+       smi->cmd_read = var->cmd_read;
+       smi->cmd_write = var->cmd_write;
+       smi->ops = var->ops;
+
+       dev_set_drvdata(dev, smi);
+       spin_lock_init(&smi->lock);
+
+       /* TODO: if power is software controlled, set up any regulators here */
+
+       /* Assert then deassert RESET */
+       smi->reset = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH);
+       if (IS_ERR(smi->reset)) {
+               dev_err(dev, "failed to get RESET GPIO\n");
+               return PTR_ERR(smi->reset);
+       }
+       msleep(REALTEK_SMI_HW_STOP_DELAY);
+       gpiod_set_value(smi->reset, 0);
+       msleep(REALTEK_SMI_HW_START_DELAY);
+       dev_info(dev, "deasserted RESET\n");
+
+       /* Fetch MDIO pins */
+       smi->mdc = devm_gpiod_get_optional(dev, "mdc", GPIOD_OUT_LOW);
+       if (IS_ERR(smi->mdc))
+               return PTR_ERR(smi->mdc);
+       smi->mdio = devm_gpiod_get_optional(dev, "mdio", GPIOD_OUT_LOW);
+       if (IS_ERR(smi->mdio))
+               return PTR_ERR(smi->mdio);
+
+       smi->leds_disabled = of_property_read_bool(np, "realtek,disable-leds");
+
+       ret = smi->ops->detect(smi);
+       if (ret) {
+               dev_err(dev, "unable to detect switch\n");
+               return ret;
+       }
+
+       smi->ds = dsa_switch_alloc(dev, smi->num_ports);
+       if (!smi->ds)
+               return -ENOMEM;
+       smi->ds->priv = smi;
+
+       smi->ds->ops = var->ds_ops;
+       ret = dsa_register_switch(smi->ds);
+       if (ret) {
+               dev_err(dev, "unable to register switch ret = %d\n", ret);
+               return ret;
+       }
+       return 0;
+}
+
+static int realtek_smi_remove(struct platform_device *pdev)
+{
+       struct realtek_smi *smi = dev_get_drvdata(&pdev->dev);
+
+       dsa_unregister_switch(smi->ds);
+       gpiod_set_value(smi->reset, 1);
+
+       return 0;
+}
+
+static const struct of_device_id realtek_smi_of_match[] = {
+       {
+               .compatible = "realtek,rtl8366rb",
+               .data = &rtl8366rb_variant,
+       },
+       {
+               /* FIXME: add support for RTL8366S and more */
+               .compatible = "realtek,rtl8366s",
+               .data = NULL,
+       },
+       { /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, realtek_smi_of_match);
+
+static struct platform_driver realtek_smi_driver = {
+       .driver = {
+               .name = "realtek-smi",
+               .of_match_table = of_match_ptr(realtek_smi_of_match),
+       },
+       .probe  = realtek_smi_probe,
+       .remove = realtek_smi_remove,
+};
+module_platform_driver(realtek_smi_driver);
+
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/dsa/realtek-smi.h b/drivers/net/dsa/realtek-smi.h
new file mode 100644 (file)
index 0000000..9a63b51
--- /dev/null
@@ -0,0 +1,144 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/* Realtek SMI interface driver defines
+ *
+ * Copyright (C) 2017 Linus Walleij <linus.walleij@linaro.org>
+ * Copyright (C) 2009-2010 Gabor Juhos <juhosg@openwrt.org>
+ */
+
+#ifndef _REALTEK_SMI_H
+#define _REALTEK_SMI_H
+
+#include <linux/phy.h>
+#include <linux/platform_device.h>
+#include <linux/gpio/consumer.h>
+#include <net/dsa.h>
+
+struct realtek_smi_ops;
+struct dentry;
+struct inode;
+struct file;
+
+struct rtl8366_mib_counter {
+       unsigned int    base;
+       unsigned int    offset;
+       unsigned int    length;
+       const char      *name;
+};
+
+struct rtl8366_vlan_mc {
+       u16     vid;
+       u16     untag;
+       u16     member;
+       u8      fid;
+       u8      priority;
+};
+
+struct rtl8366_vlan_4k {
+       u16     vid;
+       u16     untag;
+       u16     member;
+       u8      fid;
+};
+
+struct realtek_smi {
+       struct device           *dev;
+       struct gpio_desc        *reset;
+       struct gpio_desc        *mdc;
+       struct gpio_desc        *mdio;
+       struct regmap           *map;
+       struct mii_bus          *slave_mii_bus;
+
+       unsigned int            clk_delay;
+       u8                      cmd_read;
+       u8                      cmd_write;
+       spinlock_t              lock; /* Locks around command writes */
+       struct dsa_switch       *ds;
+       struct irq_domain       *irqdomain;
+       bool                    leds_disabled;
+
+       unsigned int            cpu_port;
+       unsigned int            num_ports;
+       unsigned int            num_vlan_mc;
+       unsigned int            num_mib_counters;
+       struct rtl8366_mib_counter *mib_counters;
+
+       const struct realtek_smi_ops *ops;
+
+       int                     vlan_enabled;
+       int                     vlan4k_enabled;
+
+       char                    buf[4096];
+};
+
+/**
+ * struct realtek_smi_ops - vtable for the per-SMI-chiptype operations
+ * @detect: detects the chiptype
+ */
+struct realtek_smi_ops {
+       int     (*detect)(struct realtek_smi *smi);
+       int     (*reset_chip)(struct realtek_smi *smi);
+       int     (*setup)(struct realtek_smi *smi);
+       void    (*cleanup)(struct realtek_smi *smi);
+       int     (*get_mib_counter)(struct realtek_smi *smi,
+                                  int port,
+                                  struct rtl8366_mib_counter *mib,
+                                  u64 *mibvalue);
+       int     (*get_vlan_mc)(struct realtek_smi *smi, u32 index,
+                              struct rtl8366_vlan_mc *vlanmc);
+       int     (*set_vlan_mc)(struct realtek_smi *smi, u32 index,
+                              const struct rtl8366_vlan_mc *vlanmc);
+       int     (*get_vlan_4k)(struct realtek_smi *smi, u32 vid,
+                              struct rtl8366_vlan_4k *vlan4k);
+       int     (*set_vlan_4k)(struct realtek_smi *smi,
+                              const struct rtl8366_vlan_4k *vlan4k);
+       int     (*get_mc_index)(struct realtek_smi *smi, int port, int *val);
+       int     (*set_mc_index)(struct realtek_smi *smi, int port, int index);
+       bool    (*is_vlan_valid)(struct realtek_smi *smi, unsigned int vlan);
+       int     (*enable_vlan)(struct realtek_smi *smi, bool enable);
+       int     (*enable_vlan4k)(struct realtek_smi *smi, bool enable);
+       int     (*enable_port)(struct realtek_smi *smi, int port, bool enable);
+       int     (*phy_read)(struct realtek_smi *smi, int phy, int regnum);
+       int     (*phy_write)(struct realtek_smi *smi, int phy, int regnum,
+                            u16 val);
+};
+
+struct realtek_smi_variant {
+       const struct dsa_switch_ops *ds_ops;
+       const struct realtek_smi_ops *ops;
+       unsigned int clk_delay;
+       u8 cmd_read;
+       u8 cmd_write;
+};
+
+/* SMI core calls */
+int realtek_smi_write_reg_noack(struct realtek_smi *smi, u32 addr,
+                               u32 data);
+int realtek_smi_setup_mdio(struct realtek_smi *smi);
+
+/* RTL8366 library helpers */
+int rtl8366_mc_is_used(struct realtek_smi *smi, int mc_index, int *used);
+int rtl8366_set_vlan(struct realtek_smi *smi, int vid, u32 member,
+                    u32 untag, u32 fid);
+int rtl8366_get_pvid(struct realtek_smi *smi, int port, int *val);
+int rtl8366_set_pvid(struct realtek_smi *smi, unsigned int port,
+                    unsigned int vid);
+int rtl8366_enable_vlan4k(struct realtek_smi *smi, bool enable);
+int rtl8366_enable_vlan(struct realtek_smi *smi, bool enable);
+int rtl8366_reset_vlan(struct realtek_smi *smi);
+int rtl8366_init_vlan(struct realtek_smi *smi);
+int rtl8366_vlan_filtering(struct dsa_switch *ds, int port,
+                          bool vlan_filtering);
+int rtl8366_vlan_prepare(struct dsa_switch *ds, int port,
+                        const struct switchdev_obj_port_vlan *vlan);
+void rtl8366_vlan_add(struct dsa_switch *ds, int port,
+                     const struct switchdev_obj_port_vlan *vlan);
+int rtl8366_vlan_del(struct dsa_switch *ds, int port,
+                    const struct switchdev_obj_port_vlan *vlan);
+void rtl8366_get_strings(struct dsa_switch *ds, int port, u32 stringset,
+                        uint8_t *data);
+int rtl8366_get_sset_count(struct dsa_switch *ds, int port, int sset);
+void rtl8366_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *data);
+
+extern const struct realtek_smi_variant rtl8366rb_variant;
+
+#endif /*  _REALTEK_SMI_H */
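A subdriver ties everything together by exporting a realtek_smi_variant, which the core looks up through the of_device_id table in realtek-smi.c. A sketch of such a definition; the ops-table names and the concrete clk_delay and command-byte values below are illustrative assumptions, not taken from this diff:

	const struct realtek_smi_variant rtl8366rb_variant = {
		.ds_ops = &rtl8366rb_switch_ops,	/* assumed name */
		.ops = &rtl8366rb_smi_ops,		/* assumed name */
		.clk_delay = 10,			/* ns, illustrative */
		.cmd_read = 0xa9,			/* illustrative */
		.cmd_write = 0xa8,			/* illustrative */
	};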
diff --git a/drivers/net/dsa/rtl8366.c b/drivers/net/dsa/rtl8366.c
new file mode 100644 (file)
index 0000000..6dedd43
--- /dev/null
@@ -0,0 +1,515 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Realtek SMI library helpers for the RTL8366x variants
+ * RTL8366RB and RTL8366S
+ *
+ * Copyright (C) 2017 Linus Walleij <linus.walleij@linaro.org>
+ * Copyright (C) 2009-2010 Gabor Juhos <juhosg@openwrt.org>
+ * Copyright (C) 2010 Antti Seppälä <a.seppala@gmail.com>
+ * Copyright (C) 2010 Roman Yeryomin <roman@advem.lv>
+ * Copyright (C) 2011 Colin Leitner <colin.leitner@googlemail.com>
+ */
+#include <linux/if_bridge.h>
+#include <net/dsa.h>
+
+#include "realtek-smi.h"
+
+int rtl8366_mc_is_used(struct realtek_smi *smi, int mc_index, int *used)
+{
+       int ret;
+       int i;
+
+       *used = 0;
+       for (i = 0; i < smi->num_ports; i++) {
+               int index = 0;
+
+               ret = smi->ops->get_mc_index(smi, i, &index);
+               if (ret)
+                       return ret;
+
+               if (mc_index == index) {
+                       *used = 1;
+                       break;
+               }
+       }
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(rtl8366_mc_is_used);
+
+int rtl8366_set_vlan(struct realtek_smi *smi, int vid, u32 member,
+                    u32 untag, u32 fid)
+{
+       struct rtl8366_vlan_4k vlan4k;
+       int ret;
+       int i;
+
+       /* Update the 4K table */
+       ret = smi->ops->get_vlan_4k(smi, vid, &vlan4k);
+       if (ret)
+               return ret;
+
+       vlan4k.member = member;
+       vlan4k.untag = untag;
+       vlan4k.fid = fid;
+       ret = smi->ops->set_vlan_4k(smi, &vlan4k);
+       if (ret)
+               return ret;
+
+       /* Try to find an existing MC entry for this VID */
+       for (i = 0; i < smi->num_vlan_mc; i++) {
+               struct rtl8366_vlan_mc vlanmc;
+
+               ret = smi->ops->get_vlan_mc(smi, i, &vlanmc);
+               if (ret)
+                       return ret;
+
+               if (vid == vlanmc.vid) {
+                       /* update the MC entry */
+                       vlanmc.member = member;
+                       vlanmc.untag = untag;
+                       vlanmc.fid = fid;
+
+                       ret = smi->ops->set_vlan_mc(smi, i, &vlanmc);
+                       break;
+               }
+       }
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(rtl8366_set_vlan);
+
+int rtl8366_get_pvid(struct realtek_smi *smi, int port, int *val)
+{
+       struct rtl8366_vlan_mc vlanmc;
+       int ret;
+       int index;
+
+       ret = smi->ops->get_mc_index(smi, port, &index);
+       if (ret)
+               return ret;
+
+       ret = smi->ops->get_vlan_mc(smi, index, &vlanmc);
+       if (ret)
+               return ret;
+
+       *val = vlanmc.vid;
+       return 0;
+}
+EXPORT_SYMBOL_GPL(rtl8366_get_pvid);
+
+int rtl8366_set_pvid(struct realtek_smi *smi, unsigned int port,
+                    unsigned int vid)
+{
+       struct rtl8366_vlan_mc vlanmc;
+       struct rtl8366_vlan_4k vlan4k;
+       int ret;
+       int i;
+
+       /* Try to find an existing MC entry for this VID */
+       for (i = 0; i < smi->num_vlan_mc; i++) {
+               ret = smi->ops->get_vlan_mc(smi, i, &vlanmc);
+               if (ret)
+                       return ret;
+
+               if (vid == vlanmc.vid) {
+                       ret = smi->ops->set_vlan_mc(smi, i, &vlanmc);
+                       if (ret)
+                               return ret;
+
+                       ret = smi->ops->set_mc_index(smi, port, i);
+                       return ret;
+               }
+       }
+
+       /* We have no MC entry for this VID, try to find an empty one */
+       for (i = 0; i < smi->num_vlan_mc; i++) {
+               ret = smi->ops->get_vlan_mc(smi, i, &vlanmc);
+               if (ret)
+                       return ret;
+
+               if (vlanmc.vid == 0 && vlanmc.member == 0) {
+                       /* Update the entry from the 4K table */
+                       ret = smi->ops->get_vlan_4k(smi, vid, &vlan4k);
+                       if (ret)
+                               return ret;
+
+                       vlanmc.vid = vid;
+                       vlanmc.member = vlan4k.member;
+                       vlanmc.untag = vlan4k.untag;
+                       vlanmc.fid = vlan4k.fid;
+                       ret = smi->ops->set_vlan_mc(smi, i, &vlanmc);
+                       if (ret)
+                               return ret;
+
+                       ret = smi->ops->set_mc_index(smi, port, i);
+                       return ret;
+               }
+       }
+
+       /* MC table is full, try to find an unused entry and replace it */
+       for (i = 0; i < smi->num_vlan_mc; i++) {
+               int used;
+
+               ret = rtl8366_mc_is_used(smi, i, &used);
+               if (ret)
+                       return ret;
+
+               if (!used) {
+                       /* Update the entry from the 4K table */
+                       ret = smi->ops->get_vlan_4k(smi, vid, &vlan4k);
+                       if (ret)
+                               return ret;
+
+                       vlanmc.vid = vid;
+                       vlanmc.member = vlan4k.member;
+                       vlanmc.untag = vlan4k.untag;
+                       vlanmc.fid = vlan4k.fid;
+                       ret = smi->ops->set_vlan_mc(smi, i, &vlanmc);
+                       if (ret)
+                               return ret;
+
+                       ret = smi->ops->set_mc_index(smi, port, i);
+                       return ret;
+               }
+       }
+
+       dev_err(smi->dev,
+               "all VLAN member configurations are in use\n");
+
+       return -ENOSPC;
+}
+EXPORT_SYMBOL_GPL(rtl8366_set_pvid);
+
+int rtl8366_enable_vlan4k(struct realtek_smi *smi, bool enable)
+{
+       int ret;
+
+       /* To enable 4k VLAN, ordinary VLAN must be enabled first,
+        * but if we disable 4k VLAN it is fine to leave ordinary
+        * VLAN enabled.
+        */
+       if (enable) {
+               /* Make sure VLAN is ON */
+               ret = smi->ops->enable_vlan(smi, true);
+               if (ret)
+                       return ret;
+
+               smi->vlan_enabled = true;
+       }
+
+       ret = smi->ops->enable_vlan4k(smi, enable);
+       if (ret)
+               return ret;
+
+       smi->vlan4k_enabled = enable;
+       return 0;
+}
+EXPORT_SYMBOL_GPL(rtl8366_enable_vlan4k);
+
+int rtl8366_enable_vlan(struct realtek_smi *smi, bool enable)
+{
+       int ret;
+
+       ret = smi->ops->enable_vlan(smi, enable);
+       if (ret)
+               return ret;
+
+       smi->vlan_enabled = enable;
+
+       /* If we turn VLAN off, make sure that we turn off
+        * 4k VLAN as well, if that happened to be on.
+        */
+       if (!enable) {
+               smi->vlan4k_enabled = false;
+               ret = smi->ops->enable_vlan4k(smi, false);
+       }
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(rtl8366_enable_vlan);
+
+int rtl8366_reset_vlan(struct realtek_smi *smi)
+{
+       struct rtl8366_vlan_mc vlanmc;
+       int ret;
+       int i;
+
+       rtl8366_enable_vlan(smi, false);
+       rtl8366_enable_vlan4k(smi, false);
+
+       /* Clear the 16 VLAN member configurations */
+       vlanmc.vid = 0;
+       vlanmc.priority = 0;
+       vlanmc.member = 0;
+       vlanmc.untag = 0;
+       vlanmc.fid = 0;
+       for (i = 0; i < smi->num_vlan_mc; i++) {
+               ret = smi->ops->set_vlan_mc(smi, i, &vlanmc);
+               if (ret)
+                       return ret;
+       }
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(rtl8366_reset_vlan);
+
+int rtl8366_init_vlan(struct realtek_smi *smi)
+{
+       int port;
+       int ret;
+
+       ret = rtl8366_reset_vlan(smi);
+       if (ret)
+               return ret;
+
+       /* Loop over the available ports and associate each port
+        * with VLAN (port+1)
+        */
+       for (port = 0; port < smi->num_ports; port++) {
+               u32 mask;
+
+               if (port == smi->cpu_port)
+                       /* For the CPU port, make all ports members of this
+                        * VLAN.
+                        */
+                       mask = GENMASK(smi->num_ports - 1, 0);
+               else
+                       /* For all other ports, enable itself plus the
+                        * CPU port.
+                        */
+                       mask = BIT(port) | BIT(smi->cpu_port);
+
+               /* For each port, set the port as member of VLAN (port+1)
+                * and untagged, except for the CPU port: the CPU port (5) is
+                * member of VLAN 6 and so are ALL the other ports as well.
+                * Use filter 0 (no filter).
+                */
+               dev_info(smi->dev, "VLAN%d port mask for port %d, %08x\n",
+                        (port + 1), port, mask);
+               ret = rtl8366_set_vlan(smi, (port + 1), mask, mask, 0);
+               if (ret)
+                       return ret;
+
+               dev_info(smi->dev, "VLAN%d port %d, PVID set to %d\n",
+                        (port + 1), port, (port + 1));
+               ret = rtl8366_set_pvid(smi, port, (port + 1));
+               if (ret)
+                       return ret;
+       }
+
+       return rtl8366_enable_vlan(smi, true);
+}
+EXPORT_SYMBOL_GPL(rtl8366_init_vlan);
+
+int rtl8366_vlan_filtering(struct dsa_switch *ds, int port, bool vlan_filtering)
+{
+       struct realtek_smi *smi = ds->priv;
+       struct rtl8366_vlan_4k vlan4k;
+       int ret;
+
+       if (!smi->ops->is_vlan_valid(smi, port))
+               return -EINVAL;
+
+       dev_info(smi->dev, "%s filtering on port %d\n",
+                vlan_filtering ? "enable" : "disable",
+                port);
+
+       /* TODO:
+        * The hardware supports filter IDs (FID) 0..7, I have no clue how to
+        * support this in the driver when the callback only says on/off.
+        */
+       ret = smi->ops->get_vlan_4k(smi, port, &vlan4k);
+       if (ret)
+               return ret;
+
+       /* Just set the filter to FID 1 for now then */
+       ret = rtl8366_set_vlan(smi, port,
+                              vlan4k.member,
+                              vlan4k.untag,
+                              1);
+       if (ret)
+               return ret;
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(rtl8366_vlan_filtering);
+
+int rtl8366_vlan_prepare(struct dsa_switch *ds, int port,
+                        const struct switchdev_obj_port_vlan *vlan)
+{
+       struct realtek_smi *smi = ds->priv;
+       int ret;
+
+       if (!smi->ops->is_vlan_valid(smi, port))
+               return -EINVAL;
+
+       dev_info(smi->dev, "prepare VLANs %04x..%04x\n",
+                vlan->vid_begin, vlan->vid_end);
+
+       /* Enable VLAN in the hardware
+        * FIXME: what's with this 4k business?
+        * Just rtl8366_enable_vlan() seems inconclusive.
+        */
+       ret = rtl8366_enable_vlan4k(smi, true);
+       if (ret)
+               return ret;
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(rtl8366_vlan_prepare);
+
+void rtl8366_vlan_add(struct dsa_switch *ds, int port,
+                     const struct switchdev_obj_port_vlan *vlan)
+{
+       bool untagged = !!(vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED);
+       bool pvid = !!(vlan->flags & BRIDGE_VLAN_INFO_PVID);
+       struct realtek_smi *smi = ds->priv;
+       u32 member = 0;
+       u32 untag = 0;
+       u16 vid;
+       int ret;
+
+       if (!smi->ops->is_vlan_valid(smi, port))
+               return;
+
+       dev_info(smi->dev, "add VLAN on port %d, %s, %s\n",
+                port,
+                untagged ? "untagged" : "tagged",
+                pvid ? " PVID" : "no PVID");
+
+       if (dsa_is_dsa_port(ds, port) || dsa_is_cpu_port(ds, port))
+               dev_err(smi->dev, "port is DSA or CPU port\n");
+
+       for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) {
+               int pvid_val = 0;
+
+               dev_info(smi->dev, "add VLAN %04x\n", vid);
+               member |= BIT(port);
+
+               if (untagged)
+                       untag |= BIT(port);
+
+               /* To ensure that we have a valid MC entry for this VLAN,
+                * initialize the port VLAN ID here.
+                */
+               ret = rtl8366_get_pvid(smi, port, &pvid_val);
+               if (ret < 0) {
+                       dev_err(smi->dev, "could not lookup PVID for port %d\n",
+                               port);
+                       return;
+               }
+               if (pvid_val == 0) {
+                       ret = rtl8366_set_pvid(smi, port, vid);
+                       if (ret < 0)
+                               return;
+               }
+       }
+
+       ret = rtl8366_set_vlan(smi, port, member, untag, 0);
+       if (ret)
+               dev_err(smi->dev,
+                       "failed to set up VLAN %04x",
+                       vid);
+}
+EXPORT_SYMBOL_GPL(rtl8366_vlan_add);
+
+int rtl8366_vlan_del(struct dsa_switch *ds, int port,
+                    const struct switchdev_obj_port_vlan *vlan)
+{
+       struct realtek_smi *smi = ds->priv;
+       u16 vid;
+       int ret;
+
+       dev_info(smi->dev, "del VLAN on port %d\n", port);
+
+       for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) {
+               int i;
+
+               dev_info(smi->dev, "del VLAN %04x\n", vid);
+
+               for (i = 0; i < smi->num_vlan_mc; i++) {
+                       struct rtl8366_vlan_mc vlanmc;
+
+                       ret = smi->ops->get_vlan_mc(smi, i, &vlanmc);
+                       if (ret)
+                               return ret;
+
+                       if (vid == vlanmc.vid) {
+                               /* clear VLAN member configurations */
+                               vlanmc.vid = 0;
+                               vlanmc.priority = 0;
+                               vlanmc.member = 0;
+                               vlanmc.untag = 0;
+                               vlanmc.fid = 0;
+
+                               ret = smi->ops->set_vlan_mc(smi, i, &vlanmc);
+                               if (ret) {
+                                       dev_err(smi->dev,
+                                               "failed to remove VLAN %04x\n",
+                                               vid);
+                                       return ret;
+                               }
+                               break;
+                       }
+               }
+       }
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(rtl8366_vlan_del);
+
+void rtl8366_get_strings(struct dsa_switch *ds, int port, u32 stringset,
+                        uint8_t *data)
+{
+       struct realtek_smi *smi = ds->priv;
+       struct rtl8366_mib_counter *mib;
+       int i;
+
+       if (port >= smi->num_ports)
+               return;
+
+       for (i = 0; i < smi->num_mib_counters; i++) {
+               mib = &smi->mib_counters[i];
+               strncpy(data + i * ETH_GSTRING_LEN,
+                       mib->name, ETH_GSTRING_LEN);
+       }
+}
+EXPORT_SYMBOL_GPL(rtl8366_get_strings);
+
+int rtl8366_get_sset_count(struct dsa_switch *ds, int port, int sset)
+{
+       struct realtek_smi *smi = ds->priv;
+
+       /* We only support SS_STATS */
+       if (sset != ETH_SS_STATS)
+               return 0;
+       if (port >= smi->num_ports)
+               return -EINVAL;
+
+       return smi->num_mib_counters;
+}
+EXPORT_SYMBOL_GPL(rtl8366_get_sset_count);
+
+void rtl8366_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *data)
+{
+       struct realtek_smi *smi = ds->priv;
+       int i;
+       int ret;
+
+       if (port >= smi->num_ports)
+               return;
+
+       for (i = 0; i < smi->num_mib_counters; i++) {
+               struct rtl8366_mib_counter *mib;
+               u64 mibvalue = 0;
+
+               mib = &smi->mib_counters[i];
+               ret = smi->ops->get_mib_counter(smi, port, mib, &mibvalue);
+               if (ret) {
+                       dev_err(smi->dev, "error reading MIB counter %s\n",
+                               mib->name);
+               }
+               data[i] = mibvalue;
+       }
+}
+EXPORT_SYMBOL_GPL(rtl8366_get_ethtool_stats);
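These exported helpers are designed to be plugged directly into a subdriver's dsa_switch_ops, since their signatures match the DSA callbacks declared in realtek-smi.h. A minimal sketch (the struct name is assumed; setup, PHY and port callbacks are elided):

	static const struct dsa_switch_ops rtl8366rb_switch_ops = {
		.get_strings = rtl8366_get_strings,
		.get_ethtool_stats = rtl8366_get_ethtool_stats,
		.get_sset_count = rtl8366_get_sset_count,
		.port_vlan_filtering = rtl8366_vlan_filtering,
		.port_vlan_prepare = rtl8366_vlan_prepare,
		.port_vlan_add = rtl8366_vlan_add,
		.port_vlan_del = rtl8366_vlan_del,
	};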
diff --git a/drivers/net/dsa/rtl8366rb.c b/drivers/net/dsa/rtl8366rb.c
new file mode 100644 (file)
index 0000000..1e55b9b
--- /dev/null
@@ -0,0 +1,1424 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Realtek SMI subdriver for the Realtek RTL8366RB ethernet switch
+ *
+ * This is a sparsely documented chip; the only viable documentation seems
+ * to be a patched-up code drop from the vendor that appears in various
+ * GPL source trees.
+ *
+ * Copyright (C) 2017 Linus Walleij <linus.walleij@linaro.org>
+ * Copyright (C) 2009-2010 Gabor Juhos <juhosg@openwrt.org>
+ * Copyright (C) 2010 Antti Seppälä <a.seppala@gmail.com>
+ * Copyright (C) 2010 Roman Yeryomin <roman@advem.lv>
+ * Copyright (C) 2011 Colin Leitner <colin.leitner@googlemail.com>
+ */
+
+#include <linux/bitops.h>
+#include <linux/etherdevice.h>
+#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/of_irq.h>
+#include <linux/regmap.h>
+
+#include "realtek-smi.h"
+
+#define RTL8366RB_PORT_NUM_CPU         5
+#define RTL8366RB_NUM_PORTS            6
+#define RTL8366RB_PHY_NO_MAX           4
+#define RTL8366RB_PHY_ADDR_MAX         31
+
+/* Switch Global Configuration register */
+#define RTL8366RB_SGCR                         0x0000
+#define RTL8366RB_SGCR_EN_BC_STORM_CTRL                BIT(0)
+#define RTL8366RB_SGCR_MAX_LENGTH(a)           ((a) << 4)
+#define RTL8366RB_SGCR_MAX_LENGTH_MASK         RTL8366RB_SGCR_MAX_LENGTH(0x3)
+#define RTL8366RB_SGCR_MAX_LENGTH_1522         RTL8366RB_SGCR_MAX_LENGTH(0x0)
+#define RTL8366RB_SGCR_MAX_LENGTH_1536         RTL8366RB_SGCR_MAX_LENGTH(0x1)
+#define RTL8366RB_SGCR_MAX_LENGTH_1552         RTL8366RB_SGCR_MAX_LENGTH(0x2)
+#define RTL8366RB_SGCR_MAX_LENGTH_9216         RTL8366RB_SGCR_MAX_LENGTH(0x3)
+#define RTL8366RB_SGCR_EN_VLAN                 BIT(13)
+#define RTL8366RB_SGCR_EN_VLAN_4KTB            BIT(14)
+
+/* Port Enable Control register */
+#define RTL8366RB_PECR                         0x0001
+
+/* Switch Security Control registers */
+#define RTL8366RB_SSCR0                                0x0002
+#define RTL8366RB_SSCR1                                0x0003
+#define RTL8366RB_SSCR2                                0x0004
+#define RTL8366RB_SSCR2_DROP_UNKNOWN_DA                BIT(0)
+
+/* Port Mirror Control Register */
+#define RTL8366RB_PMCR                         0x0007
+#define RTL8366RB_PMCR_SOURCE_PORT(a)          (a)
+#define RTL8366RB_PMCR_SOURCE_PORT_MASK                0x000f
+#define RTL8366RB_PMCR_MONITOR_PORT(a)         ((a) << 4)
+#define RTL8366RB_PMCR_MONITOR_PORT_MASK       0x00f0
+#define RTL8366RB_PMCR_MIRROR_RX               BIT(8)
+#define RTL8366RB_PMCR_MIRROR_TX               BIT(9)
+#define RTL8366RB_PMCR_MIRROR_SPC              BIT(10)
+#define RTL8366RB_PMCR_MIRROR_ISO              BIT(11)
+
+/* bits 0..7 = port 0, bits 8..15 = port 1 */
+#define RTL8366RB_PAACR0               0x0010
+/* bits 0..7 = port 2, bits 8..15 = port 3 */
+#define RTL8366RB_PAACR1               0x0011
+/* bits 0..7 = port 4, bits 8..15 = port 5 */
+#define RTL8366RB_PAACR2               0x0012
+#define RTL8366RB_PAACR_SPEED_10M      0
+#define RTL8366RB_PAACR_SPEED_100M     1
+#define RTL8366RB_PAACR_SPEED_1000M    2
+#define RTL8366RB_PAACR_FULL_DUPLEX    BIT(2)
+#define RTL8366RB_PAACR_LINK_UP                BIT(4)
+#define RTL8366RB_PAACR_TX_PAUSE       BIT(5)
+#define RTL8366RB_PAACR_RX_PAUSE       BIT(6)
+#define RTL8366RB_PAACR_AN             BIT(7)
+
+#define RTL8366RB_PAACR_CPU_PORT       (RTL8366RB_PAACR_SPEED_1000M | \
+                                        RTL8366RB_PAACR_FULL_DUPLEX | \
+                                        RTL8366RB_PAACR_LINK_UP | \
+                                        RTL8366RB_PAACR_TX_PAUSE | \
+                                        RTL8366RB_PAACR_RX_PAUSE)
+
+/* bits 0..7 = port 0, bits 8..15 = port 1 */
+#define RTL8366RB_PSTAT0               0x0014
+/* bits 0..7 = port 2, bits 8..15 = port 3 */
+#define RTL8366RB_PSTAT1               0x0015
+/* bits 0..7 = port 4, bits 8..15 = port 5 */
+#define RTL8366RB_PSTAT2               0x0016
+
+#define RTL8366RB_POWER_SAVING_REG     0x0021
+
+/* CPU port control reg */
+#define RTL8368RB_CPU_CTRL_REG         0x0061
+#define RTL8368RB_CPU_PORTS_MSK                0x00FF
+/* Enables inserting custom tag length/type 0x8899 */
+#define RTL8368RB_CPU_INSTAG           BIT(15)
+
+#define RTL8366RB_SMAR0                        0x0070 /* bits 0..15 */
+#define RTL8366RB_SMAR1                        0x0071 /* bits 16..31 */
+#define RTL8366RB_SMAR2                        0x0072 /* bits 32..47 */
+
+#define RTL8366RB_RESET_CTRL_REG               0x0100
+#define RTL8366RB_CHIP_CTRL_RESET_HW           BIT(0)
+#define RTL8366RB_CHIP_CTRL_RESET_SW           BIT(1)
+
+#define RTL8366RB_CHIP_ID_REG                  0x0509
+#define RTL8366RB_CHIP_ID_8366                 0x5937
+#define RTL8366RB_CHIP_VERSION_CTRL_REG                0x050A
+#define RTL8366RB_CHIP_VERSION_MASK            0xf
+
+/* PHY registers control */
+#define RTL8366RB_PHY_ACCESS_CTRL_REG          0x8000
+#define RTL8366RB_PHY_CTRL_READ                        BIT(0)
+#define RTL8366RB_PHY_CTRL_WRITE               0
+#define RTL8366RB_PHY_ACCESS_BUSY_REG          0x8001
+#define RTL8366RB_PHY_INT_BUSY                 BIT(0)
+#define RTL8366RB_PHY_EXT_BUSY                 BIT(4)
+#define RTL8366RB_PHY_ACCESS_DATA_REG          0x8002
+#define RTL8366RB_PHY_EXT_CTRL_REG             0x8010
+#define RTL8366RB_PHY_EXT_WRDATA_REG           0x8011
+#define RTL8366RB_PHY_EXT_RDDATA_REG           0x8012
+
+#define RTL8366RB_PHY_REG_MASK                 0x1f
+#define RTL8366RB_PHY_PAGE_OFFSET              5
+#define RTL8366RB_PHY_PAGE_MASK                        (0xf << 5)
+#define RTL8366RB_PHY_NO_OFFSET                        9
+#define RTL8366RB_PHY_NO_MASK                  (0x1f << 9)
+
+#define RTL8366RB_VLAN_INGRESS_CTRL2_REG       0x037f
+
+/* LED control registers */
+#define RTL8366RB_LED_BLINKRATE_REG            0x0430
+#define RTL8366RB_LED_BLINKRATE_MASK           0x0007
+#define RTL8366RB_LED_BLINKRATE_28MS           0x0000
+#define RTL8366RB_LED_BLINKRATE_56MS           0x0001
+#define RTL8366RB_LED_BLINKRATE_84MS           0x0002
+#define RTL8366RB_LED_BLINKRATE_111MS          0x0003
+#define RTL8366RB_LED_BLINKRATE_222MS          0x0004
+#define RTL8366RB_LED_BLINKRATE_446MS          0x0005
+
+#define RTL8366RB_LED_CTRL_REG                 0x0431
+#define RTL8366RB_LED_OFF                      0x0
+#define RTL8366RB_LED_DUP_COL                  0x1
+#define RTL8366RB_LED_LINK_ACT                 0x2
+#define RTL8366RB_LED_SPD1000                  0x3
+#define RTL8366RB_LED_SPD100                   0x4
+#define RTL8366RB_LED_SPD10                    0x5
+#define RTL8366RB_LED_SPD1000_ACT              0x6
+#define RTL8366RB_LED_SPD100_ACT               0x7
+#define RTL8366RB_LED_SPD10_ACT                        0x8
+#define RTL8366RB_LED_SPD100_10_ACT            0x9
+#define RTL8366RB_LED_FIBER                    0xa
+#define RTL8366RB_LED_AN_FAULT                 0xb
+#define RTL8366RB_LED_LINK_RX                  0xc
+#define RTL8366RB_LED_LINK_TX                  0xd
+#define RTL8366RB_LED_MASTER                   0xe
+#define RTL8366RB_LED_FORCE                    0xf
+#define RTL8366RB_LED_0_1_CTRL_REG             0x0432
+#define RTL8366RB_LED_1_OFFSET                 6
+#define RTL8366RB_LED_2_3_CTRL_REG             0x0433
+#define RTL8366RB_LED_3_OFFSET                 6
+
+#define RTL8366RB_MIB_COUNT                    33
+#define RTL8366RB_GLOBAL_MIB_COUNT             1
+#define RTL8366RB_MIB_COUNTER_PORT_OFFSET      0x0050
+#define RTL8366RB_MIB_COUNTER_BASE             0x1000
+#define RTL8366RB_MIB_CTRL_REG                 0x13F0
+#define RTL8366RB_MIB_CTRL_USER_MASK           0x0FFC
+#define RTL8366RB_MIB_CTRL_BUSY_MASK           BIT(0)
+#define RTL8366RB_MIB_CTRL_RESET_MASK          BIT(1)
+#define RTL8366RB_MIB_CTRL_PORT_RESET(_p)      BIT(2 + (_p))
+#define RTL8366RB_MIB_CTRL_GLOBAL_RESET                BIT(11)
+
+#define RTL8366RB_PORT_VLAN_CTRL_BASE          0x0063
+#define RTL8366RB_PORT_VLAN_CTRL_REG(_p)  \
+               (RTL8366RB_PORT_VLAN_CTRL_BASE + (_p) / 4)
+#define RTL8366RB_PORT_VLAN_CTRL_MASK          0xf
+#define RTL8366RB_PORT_VLAN_CTRL_SHIFT(_p)     (4 * ((_p) % 4))
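+/* Each port uses a 4-bit nibble, four ports per 16-bit register;
+ * e.g. port 5 maps to register 0x0064 (0x0063 + 5/4) at bit shift 4
+ * (4 * (5 % 4)) (illustrative).
+ */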
+
+#define RTL8366RB_VLAN_TABLE_READ_BASE         0x018C
+#define RTL8366RB_VLAN_TABLE_WRITE_BASE                0x0185
+
+#define RTL8366RB_TABLE_ACCESS_CTRL_REG                0x0180
+#define RTL8366RB_TABLE_VLAN_READ_CTRL         0x0E01
+#define RTL8366RB_TABLE_VLAN_WRITE_CTRL                0x0F01
+
+#define RTL8366RB_VLAN_MC_BASE(_x)             (0x0020 + (_x) * 3)
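+/* Each VLAN member config occupies three consecutive 16-bit registers;
+ * e.g. member config 2 sits at 0x0026..0x0028 (illustrative).
+ */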
+
+#define RTL8366RB_PORT_LINK_STATUS_BASE                0x0014
+#define RTL8366RB_PORT_STATUS_SPEED_MASK       0x0003
+#define RTL8366RB_PORT_STATUS_DUPLEX_MASK      0x0004
+#define RTL8366RB_PORT_STATUS_LINK_MASK                0x0010
+#define RTL8366RB_PORT_STATUS_TXPAUSE_MASK     0x0020
+#define RTL8366RB_PORT_STATUS_RXPAUSE_MASK     0x0040
+#define RTL8366RB_PORT_STATUS_AN_MASK          0x0080
+
+#define RTL8366RB_NUM_VLANS            16
+#define RTL8366RB_NUM_LEDGROUPS                4
+#define RTL8366RB_NUM_VIDS             4096
+#define RTL8366RB_PRIORITYMAX          7
+#define RTL8366RB_FIDMAX               7
+
+#define RTL8366RB_PORT_1               BIT(0) /* In userspace port 0 */
+#define RTL8366RB_PORT_2               BIT(1) /* In userspace port 1 */
+#define RTL8366RB_PORT_3               BIT(2) /* In userspace port 2 */
+#define RTL8366RB_PORT_4               BIT(3) /* In userspace port 3 */
+#define RTL8366RB_PORT_5               BIT(4) /* In userspace port 4 */
+
+#define RTL8366RB_PORT_CPU             BIT(5) /* CPU port */
+
+#define RTL8366RB_PORT_ALL             (RTL8366RB_PORT_1 |     \
+                                        RTL8366RB_PORT_2 |     \
+                                        RTL8366RB_PORT_3 |     \
+                                        RTL8366RB_PORT_4 |     \
+                                        RTL8366RB_PORT_5 |     \
+                                        RTL8366RB_PORT_CPU)
+
+#define RTL8366RB_PORT_ALL_BUT_CPU     (RTL8366RB_PORT_1 |     \
+                                        RTL8366RB_PORT_2 |     \
+                                        RTL8366RB_PORT_3 |     \
+                                        RTL8366RB_PORT_4 |     \
+                                        RTL8366RB_PORT_5)
+
+#define RTL8366RB_PORT_ALL_EXTERNAL    (RTL8366RB_PORT_1 |     \
+                                        RTL8366RB_PORT_2 |     \
+                                        RTL8366RB_PORT_3 |     \
+                                        RTL8366RB_PORT_4)
+
+#define RTL8366RB_PORT_ALL_INTERNAL     RTL8366RB_PORT_CPU
+
+/* First configuration word per member config, VID and prio */
+#define RTL8366RB_VLAN_VID_MASK                0xfff
+#define RTL8366RB_VLAN_PRIORITY_SHIFT  12
+#define RTL8366RB_VLAN_PRIORITY_MASK   0x7
+/* Second configuration word per member config, member and untagged */
+#define RTL8366RB_VLAN_UNTAG_SHIFT     8
+#define RTL8366RB_VLAN_UNTAG_MASK      0xff
+#define RTL8366RB_VLAN_MEMBER_MASK     0xff
+/* Third config word per member config, STAG currently unused */
+#define RTL8366RB_VLAN_STAG_MBR_MASK   0xff
+#define RTL8366RB_VLAN_STAG_MBR_SHIFT  8
+#define RTL8366RB_VLAN_STAG_IDX_MASK   0x7
+#define RTL8366RB_VLAN_STAG_IDX_SHIFT  5
+#define RTL8366RB_VLAN_FID_MASK                0x7
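+/* Illustrative packing: VID 100 at priority 2 with member mask 0x3f,
+ * untagged mask 0x3e and FID 0 yields the three config words
+ * 0x2064, 0x3e3f, 0x0000 (see rtl8366rb_set_vlan_mc() below).
+ */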
+
+/* Port ingress bandwidth control */
+#define RTL8366RB_IB_BASE              0x0200
+#define RTL8366RB_IB_REG(pnum)         (RTL8366RB_IB_BASE + (pnum))
+#define RTL8366RB_IB_BDTH_MASK         0x3fff
+#define RTL8366RB_IB_PREIFG            BIT(14)
+
+/* Port egress bandwidth control */
+#define RTL8366RB_EB_BASE              0x02d1
+#define RTL8366RB_EB_REG(pnum)         (RTL8366RB_EB_BASE + (pnum))
+#define RTL8366RB_EB_BDTH_MASK         0x3fff
+#define RTL8366RB_EB_PREIFG_REG                0x02f8
+#define RTL8366RB_EB_PREIFG            BIT(9)
+
+#define RTL8366RB_BDTH_SW_MAX          1048512 /* 1048576? */
+#define RTL8366RB_BDTH_UNIT            64
+#define RTL8366RB_BDTH_REG_DEFAULT     16383
+
+/* QOS */
+#define RTL8366RB_QOS                  BIT(15)
+/* Include/Exclude Preamble and IFG (20 bytes). 0:Exclude, 1:Include. */
+#define RTL8366RB_QOS_DEFAULT_PREIFG   1
+
+/* Interrupt handling */
+#define RTL8366RB_INTERRUPT_CONTROL_REG        0x0440
+#define RTL8366RB_INTERRUPT_POLARITY   BIT(0)
+#define RTL8366RB_P4_RGMII_LED         BIT(2)
+#define RTL8366RB_INTERRUPT_MASK_REG   0x0441
+#define RTL8366RB_INTERRUPT_LINK_CHGALL        GENMASK(11, 0)
+#define RTL8366RB_INTERRUPT_ACLEXCEED  BIT(8)
+#define RTL8366RB_INTERRUPT_STORMEXCEED        BIT(9)
+#define RTL8366RB_INTERRUPT_P4_FIBER   BIT(12)
+#define RTL8366RB_INTERRUPT_P4_UTP     BIT(13)
+#define RTL8366RB_INTERRUPT_VALID      (RTL8366RB_INTERRUPT_LINK_CHGALL | \
+                                        RTL8366RB_INTERRUPT_ACLEXCEED | \
+                                        RTL8366RB_INTERRUPT_STORMEXCEED | \
+                                        RTL8366RB_INTERRUPT_P4_FIBER | \
+                                        RTL8366RB_INTERRUPT_P4_UTP)
+#define RTL8366RB_INTERRUPT_STATUS_REG 0x0442
+#define RTL8366RB_NUM_INTERRUPT                14 /* 0..13 */
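+/* Status lines 0..5 signal link up and 6..11 link down per port; the
+ * driver combines each up/down pair into a single link-change
+ * interrupt. Line 12 is port 4 fiber and line 13 is port 4 UTP.
+ */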
+
+/* bits 0..5 enable force when cleared */
+#define RTL8366RB_MAC_FORCE_CTRL_REG   0x0F11
+
+#define RTL8366RB_OAM_PARSER_REG       0x0F14
+#define RTL8366RB_OAM_MULTIPLEXER_REG  0x0F15
+
+#define RTL8366RB_GREEN_FEATURE_REG    0x0F51
+#define RTL8366RB_GREEN_FEATURE_MSK    0x0007
+#define RTL8366RB_GREEN_FEATURE_TX     BIT(0)
+#define RTL8366RB_GREEN_FEATURE_RX     BIT(2)
+
+static struct rtl8366_mib_counter rtl8366rb_mib_counters[] = {
+       { 0,  0, 4, "IfInOctets"                                },
+       { 0,  4, 4, "EtherStatsOctets"                          },
+       { 0,  8, 2, "EtherStatsUnderSizePkts"                   },
+       { 0, 10, 2, "EtherFragments"                            },
+       { 0, 12, 2, "EtherStatsPkts64Octets"                    },
+       { 0, 14, 2, "EtherStatsPkts65to127Octets"               },
+       { 0, 16, 2, "EtherStatsPkts128to255Octets"              },
+       { 0, 18, 2, "EtherStatsPkts256to511Octets"              },
+       { 0, 20, 2, "EtherStatsPkts512to1023Octets"             },
+       { 0, 22, 2, "EtherStatsPkts1024to1518Octets"            },
+       { 0, 24, 2, "EtherOversizeStats"                        },
+       { 0, 26, 2, "EtherStatsJabbers"                         },
+       { 0, 28, 2, "IfInUcastPkts"                             },
+       { 0, 30, 2, "EtherStatsMulticastPkts"                   },
+       { 0, 32, 2, "EtherStatsBroadcastPkts"                   },
+       { 0, 34, 2, "EtherStatsDropEvents"                      },
+       { 0, 36, 2, "Dot3StatsFCSErrors"                        },
+       { 0, 38, 2, "Dot3StatsSymbolErrors"                     },
+       { 0, 40, 2, "Dot3InPauseFrames"                         },
+       { 0, 42, 2, "Dot3ControlInUnknownOpcodes"               },
+       { 0, 44, 4, "IfOutOctets"                               },
+       { 0, 48, 2, "Dot3StatsSingleCollisionFrames"            },
+       { 0, 50, 2, "Dot3StatMultipleCollisionFrames"           },
+       { 0, 52, 2, "Dot3sDeferredTransmissions"                },
+       { 0, 54, 2, "Dot3StatsLateCollisions"                   },
+       { 0, 56, 2, "EtherStatsCollisions"                      },
+       { 0, 58, 2, "Dot3StatsExcessiveCollisions"              },
+       { 0, 60, 2, "Dot3OutPauseFrames"                        },
+       { 0, 62, 2, "Dot1dBasePortDelayExceededDiscards"        },
+       { 0, 64, 2, "Dot1dTpPortInDiscards"                     },
+       { 0, 66, 2, "IfOutUcastPkts"                            },
+       { 0, 68, 2, "IfOutMulticastPkts"                        },
+       { 0, 70, 2, "IfOutBroadcastPkts"                        },
+};
+
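+/* MIB counters are read indirectly: write any value to the counter
+ * address to latch it, then read the value back one 16-bit word at a
+ * time, high word first. Illustration: a 4-word counter at address A
+ * assembles as (w[A+3] << 48) | (w[A+2] << 32) | (w[A+1] << 16) | w[A].
+ */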
+static int rtl8366rb_get_mib_counter(struct realtek_smi *smi,
+                                    int port,
+                                    struct rtl8366_mib_counter *mib,
+                                    u64 *mibvalue)
+{
+       u32 addr, val;
+       int ret;
+       int i;
+
+       addr = RTL8366RB_MIB_COUNTER_BASE +
+               RTL8366RB_MIB_COUNTER_PORT_OFFSET * (port) +
+               mib->offset;
+
+       /* Write the counter address first; the ASIC will then
+        * prepare the 64-bit counter value to be retrieved.
+        */
+       ret = regmap_write(smi->map, addr, 0); /* Write whatever */
+       if (ret)
+               return ret;
+
+       /* Read MIB control register */
+       ret = regmap_read(smi->map, RTL8366RB_MIB_CTRL_REG, &val);
+       if (ret)
+               return -EIO;
+
+       if (val & RTL8366RB_MIB_CTRL_BUSY_MASK)
+               return -EBUSY;
+
+       if (val & RTL8366RB_MIB_CTRL_RESET_MASK)
+               return -EIO;
+
+       /* Read the MIB value one 16-bit word at a time, high word first */
+       *mibvalue = 0;
+       for (i = mib->length; i > 0; i--) {
+               ret = regmap_read(smi->map, addr + (i - 1), &val);
+               if (ret)
+                       return ret;
+               *mibvalue = (*mibvalue << 16) | (val & 0xFFFF);
+       }
+       return 0;
+}
+
+static u32 rtl8366rb_get_irqmask(struct irq_data *d)
+{
+       int line = irqd_to_hwirq(d);
+       u32 val;
+
+       /* For line interrupts we combine link down in bits
+        * 6..11 with link up in bits 0..5 into one interrupt.
+        */
+       if (line < 12)
+               val = BIT(line) | BIT(line + 6);
+       else
+               val = BIT(line);
+       return val;
+}
+
+static void rtl8366rb_mask_irq(struct irq_data *d)
+{
+       struct realtek_smi *smi = irq_data_get_irq_chip_data(d);
+       int ret;
+
+       ret = regmap_update_bits(smi->map, RTL8366RB_INTERRUPT_MASK_REG,
+                                rtl8366rb_get_irqmask(d), 0);
+       if (ret)
+               dev_err(smi->dev, "could not mask IRQ\n");
+}
+
+static void rtl8366rb_unmask_irq(struct irq_data *d)
+{
+       struct realtek_smi *smi = irq_data_get_irq_chip_data(d);
+       int ret;
+
+       ret = regmap_update_bits(smi->map, RTL8366RB_INTERRUPT_MASK_REG,
+                                rtl8366rb_get_irqmask(d),
+                                rtl8366rb_get_irqmask(d));
+       if (ret)
+               dev_err(smi->dev, "could not unmask IRQ\n");
+}
+
+static irqreturn_t rtl8366rb_irq(int irq, void *data)
+{
+       struct realtek_smi *smi = data;
+       u32 stat;
+       int ret;
+
+       /* This clears the IRQ status register */
+       ret = regmap_read(smi->map, RTL8366RB_INTERRUPT_STATUS_REG,
+                         &stat);
+       if (ret) {
+               dev_err(smi->dev, "can't read interrupt status\n");
+               return IRQ_NONE;
+       }
+       stat &= RTL8366RB_INTERRUPT_VALID;
+       if (!stat)
+               return IRQ_NONE;
+       while (stat) {
+               int line = __ffs(stat);
+               int child_irq;
+
+               stat &= ~BIT(line);
+               /* For line interrupts we combine link down in bits
+                * 6..11 with link up in bits 0..5 into one interrupt.
+                */
+               if (line < 12 && line > 5)
+                       line -= 5;
+               child_irq = irq_find_mapping(smi->irqdomain, line);
+               handle_nested_irq(child_irq);
+       }
+       return IRQ_HANDLED;
+}
+
+static struct irq_chip rtl8366rb_irq_chip = {
+       .name = "RTL8366RB",
+       .irq_mask = rtl8366rb_mask_irq,
+       .irq_unmask = rtl8366rb_unmask_irq,
+};
+
+static int rtl8366rb_irq_map(struct irq_domain *domain, unsigned int irq,
+                            irq_hw_number_t hwirq)
+{
+       irq_set_chip_data(irq, domain->host_data);
+       irq_set_chip_and_handler(irq, &rtl8366rb_irq_chip, handle_simple_irq);
+       irq_set_nested_thread(irq, 1);
+       irq_set_noprobe(irq);
+
+       return 0;
+}
+
+static void rtl8366rb_irq_unmap(struct irq_domain *d, unsigned int irq)
+{
+       irq_set_nested_thread(irq, 0);
+       irq_set_chip_and_handler(irq, NULL, NULL);
+       irq_set_chip_data(irq, NULL);
+}
+
+static const struct irq_domain_ops rtl8366rb_irqdomain_ops = {
+       .map = rtl8366rb_irq_map,
+       .unmap = rtl8366rb_irq_unmap,
+       .xlate  = irq_domain_xlate_onecell,
+};
+
+static int rtl8366rb_setup_cascaded_irq(struct realtek_smi *smi)
+{
+       struct device_node *intc;
+       unsigned long irq_trig;
+       int irq;
+       int ret;
+       u32 val;
+       int i;
+
+       intc = of_get_child_by_name(smi->dev->of_node, "interrupt-controller");
+       if (!intc) {
+               dev_err(smi->dev, "missing child interrupt-controller node\n");
+               return -EINVAL;
+       }
+       /* RTL8366RB IRQs cascade off this one */
+       irq = of_irq_get(intc, 0);
+       if (irq <= 0) {
+               dev_err(smi->dev, "failed to get parent IRQ\n");
+               return irq ? irq : -EINVAL;
+       }
+
+       /* This clears the IRQ status register */
+       ret = regmap_read(smi->map, RTL8366RB_INTERRUPT_STATUS_REG,
+                         &val);
+       if (ret) {
+               dev_err(smi->dev, "can't read interrupt status\n");
+               return ret;
+       }
+
+       /* Fetch IRQ edge information from the descriptor */
+       irq_trig = irqd_get_trigger_type(irq_get_irq_data(irq));
+       switch (irq_trig) {
+       case IRQF_TRIGGER_RISING:
+       case IRQF_TRIGGER_HIGH:
+               dev_info(smi->dev, "active high/rising IRQ\n");
+               val = 0;
+               break;
+       case IRQF_TRIGGER_FALLING:
+       case IRQF_TRIGGER_LOW:
+               dev_info(smi->dev, "active low/falling IRQ\n");
+               val = RTL8366RB_INTERRUPT_POLARITY;
+               break;
+       default:
+               dev_err(smi->dev, "unsupported IRQ trigger type %lu\n",
+                       irq_trig);
+               return -EINVAL;
+       }
+       ret = regmap_update_bits(smi->map, RTL8366RB_INTERRUPT_CONTROL_REG,
+                                RTL8366RB_INTERRUPT_POLARITY,
+                                val);
+       if (ret) {
+               dev_err(smi->dev, "could not configure IRQ polarity\n");
+               return ret;
+       }
+
+       ret = devm_request_threaded_irq(smi->dev, irq, NULL,
+                                       rtl8366rb_irq, IRQF_ONESHOT,
+                                       "RTL8366RB", smi);
+       if (ret) {
+               dev_err(smi->dev, "unable to request irq: %d\n", ret);
+               return ret;
+       }
+       smi->irqdomain = irq_domain_add_linear(intc,
+                                              RTL8366RB_NUM_INTERRUPT,
+                                              &rtl8366rb_irqdomain_ops,
+                                              smi);
+       if (!smi->irqdomain) {
+               dev_err(smi->dev, "failed to create IRQ domain\n");
+               return -EINVAL;
+       }
+       for (i = 0; i < smi->num_ports; i++)
+               irq_set_parent(irq_create_mapping(smi->irqdomain, i), irq);
+
+       return 0;
+}
+
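+/* The switch MAC address is stored big-endian across the three SMAR
+ * registers; e.g. 00:11:22:33:44:55 gives SMAR0 = 0x0011,
+ * SMAR1 = 0x2233, SMAR2 = 0x4455 (illustrative).
+ */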
+static int rtl8366rb_set_addr(struct realtek_smi *smi)
+{
+       u8 addr[ETH_ALEN];
+       u16 val;
+       int ret;
+
+       eth_random_addr(addr);
+
+       dev_info(smi->dev, "set MAC: %02X:%02X:%02X:%02X:%02X:%02X\n",
+                addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
+       val = addr[0] << 8 | addr[1];
+       ret = regmap_write(smi->map, RTL8366RB_SMAR0, val);
+       if (ret)
+               return ret;
+       val = addr[2] << 8 | addr[3];
+       ret = regmap_write(smi->map, RTL8366RB_SMAR1, val);
+       if (ret)
+               return ret;
+       val = addr[4] << 8 | addr[5];
+       ret = regmap_write(smi->map, RTL8366RB_SMAR2, val);
+       if (ret)
+               return ret;
+
+       return 0;
+}
+
+/* Found in a vendor driver */
+
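+/* The jam tables below are flat arrays of (register, value) pairs.
+ * Register numbers with all bits of 0xBE00 set address internal PHY
+ * registers; for these, rtl8366rb_setup() first arms a PHY write via
+ * the PHY access control register (when the access engine is idle)
+ * before jamming the value.
+ */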
+/* For the "version 0" early silicon, appears in most source releases */
+static const u16 rtl8366rb_init_jam_ver_0[] = {
+       0x000B, 0x0001, 0x03A6, 0x0100, 0x03A7, 0x0001, 0x02D1, 0x3FFF,
+       0x02D2, 0x3FFF, 0x02D3, 0x3FFF, 0x02D4, 0x3FFF, 0x02D5, 0x3FFF,
+       0x02D6, 0x3FFF, 0x02D7, 0x3FFF, 0x02D8, 0x3FFF, 0x022B, 0x0688,
+       0x022C, 0x0FAC, 0x03D0, 0x4688, 0x03D1, 0x01F5, 0x0000, 0x0830,
+       0x02F9, 0x0200, 0x02F7, 0x7FFF, 0x02F8, 0x03FF, 0x0080, 0x03E8,
+       0x0081, 0x00CE, 0x0082, 0x00DA, 0x0083, 0x0230, 0xBE0F, 0x2000,
+       0x0231, 0x422A, 0x0232, 0x422A, 0x0233, 0x422A, 0x0234, 0x422A,
+       0x0235, 0x422A, 0x0236, 0x422A, 0x0237, 0x422A, 0x0238, 0x422A,
+       0x0239, 0x422A, 0x023A, 0x422A, 0x023B, 0x422A, 0x023C, 0x422A,
+       0x023D, 0x422A, 0x023E, 0x422A, 0x023F, 0x422A, 0x0240, 0x422A,
+       0x0241, 0x422A, 0x0242, 0x422A, 0x0243, 0x422A, 0x0244, 0x422A,
+       0x0245, 0x422A, 0x0246, 0x422A, 0x0247, 0x422A, 0x0248, 0x422A,
+       0x0249, 0x0146, 0x024A, 0x0146, 0x024B, 0x0146, 0xBE03, 0xC961,
+       0x024D, 0x0146, 0x024E, 0x0146, 0x024F, 0x0146, 0x0250, 0x0146,
+       0xBE64, 0x0226, 0x0252, 0x0146, 0x0253, 0x0146, 0x024C, 0x0146,
+       0x0251, 0x0146, 0x0254, 0x0146, 0xBE62, 0x3FD0, 0x0084, 0x0320,
+       0x0255, 0x0146, 0x0256, 0x0146, 0x0257, 0x0146, 0x0258, 0x0146,
+       0x0259, 0x0146, 0x025A, 0x0146, 0x025B, 0x0146, 0x025C, 0x0146,
+       0x025D, 0x0146, 0x025E, 0x0146, 0x025F, 0x0146, 0x0260, 0x0146,
+       0x0261, 0xA23F, 0x0262, 0x0294, 0x0263, 0xA23F, 0x0264, 0x0294,
+       0x0265, 0xA23F, 0x0266, 0x0294, 0x0267, 0xA23F, 0x0268, 0x0294,
+       0x0269, 0xA23F, 0x026A, 0x0294, 0x026B, 0xA23F, 0x026C, 0x0294,
+       0x026D, 0xA23F, 0x026E, 0x0294, 0x026F, 0xA23F, 0x0270, 0x0294,
+       0x02F5, 0x0048, 0xBE09, 0x0E00, 0xBE1E, 0x0FA0, 0xBE14, 0x8448,
+       0xBE15, 0x1007, 0xBE4A, 0xA284, 0xC454, 0x3F0B, 0xC474, 0x3F0B,
+       0xBE48, 0x3672, 0xBE4B, 0x17A7, 0xBE4C, 0x0B15, 0xBE52, 0x0EDD,
+       0xBE49, 0x8C00, 0xBE5B, 0x785C, 0xBE5C, 0x785C, 0xBE5D, 0x785C,
+       0xBE61, 0x368A, 0xBE63, 0x9B84, 0xC456, 0xCC13, 0xC476, 0xCC13,
+       0xBE65, 0x307D, 0xBE6D, 0x0005, 0xBE6E, 0xE120, 0xBE2E, 0x7BAF,
+};
+
+/* This v1 init sequence is from Belkin F5D8235 U-Boot release */
+static const u16 rtl8366rb_init_jam_ver_1[] = {
+       0x0000, 0x0830, 0x0001, 0x8000, 0x0400, 0x8130, 0xBE78, 0x3C3C,
+       0x0431, 0x5432, 0xBE37, 0x0CE4, 0x02FA, 0xFFDF, 0x02FB, 0xFFE0,
+       0xC44C, 0x1585, 0xC44C, 0x1185, 0xC44C, 0x1585, 0xC46C, 0x1585,
+       0xC46C, 0x1185, 0xC46C, 0x1585, 0xC451, 0x2135, 0xC471, 0x2135,
+       0xBE10, 0x8140, 0xBE15, 0x0007, 0xBE6E, 0xE120, 0xBE69, 0xD20F,
+       0xBE6B, 0x0320, 0xBE24, 0xB000, 0xBE23, 0xFF51, 0xBE22, 0xDF20,
+       0xBE21, 0x0140, 0xBE20, 0x00BB, 0xBE24, 0xB800, 0xBE24, 0x0000,
+       0xBE24, 0x7000, 0xBE23, 0xFF51, 0xBE22, 0xDF60, 0xBE21, 0x0140,
+       0xBE20, 0x0077, 0xBE24, 0x7800, 0xBE24, 0x0000, 0xBE2E, 0x7B7A,
+       0xBE36, 0x0CE4, 0x02F5, 0x0048, 0xBE77, 0x2940, 0x000A, 0x83E0,
+       0xBE79, 0x3C3C, 0xBE00, 0x1340,
+};
+
+/* This v2 init sequence is from Belkin F5D8235 U-Boot release */
+static const u16 rtl8366rb_init_jam_ver_2[] = {
+       0x0450, 0x0000, 0x0400, 0x8130, 0x000A, 0x83ED, 0x0431, 0x5432,
+       0xC44F, 0x6250, 0xC46F, 0x6250, 0xC456, 0x0C14, 0xC476, 0x0C14,
+       0xC44C, 0x1C85, 0xC44C, 0x1885, 0xC44C, 0x1C85, 0xC46C, 0x1C85,
+       0xC46C, 0x1885, 0xC46C, 0x1C85, 0xC44C, 0x0885, 0xC44C, 0x0881,
+       0xC44C, 0x0885, 0xC46C, 0x0885, 0xC46C, 0x0881, 0xC46C, 0x0885,
+       0xBE2E, 0x7BA7, 0xBE36, 0x1000, 0xBE37, 0x1000, 0x8000, 0x0001,
+       0xBE69, 0xD50F, 0x8000, 0x0000, 0xBE69, 0xD50F, 0xBE6E, 0x0320,
+       0xBE77, 0x2940, 0xBE78, 0x3C3C, 0xBE79, 0x3C3C, 0xBE6E, 0xE120,
+       0x8000, 0x0001, 0xBE15, 0x1007, 0x8000, 0x0000, 0xBE15, 0x1007,
+       0xBE14, 0x0448, 0xBE1E, 0x00A0, 0xBE10, 0x8160, 0xBE10, 0x8140,
+       0xBE00, 0x1340, 0x0F51, 0x0010,
+};
+
+/* Appears in a DD-WRT code dump */
+static const u16 rtl8366rb_init_jam_ver_3[] = {
+       0x0000, 0x0830, 0x0400, 0x8130, 0x000A, 0x83ED, 0x0431, 0x5432,
+       0x0F51, 0x0017, 0x02F5, 0x0048, 0x02FA, 0xFFDF, 0x02FB, 0xFFE0,
+       0xC456, 0x0C14, 0xC476, 0x0C14, 0xC454, 0x3F8B, 0xC474, 0x3F8B,
+       0xC450, 0x2071, 0xC470, 0x2071, 0xC451, 0x226B, 0xC471, 0x226B,
+       0xC452, 0xA293, 0xC472, 0xA293, 0xC44C, 0x1585, 0xC44C, 0x1185,
+       0xC44C, 0x1585, 0xC46C, 0x1585, 0xC46C, 0x1185, 0xC46C, 0x1585,
+       0xC44C, 0x0185, 0xC44C, 0x0181, 0xC44C, 0x0185, 0xC46C, 0x0185,
+       0xC46C, 0x0181, 0xC46C, 0x0185, 0xBE24, 0xB000, 0xBE23, 0xFF51,
+       0xBE22, 0xDF20, 0xBE21, 0x0140, 0xBE20, 0x00BB, 0xBE24, 0xB800,
+       0xBE24, 0x0000, 0xBE24, 0x7000, 0xBE23, 0xFF51, 0xBE22, 0xDF60,
+       0xBE21, 0x0140, 0xBE20, 0x0077, 0xBE24, 0x7800, 0xBE24, 0x0000,
+       0xBE2E, 0x7BA7, 0xBE36, 0x1000, 0xBE37, 0x1000, 0x8000, 0x0001,
+       0xBE69, 0xD50F, 0x8000, 0x0000, 0xBE69, 0xD50F, 0xBE6B, 0x0320,
+       0xBE77, 0x2800, 0xBE78, 0x3C3C, 0xBE79, 0x3C3C, 0xBE6E, 0xE120,
+       0x8000, 0x0001, 0xBE10, 0x8140, 0x8000, 0x0000, 0xBE10, 0x8140,
+       0xBE15, 0x1007, 0xBE14, 0x0448, 0xBE1E, 0x00A0, 0xBE10, 0x8160,
+       0xBE10, 0x8140, 0xBE00, 0x1340, 0x0450, 0x0000, 0x0401, 0x0000,
+};
+
+/* Belkin F5D8235 v1, "belkin,f5d8235-v1" */
+static const u16 rtl8366rb_init_jam_f5d8235[] = {
+       0x0242, 0x02BF, 0x0245, 0x02BF, 0x0248, 0x02BF, 0x024B, 0x02BF,
+       0x024E, 0x02BF, 0x0251, 0x02BF, 0x0254, 0x0A3F, 0x0256, 0x0A3F,
+       0x0258, 0x0A3F, 0x025A, 0x0A3F, 0x025C, 0x0A3F, 0x025E, 0x0A3F,
+       0x0263, 0x007C, 0x0100, 0x0004, 0xBE5B, 0x3500, 0x800E, 0x200F,
+       0xBE1D, 0x0F00, 0x8001, 0x5011, 0x800A, 0xA2F4, 0x800B, 0x17A3,
+       0xBE4B, 0x17A3, 0xBE41, 0x5011, 0xBE17, 0x2100, 0x8000, 0x8304,
+       0xBE40, 0x8304, 0xBE4A, 0xA2F4, 0x800C, 0xA8D5, 0x8014, 0x5500,
+       0x8015, 0x0004, 0xBE4C, 0xA8D5, 0xBE59, 0x0008, 0xBE09, 0x0E00,
+       0xBE36, 0x1036, 0xBE37, 0x1036, 0x800D, 0x00FF, 0xBE4D, 0x00FF,
+};
+
+/* DGN3500, "netgear,dgn3500", "netgear,dgn3500b" */
+static const u16 rtl8366rb_init_jam_dgn3500[] = {
+       0x0000, 0x0830, 0x0400, 0x8130, 0x000A, 0x83ED, 0x0F51, 0x0017,
+       0x02F5, 0x0048, 0x02FA, 0xFFDF, 0x02FB, 0xFFE0, 0x0450, 0x0000,
+       0x0401, 0x0000, 0x0431, 0x0960,
+};
+
+/* This jam table activates "green ethernet", a low-power mode that is
+ * claimed to detect the cable length and use no more power than
+ * necessary; ports should enter power-saving mode 10 seconds after a
+ * cable is disconnected. The table seems to always be the same.
+ */
+static const u16 rtl8366rb_green_jam[][2] = {
+       {0xBE78, 0x323C}, {0xBE77, 0x5000}, {0xBE2E, 0x7BA7},
+       {0xBE59, 0x3459}, {0xBE5A, 0x745A}, {0xBE5B, 0x785C},
+       {0xBE5C, 0x785C}, {0xBE6E, 0xE120}, {0xBE79, 0x323C},
+};
+
+static int rtl8366rb_setup(struct dsa_switch *ds)
+{
+       struct realtek_smi *smi = ds->priv;
+       const u16 *jam_table;
+       u32 chip_ver = 0;
+       u32 chip_id = 0;
+       int jam_size;
+       u32 val;
+       int ret;
+       int i;
+
+       ret = regmap_read(smi->map, RTL8366RB_CHIP_ID_REG, &chip_id);
+       if (ret) {
+               dev_err(smi->dev, "unable to read chip id\n");
+               return ret;
+       }
+
+       switch (chip_id) {
+       case RTL8366RB_CHIP_ID_8366:
+               break;
+       default:
+               dev_err(smi->dev, "unknown chip id (%04x)\n", chip_id);
+               return -ENODEV;
+       }
+
+       ret = regmap_read(smi->map, RTL8366RB_CHIP_VERSION_CTRL_REG,
+                         &chip_ver);
+       if (ret) {
+               dev_err(smi->dev, "unable to read chip version\n");
+               return ret;
+       }
+
+       dev_info(smi->dev, "RTL%04x ver %u chip found\n",
+                chip_id, chip_ver & RTL8366RB_CHIP_VERSION_MASK);
+
+       /* Do the init dance using the right jam table */
+       switch (chip_ver) {
+       case 0:
+               jam_table = rtl8366rb_init_jam_ver_0;
+               jam_size = ARRAY_SIZE(rtl8366rb_init_jam_ver_0);
+               break;
+       case 1:
+               jam_table = rtl8366rb_init_jam_ver_1;
+               jam_size = ARRAY_SIZE(rtl8366rb_init_jam_ver_1);
+               break;
+       case 2:
+               jam_table = rtl8366rb_init_jam_ver_2;
+               jam_size = ARRAY_SIZE(rtl8366rb_init_jam_ver_2);
+               break;
+       default:
+               jam_table = rtl8366rb_init_jam_ver_3;
+               jam_size = ARRAY_SIZE(rtl8366rb_init_jam_ver_3);
+               break;
+       }
+
+       /* Special jam tables for special routers
+        * TODO: are these necessary? Maintainers, please test
+        * without them, using just the off-the-shelf tables.
+        */
+       if (of_machine_is_compatible("belkin,f5d8235-v1")) {
+               jam_table = rtl8366rb_init_jam_f5d8235;
+               jam_size = ARRAY_SIZE(rtl8366rb_init_jam_f5d8235);
+       }
+       if (of_machine_is_compatible("netgear,dgn3500") ||
+           of_machine_is_compatible("netgear,dgn3500b")) {
+               jam_table = rtl8366rb_init_jam_dgn3500;
+               jam_size = ARRAY_SIZE(rtl8366rb_init_jam_dgn3500);
+       }
+
+       for (i = 0; i < jam_size; i += 2) {
+               if ((jam_table[i] & 0xBE00) == 0xBE00) {
+                       ret = regmap_read(smi->map,
+                                         RTL8366RB_PHY_ACCESS_BUSY_REG,
+                                         &val);
+                       if (ret)
+                               return ret;
+                       if (!(val & RTL8366RB_PHY_INT_BUSY)) {
+                               ret = regmap_write(smi->map,
+                                               RTL8366RB_PHY_ACCESS_CTRL_REG,
+                                               RTL8366RB_PHY_CTRL_WRITE);
+                               if (ret)
+                                       return ret;
+                       }
+               }
+               dev_dbg(smi->dev, "jam %04x into register %04x\n",
+                       jam_table[i + 1],
+                       jam_table[i]);
+               ret = regmap_write(smi->map,
+                                  jam_table[i],
+                                  jam_table[i + 1]);
+               if (ret)
+                       return ret;
+       }
+
+       /* Set up the "green ethernet" feature */
+       i = 0;
+       while (i < ARRAY_SIZE(rtl8366rb_green_jam)) {
+               ret = regmap_read(smi->map, RTL8366RB_PHY_ACCESS_BUSY_REG,
+                                 &val);
+               if (ret)
+                       return ret;
+               if (!(val & RTL8366RB_PHY_INT_BUSY)) {
+                       ret = regmap_write(smi->map,
+                                          RTL8366RB_PHY_ACCESS_CTRL_REG,
+                                          RTL8366RB_PHY_CTRL_WRITE);
+                       if (ret)
+                               return ret;
+                       ret = regmap_write(smi->map,
+                                          rtl8366rb_green_jam[i][0],
+                                          rtl8366rb_green_jam[i][1]);
+                       if (ret)
+                               return ret;
+                       i++;
+               }
+       }
+       ret = regmap_write(smi->map,
+                          RTL8366RB_GREEN_FEATURE_REG,
+                          (chip_ver == 1) ? 0x0007 : 0x0003);
+       if (ret)
+               return ret;
+
+       /* Vendor driver sets 0x240 in registers 0xc and 0xd (undocumented) */
+       ret = regmap_write(smi->map, 0x0c, 0x240);
+       if (ret)
+               return ret;
+       ret = regmap_write(smi->map, 0x0d, 0x240);
+       if (ret)
+               return ret;
+
+       /* Set some random MAC address */
+       ret = rtl8366rb_set_addr(smi);
+       if (ret)
+               return ret;
+
+       /* Enable CPU port and enable inserting CPU tag
+        *
+        * Disabling RTL8368RB_CPU_INSTAG here changes the behaviour
+        * of the switch completely: it starts talking the Realtek
+        * Remote Control Protocol (RRCP) internally. It is probably
+        * possible to experiment with this, but then the kernel needs
+        * to understand and handle RRCP first.
+        */
+       ret = regmap_update_bits(smi->map, RTL8368RB_CPU_CTRL_REG,
+                                0xFFFF,
+                                RTL8368RB_CPU_INSTAG | BIT(smi->cpu_port));
+       if (ret)
+               return ret;
+
+       /* Make sure we default-enable the fixed CPU port */
+       ret = regmap_update_bits(smi->map, RTL8366RB_PECR,
+                                BIT(smi->cpu_port),
+                                0);
+       if (ret)
+               return ret;
+
+       /* Set maximum packet length to 1536 bytes */
+       ret = regmap_update_bits(smi->map, RTL8366RB_SGCR,
+                                RTL8366RB_SGCR_MAX_LENGTH_MASK,
+                                RTL8366RB_SGCR_MAX_LENGTH_1536);
+       if (ret)
+               return ret;
+
+       /* Enable learning for all ports */
+       ret = regmap_write(smi->map, RTL8366RB_SSCR0, 0);
+       if (ret)
+               return ret;
+
+       /* Enable auto ageing for all ports */
+       ret = regmap_write(smi->map, RTL8366RB_SSCR1, 0);
+       if (ret)
+               return ret;
+
+       /* Discard VLAN-tagged packets if the port is not a member of
+        * the VLAN with which the packets are associated.
+        */
+       ret = regmap_write(smi->map, RTL8366RB_VLAN_INGRESS_CTRL2_REG,
+                          RTL8366RB_PORT_ALL);
+       if (ret)
+               return ret;
+
+       /* Don't drop packets whose DA has not been learned */
+       ret = regmap_update_bits(smi->map, RTL8366RB_SSCR2,
+                                RTL8366RB_SSCR2_DROP_UNKNOWN_DA, 0);
+       if (ret)
+               return ret;
+
+       /* Set blinking, TODO: make this configurable */
+       ret = regmap_update_bits(smi->map, RTL8366RB_LED_BLINKRATE_REG,
+                                RTL8366RB_LED_BLINKRATE_MASK,
+                                RTL8366RB_LED_BLINKRATE_56MS);
+       if (ret)
+               return ret;
+
+       /* Set up LED activity:
+        * Each port has 4 LEDs, we configure all ports to the same
+        * behaviour (no individual config) but we can set up each
+        * LED separately.
+        */
+       if (smi->leds_disabled) {
+               /* Turn everything off */
+               regmap_update_bits(smi->map,
+                                  RTL8366RB_LED_0_1_CTRL_REG,
+                                  0x0FFF, 0);
+               regmap_update_bits(smi->map,
+                                  RTL8366RB_LED_2_3_CTRL_REG,
+                                  0x0FFF, 0);
+               regmap_update_bits(smi->map,
+                                  RTL8366RB_INTERRUPT_CONTROL_REG,
+                                  RTL8366RB_P4_RGMII_LED,
+                                  0);
+               val = RTL8366RB_LED_OFF;
+       } else {
+               /* TODO: make this configurable per LED */
+               val = RTL8366RB_LED_FORCE;
+       }
+       for (i = 0; i < 4; i++) {
+               ret = regmap_update_bits(smi->map,
+                                        RTL8366RB_LED_CTRL_REG,
+                                        0xf << (i * 4),
+                                        val << (i * 4));
+               if (ret)
+                       return ret;
+       }
+
+       ret = rtl8366_init_vlan(smi);
+       if (ret)
+               return ret;
+
+       ret = rtl8366rb_setup_cascaded_irq(smi);
+       if (ret)
+               dev_info(smi->dev, "no interrupt support\n");
+
+       ret = realtek_smi_setup_mdio(smi);
+       if (ret) {
+               dev_info(smi->dev, "could not set up MDIO bus\n");
+               return -ENODEV;
+       }
+
+       return 0;
+}
+
+static enum dsa_tag_protocol rtl8366_get_tag_protocol(struct dsa_switch *ds,
+                                                     int port)
+{
+       /* For now, the RTL switches are handled without any custom tags.
+        *
+        * It is possible to turn on "custom tags" by removing the
+        * RTL8368RB_CPU_INSTAG flag when enabling the port, but what
+        * that does is unfamiliar to DSA: Ethernet frames of type
+        * 0x8899, the Realtek Remote Control Protocol (RRCP), start to
+        * appear on the CPU port of the device. So this is not the
+        * ordinary few extra bytes in the frame. Instead it appears
+        * that the switch starts to talk RRCP internally, which means
+        * a pretty complex RRCP implementation decoding and responding
+        * to the protocol is needed to exploit this.
+        *
+        * The OpenRRCP project (dormant since 2009) has
+        * reverse-engineered parts of the protocol.
+        */
+       return DSA_TAG_PROTO_NONE;
+}
+
+static void rtl8366rb_adjust_link(struct dsa_switch *ds, int port,
+                                 struct phy_device *phydev)
+{
+       struct realtek_smi *smi = ds->priv;
+       int ret;
+
+       if (port != smi->cpu_port)
+               return;
+
+       dev_info(smi->dev, "adjust link on CPU port (%d)\n", port);
+
+       /* Force the fixed CPU port into 1Gbit mode, no autonegotiation */
+       ret = regmap_update_bits(smi->map, RTL8366RB_MAC_FORCE_CTRL_REG,
+                                BIT(port), BIT(port));
+       if (ret)
+               return;
+
+       ret = regmap_update_bits(smi->map, RTL8366RB_PAACR2,
+                                0xFF00U,
+                                RTL8366RB_PAACR_CPU_PORT << 8);
+       if (ret)
+               return;
+
+       /* Enable the CPU port */
+       ret = regmap_update_bits(smi->map, RTL8366RB_PECR, BIT(port),
+                                0);
+       if (ret)
+               return;
+}
+
+static void rb8366rb_set_port_led(struct realtek_smi *smi,
+                                 int port, bool enable)
+{
+       u16 val = enable ? 0x3f : 0;
+       int ret;
+
+       if (smi->leds_disabled)
+               return;
+
+       switch (port) {
+       case 0:
+               ret = regmap_update_bits(smi->map,
+                                        RTL8366RB_LED_0_1_CTRL_REG,
+                                        0x3F, val);
+               break;
+       case 1:
+               ret = regmap_update_bits(smi->map,
+                                        RTL8366RB_LED_0_1_CTRL_REG,
+                                        0x3F << RTL8366RB_LED_1_OFFSET,
+                                        val << RTL8366RB_LED_1_OFFSET);
+               break;
+       case 2:
+               ret = regmap_update_bits(smi->map,
+                                        RTL8366RB_LED_2_3_CTRL_REG,
+                                        0x3F, val);
+               break;
+       case 3:
+               ret = regmap_update_bits(smi->map,
+                                        RTL8366RB_LED_2_3_CTRL_REG,
+                                        0x3F << RTL8366RB_LED_3_OFFSET,
+                                        val << RTL8366RB_LED_3_OFFSET);
+               break;
+       case 4:
+               ret = regmap_update_bits(smi->map,
+                                        RTL8366RB_INTERRUPT_CONTROL_REG,
+                                        RTL8366RB_P4_RGMII_LED,
+                                        enable ? RTL8366RB_P4_RGMII_LED : 0);
+               break;
+       default:
+               dev_err(smi->dev, "no LED for port %d\n", port);
+               return;
+       }
+       if (ret)
+               dev_err(smi->dev, "error updating LED on port %d\n", port);
+}
+
+static int
+rtl8366rb_port_enable(struct dsa_switch *ds, int port,
+                     struct phy_device *phy)
+{
+       struct realtek_smi *smi = ds->priv;
+       int ret;
+
+       dev_dbg(smi->dev, "enable port %d\n", port);
+       ret = regmap_update_bits(smi->map, RTL8366RB_PECR, BIT(port),
+                                0);
+       if (ret)
+               return ret;
+
+       rb8366rb_set_port_led(smi, port, true);
+       return 0;
+}
+
+static void
+rtl8366rb_port_disable(struct dsa_switch *ds, int port,
+                      struct phy_device *phy)
+{
+       struct realtek_smi *smi = ds->priv;
+       int ret;
+
+       dev_dbg(smi->dev, "disable port %d\n", port);
+       ret = regmap_update_bits(smi->map, RTL8366RB_PECR, BIT(port),
+                                BIT(port));
+       if (ret)
+               return;
+
+       rb8366rb_set_port_led(smi, port, false);
+}
+
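+/* The VLAN 4k table is accessed indirectly: a read writes the VID to
+ * the table write base, issues RTL8366RB_TABLE_VLAN_READ_CTRL through
+ * the access control register and fetches three result words from the
+ * read base; a write stores three words at the write base and issues
+ * RTL8366RB_TABLE_VLAN_WRITE_CTRL. Word 0 holds the VID, word 1 the
+ * member and untagged masks, word 2 the FID.
+ */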
+static int rtl8366rb_get_vlan_4k(struct realtek_smi *smi, u32 vid,
+                                struct rtl8366_vlan_4k *vlan4k)
+{
+       u32 data[3];
+       int ret;
+       int i;
+
+       memset(vlan4k, 0, sizeof(struct rtl8366_vlan_4k));
+
+       if (vid >= RTL8366RB_NUM_VIDS)
+               return -EINVAL;
+
+       /* write VID */
+       ret = regmap_write(smi->map, RTL8366RB_VLAN_TABLE_WRITE_BASE,
+                          vid & RTL8366RB_VLAN_VID_MASK);
+       if (ret)
+               return ret;
+
+       /* write table access control word */
+       ret = regmap_write(smi->map, RTL8366RB_TABLE_ACCESS_CTRL_REG,
+                          RTL8366RB_TABLE_VLAN_READ_CTRL);
+       if (ret)
+               return ret;
+
+       for (i = 0; i < 3; i++) {
+               ret = regmap_read(smi->map,
+                                 RTL8366RB_VLAN_TABLE_READ_BASE + i,
+                                 &data[i]);
+               if (ret)
+                       return ret;
+       }
+
+       vlan4k->vid = vid;
+       vlan4k->untag = (data[1] >> RTL8366RB_VLAN_UNTAG_SHIFT) &
+                       RTL8366RB_VLAN_UNTAG_MASK;
+       vlan4k->member = data[1] & RTL8366RB_VLAN_MEMBER_MASK;
+       vlan4k->fid = data[2] & RTL8366RB_VLAN_FID_MASK;
+
+       return 0;
+}
+
+static int rtl8366rb_set_vlan_4k(struct realtek_smi *smi,
+                                const struct rtl8366_vlan_4k *vlan4k)
+{
+       u32 data[3];
+       int ret;
+       int i;
+
+       if (vlan4k->vid >= RTL8366RB_NUM_VIDS ||
+           vlan4k->member > RTL8366RB_VLAN_MEMBER_MASK ||
+           vlan4k->untag > RTL8366RB_VLAN_UNTAG_MASK ||
+           vlan4k->fid > RTL8366RB_FIDMAX)
+               return -EINVAL;
+
+       data[0] = vlan4k->vid & RTL8366RB_VLAN_VID_MASK;
+       data[1] = (vlan4k->member & RTL8366RB_VLAN_MEMBER_MASK) |
+                 ((vlan4k->untag & RTL8366RB_VLAN_UNTAG_MASK) <<
+                       RTL8366RB_VLAN_UNTAG_SHIFT);
+       data[2] = vlan4k->fid & RTL8366RB_VLAN_FID_MASK;
+
+       for (i = 0; i < 3; i++) {
+               ret = regmap_write(smi->map,
+                                  RTL8366RB_VLAN_TABLE_WRITE_BASE + i,
+                                  data[i]);
+               if (ret)
+                       return ret;
+       }
+
+       /* write table access control word */
+       ret = regmap_write(smi->map, RTL8366RB_TABLE_ACCESS_CTRL_REG,
+                          RTL8366RB_TABLE_VLAN_WRITE_CTRL);
+
+       return ret;
+}
+
+static int rtl8366rb_get_vlan_mc(struct realtek_smi *smi, u32 index,
+                                struct rtl8366_vlan_mc *vlanmc)
+{
+       u32 data[3];
+       int ret;
+       int i;
+
+       memset(vlanmc, 0, sizeof(struct rtl8366_vlan_mc));
+
+       if (index >= RTL8366RB_NUM_VLANS)
+               return -EINVAL;
+
+       for (i = 0; i < 3; i++) {
+               ret = regmap_read(smi->map,
+                                 RTL8366RB_VLAN_MC_BASE(index) + i,
+                                 &data[i]);
+               if (ret)
+                       return ret;
+       }
+
+       vlanmc->vid = data[0] & RTL8366RB_VLAN_VID_MASK;
+       vlanmc->priority = (data[0] >> RTL8366RB_VLAN_PRIORITY_SHIFT) &
+               RTL8366RB_VLAN_PRIORITY_MASK;
+       vlanmc->untag = (data[1] >> RTL8366RB_VLAN_UNTAG_SHIFT) &
+               RTL8366RB_VLAN_UNTAG_MASK;
+       vlanmc->member = data[1] & RTL8366RB_VLAN_MEMBER_MASK;
+       vlanmc->fid = data[2] & RTL8366RB_VLAN_FID_MASK;
+
+       return 0;
+}
+
+static int rtl8366rb_set_vlan_mc(struct realtek_smi *smi, u32 index,
+                                const struct rtl8366_vlan_mc *vlanmc)
+{
+       u32 data[3];
+       int ret;
+       int i;
+
+       if (index >= RTL8366RB_NUM_VLANS ||
+           vlanmc->vid >= RTL8366RB_NUM_VIDS ||
+           vlanmc->priority > RTL8366RB_PRIORITYMAX ||
+           vlanmc->member > RTL8366RB_VLAN_MEMBER_MASK ||
+           vlanmc->untag > RTL8366RB_VLAN_UNTAG_MASK ||
+           vlanmc->fid > RTL8366RB_FIDMAX)
+               return -EINVAL;
+
+       data[0] = (vlanmc->vid & RTL8366RB_VLAN_VID_MASK) |
+                 ((vlanmc->priority & RTL8366RB_VLAN_PRIORITY_MASK) <<
+                       RTL8366RB_VLAN_PRIORITY_SHIFT);
+       data[1] = (vlanmc->member & RTL8366RB_VLAN_MEMBER_MASK) |
+                 ((vlanmc->untag & RTL8366RB_VLAN_UNTAG_MASK) <<
+                       RTL8366RB_VLAN_UNTAG_SHIFT);
+       data[2] = vlanmc->fid & RTL8366RB_VLAN_FID_MASK;
+
+       for (i = 0; i < 3; i++) {
+               ret = regmap_write(smi->map,
+                                  RTL8366RB_VLAN_MC_BASE(index) + i,
+                                  data[i]);
+               if (ret)
+                       return ret;
+       }
+
+       return 0;
+}
+
+static int rtl8366rb_get_mc_index(struct realtek_smi *smi, int port, int *val)
+{
+       u32 data;
+       int ret;
+
+       if (port >= smi->num_ports)
+               return -EINVAL;
+
+       ret = regmap_read(smi->map, RTL8366RB_PORT_VLAN_CTRL_REG(port),
+                         &data);
+       if (ret)
+               return ret;
+
+       *val = (data >> RTL8366RB_PORT_VLAN_CTRL_SHIFT(port)) &
+               RTL8366RB_PORT_VLAN_CTRL_MASK;
+
+       return 0;
+}
+
+static int rtl8366rb_set_mc_index(struct realtek_smi *smi, int port, int index)
+{
+       if (port >= smi->num_ports || index >= RTL8366RB_NUM_VLANS)
+               return -EINVAL;
+
+       return regmap_update_bits(smi->map, RTL8366RB_PORT_VLAN_CTRL_REG(port),
+                               RTL8366RB_PORT_VLAN_CTRL_MASK <<
+                                       RTL8366RB_PORT_VLAN_CTRL_SHIFT(port),
+                               (index & RTL8366RB_PORT_VLAN_CTRL_MASK) <<
+                                       RTL8366RB_PORT_VLAN_CTRL_SHIFT(port));
+}
+
+static bool rtl8366rb_is_vlan_valid(struct realtek_smi *smi, unsigned int vlan)
+{
+       unsigned int max = RTL8366RB_NUM_VLANS;
+
+       if (smi->vlan4k_enabled)
+               max = RTL8366RB_NUM_VIDS - 1;
+
+       if (vlan == 0 || vlan >= max)
+               return false;
+
+       return true;
+}
+
+static int rtl8366rb_enable_vlan(struct realtek_smi *smi, bool enable)
+{
+       dev_dbg(smi->dev, "%s VLAN\n", enable ? "enable" : "disable");
+       return regmap_update_bits(smi->map,
+                                 RTL8366RB_SGCR, RTL8366RB_SGCR_EN_VLAN,
+                                 enable ? RTL8366RB_SGCR_EN_VLAN : 0);
+}
+
+static int rtl8366rb_enable_vlan4k(struct realtek_smi *smi, bool enable)
+{
+       dev_dbg(smi->dev, "%s VLAN 4k\n", enable ? "enable" : "disable");
+       return regmap_update_bits(smi->map, RTL8366RB_SGCR,
+                                 RTL8366RB_SGCR_EN_VLAN_4KTB,
+                                 enable ? RTL8366RB_SGCR_EN_VLAN_4KTB : 0);
+}
+
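+/* PHY registers are accessed indirectly: arm a read or write through
+ * the PHY access control register, then touch a pseudo-register
+ * encoding the PHY and register number; for reads the result is then
+ * fetched from RTL8366RB_PHY_ACCESS_DATA_REG. Illustration: PHY 2,
+ * register 1 encodes as 0x8000 | (1 << (2 + 9)) | 1 = 0x8801.
+ */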
+static int rtl8366rb_phy_read(struct realtek_smi *smi, int phy, int regnum)
+{
+       u32 val;
+       u32 reg;
+       int ret;
+
+       if (phy > RTL8366RB_PHY_NO_MAX)
+               return -EINVAL;
+
+       ret = regmap_write(smi->map, RTL8366RB_PHY_ACCESS_CTRL_REG,
+                          RTL8366RB_PHY_CTRL_READ);
+       if (ret)
+               return ret;
+
+       reg = 0x8000 | (1 << (phy + RTL8366RB_PHY_NO_OFFSET)) | regnum;
+
+       ret = regmap_write(smi->map, reg, 0);
+       if (ret) {
+               dev_err(smi->dev,
+                       "failed to write PHY%d reg %04x @ %04x, ret %d\n",
+                       phy, regnum, reg, ret);
+               return ret;
+       }
+
+       ret = regmap_read(smi->map, RTL8366RB_PHY_ACCESS_DATA_REG, &val);
+       if (ret)
+               return ret;
+
+       dev_dbg(smi->dev, "read PHY%d register 0x%04x @ %04x, val <- %04x\n",
+               phy, regnum, reg, val);
+
+       return val;
+}
+
+static int rtl8366rb_phy_write(struct realtek_smi *smi, int phy, int regnum,
+                              u16 val)
+{
+       u32 reg;
+       int ret;
+
+       if (phy > RTL8366RB_PHY_NO_MAX)
+               return -EINVAL;
+
+       ret = regmap_write(smi->map, RTL8366RB_PHY_ACCESS_CTRL_REG,
+                          RTL8366RB_PHY_CTRL_WRITE);
+       if (ret)
+               return ret;
+
+       reg = 0x8000 | (1 << (phy + RTL8366RB_PHY_NO_OFFSET)) | regnum;
+
+       dev_dbg(smi->dev, "write PHY%d register 0x%04x @ %04x, val -> %04x\n",
+               phy, regnum, reg, val);
+
+       ret = regmap_write(smi->map, reg, val);
+       if (ret)
+               return ret;
+
+       return 0;
+}
+
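+/* Trigger a hard reset and poll for completion; 10 polls spaced
+ * 20-25 ms apart bound the wait to roughly 250 ms (illustrative
+ * worst case).
+ */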
+static int rtl8366rb_reset_chip(struct realtek_smi *smi)
+{
+       int timeout = 10;
+       u32 val;
+       int ret;
+
+       realtek_smi_write_reg_noack(smi, RTL8366RB_RESET_CTRL_REG,
+                                   RTL8366RB_CHIP_CTRL_RESET_HW);
+       do {
+               usleep_range(20000, 25000);
+               ret = regmap_read(smi->map, RTL8366RB_RESET_CTRL_REG, &val);
+               if (ret)
+                       return ret;
+
+               if (!(val & RTL8366RB_CHIP_CTRL_RESET_HW))
+                       break;
+       } while (--timeout);
+
+       if (!timeout) {
+               dev_err(smi->dev, "timeout waiting for the switch to reset\n");
+               return -EIO;
+       }
+
+       return 0;
+}
+
+static int rtl8366rb_detect(struct realtek_smi *smi)
+{
+       struct device *dev = smi->dev;
+       int ret;
+       u32 val;
+
+       /* Detect device */
+       ret = regmap_read(smi->map, 0x5c, &val);
+       if (ret) {
+               dev_err(dev, "can't get chip ID (%d)\n", ret);
+               return ret;
+       }
+
+       switch (val) {
+       case 0x6027:
+               dev_info(dev, "found an RTL8366S switch\n");
+               dev_err(dev, "this switch is not yet supported, submit patches!\n");
+               return -ENODEV;
+       case 0x5937:
+               dev_info(dev, "found an RTL8366RB switch\n");
+               smi->cpu_port = RTL8366RB_PORT_NUM_CPU;
+               smi->num_ports = RTL8366RB_NUM_PORTS;
+               smi->num_vlan_mc = RTL8366RB_NUM_VLANS;
+               smi->mib_counters = rtl8366rb_mib_counters;
+               smi->num_mib_counters = ARRAY_SIZE(rtl8366rb_mib_counters);
+               break;
+       default:
+               dev_info(dev, "found an unknown Realtek switch (id=0x%04x)\n",
+                        val);
+               break;
+       }
+
+       ret = rtl8366rb_reset_chip(smi);
+       if (ret)
+               return ret;
+
+       return 0;
+}
+
+static const struct dsa_switch_ops rtl8366rb_switch_ops = {
+       .get_tag_protocol = rtl8366_get_tag_protocol,
+       .setup = rtl8366rb_setup,
+       .adjust_link = rtl8366rb_adjust_link,
+       .get_strings = rtl8366_get_strings,
+       .get_ethtool_stats = rtl8366_get_ethtool_stats,
+       .get_sset_count = rtl8366_get_sset_count,
+       .port_vlan_filtering = rtl8366_vlan_filtering,
+       .port_vlan_prepare = rtl8366_vlan_prepare,
+       .port_vlan_add = rtl8366_vlan_add,
+       .port_vlan_del = rtl8366_vlan_del,
+       .port_enable = rtl8366rb_port_enable,
+       .port_disable = rtl8366rb_port_disable,
+};
+
+static const struct realtek_smi_ops rtl8366rb_smi_ops = {
+       .detect         = rtl8366rb_detect,
+       .get_vlan_mc    = rtl8366rb_get_vlan_mc,
+       .set_vlan_mc    = rtl8366rb_set_vlan_mc,
+       .get_vlan_4k    = rtl8366rb_get_vlan_4k,
+       .set_vlan_4k    = rtl8366rb_set_vlan_4k,
+       .get_mc_index   = rtl8366rb_get_mc_index,
+       .set_mc_index   = rtl8366rb_set_mc_index,
+       .get_mib_counter = rtl8366rb_get_mib_counter,
+       .is_vlan_valid  = rtl8366rb_is_vlan_valid,
+       .enable_vlan    = rtl8366rb_enable_vlan,
+       .enable_vlan4k  = rtl8366rb_enable_vlan4k,
+       .phy_read       = rtl8366rb_phy_read,
+       .phy_write      = rtl8366rb_phy_write,
+};
+
+const struct realtek_smi_variant rtl8366rb_variant = {
+       .ds_ops = &rtl8366rb_switch_ops,
+       .ops = &rtl8366rb_smi_ops,
+       .clk_delay = 10,
+       .cmd_read = 0xa9,
+       .cmd_write = 0xa8,
+};
+EXPORT_SYMBOL_GPL(rtl8366rb_variant);
diff --git a/drivers/net/dsa/vitesse-vsc73xx.c b/drivers/net/dsa/vitesse-vsc73xx.c
new file mode 100644
index 0000000..9f1b5f2
--- /dev/null
@@ -0,0 +1,1365 @@
+// SPDX-License-Identifier: GPL-2.0
+/* DSA driver for:
+ * Vitesse VSC7385 SparX-G5 5+1-port Integrated Gigabit Ethernet Switch
+ * Vitesse VSC7388 SparX-G8 8-port Integrated Gigabit Ethernet Switch
+ * Vitesse VSC7395 SparX-G5e 5+1-port Integrated Gigabit Ethernet Switch
+ * Vitesse VSC7398 SparX-G8e 8-port Integrated Gigabit Ethernet Switch
+ *
+ * These switches have a built-in 8051 CPU and can download and execute
+ * firmware on this CPU. They can also be configured to use an external CPU
+ * handling the switch in a memory-mapped manner by connecting to that
+ * external CPU's memory bus.
+ *
+ * This driver (currently) only takes control of the switch chip over SPI and
+ * configures it to route packets around when connected to a CPU port. The
+ * chip has embedded PHYs and VLAN support so we model it using DSA.
+ *
+ * Copyright (C) 2018 Linus Walleij <linus.walleij@linaro.org>
+ * Includes portions of code from the firmware uploader by:
+ * Copyright (C) 2009 Gabor Juhos <juhosg@openwrt.org>
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_mdio.h>
+#include <linux/platform_device.h>
+#include <linux/spi/spi.h>
+#include <linux/bitops.h>
+#include <linux/if_bridge.h>
+#include <linux/etherdevice.h>
+#include <linux/gpio/consumer.h>
+#include <linux/gpio/driver.h>
+#include <linux/random.h>
+#include <net/dsa.h>
+
+#define VSC73XX_BLOCK_MAC      0x1 /* Subblocks 0-4, 6 (CPU port) */
+#define VSC73XX_BLOCK_ANALYZER 0x2 /* Only subblock 0 */
+#define VSC73XX_BLOCK_MII      0x3 /* Subblocks 0 and 1 */
+#define VSC73XX_BLOCK_MEMINIT  0x3 /* Only subblock 2 */
+#define VSC73XX_BLOCK_CAPTURE  0x4 /* Only subblock 2 */
+#define VSC73XX_BLOCK_ARBITER  0x5 /* Only subblock 0 */
+#define VSC73XX_BLOCK_SYSTEM   0x7 /* Only subblock 0 */
+
+#define CPU_PORT       6 /* CPU port */
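+/* Registers are addressed by (block, subblock, register): e.g. the
+ * per-port MAC registers live in block 1 with one subblock per port,
+ * subblock 6 being the CPU port (see the block defines above).
+ */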
+
+/* MAC Block registers */
+#define VSC73XX_MAC_CFG                0x00
+#define VSC73XX_MACHDXGAP      0x02
+#define VSC73XX_FCCONF         0x04
+#define VSC73XX_FCMACHI                0x08
+#define VSC73XX_FCMACLO                0x0c
+#define VSC73XX_MAXLEN         0x10
+#define VSC73XX_ADVPORTM       0x19
+#define VSC73XX_TXUPDCFG       0x24
+#define VSC73XX_TXQ_SELECT_CFG 0x28
+#define VSC73XX_RXOCT          0x50
+#define VSC73XX_TXOCT          0x51
+#define VSC73XX_C_RX0          0x52
+#define VSC73XX_C_RX1          0x53
+#define VSC73XX_C_RX2          0x54
+#define VSC73XX_C_TX0          0x55
+#define VSC73XX_C_TX1          0x56
+#define VSC73XX_C_TX2          0x57
+#define VSC73XX_C_CFG          0x58
+#define VSC73XX_CAT_DROP       0x6e
+#define VSC73XX_CAT_PR_MISC_L2 0x6f
+#define VSC73XX_CAT_PR_USR_PRIO        0x75
+#define VSC73XX_Q_MISC_CONF    0xdf
+
+/* MAC_CFG register bits */
+#define VSC73XX_MAC_CFG_WEXC_DIS       BIT(31)
+#define VSC73XX_MAC_CFG_PORT_RST       BIT(29)
+#define VSC73XX_MAC_CFG_TX_EN          BIT(28)
+#define VSC73XX_MAC_CFG_SEED_LOAD      BIT(27)
+#define VSC73XX_MAC_CFG_SEED_MASK      GENMASK(26, 19)
+#define VSC73XX_MAC_CFG_SEED_OFFSET    19
+#define VSC73XX_MAC_CFG_FDX            BIT(18)
+#define VSC73XX_MAC_CFG_GIGA_MODE      BIT(17)
+#define VSC73XX_MAC_CFG_RX_EN          BIT(16)
+#define VSC73XX_MAC_CFG_VLAN_DBLAWR    BIT(15)
+#define VSC73XX_MAC_CFG_VLAN_AWR       BIT(14)
+#define VSC73XX_MAC_CFG_100_BASE_T     BIT(13) /* Not in manual */
+#define VSC73XX_MAC_CFG_TX_IPG_MASK    GENMASK(10, 6)
+#define VSC73XX_MAC_CFG_TX_IPG_OFFSET  6
+#define VSC73XX_MAC_CFG_TX_IPG_1000M   (6 << VSC73XX_MAC_CFG_TX_IPG_OFFSET)
+#define VSC73XX_MAC_CFG_TX_IPG_100_10M (17 << VSC73XX_MAC_CFG_TX_IPG_OFFSET)
+#define VSC73XX_MAC_CFG_MAC_RX_RST     BIT(5)
+#define VSC73XX_MAC_CFG_MAC_TX_RST     BIT(4)
+#define VSC73XX_MAC_CFG_CLK_SEL_MASK   GENMASK(2, 0)
+#define VSC73XX_MAC_CFG_CLK_SEL_OFFSET 0
+#define VSC73XX_MAC_CFG_CLK_SEL_1000M  1
+#define VSC73XX_MAC_CFG_CLK_SEL_100M   2
+#define VSC73XX_MAC_CFG_CLK_SEL_10M    3
+#define VSC73XX_MAC_CFG_CLK_SEL_EXT    4
+
+#define VSC73XX_MAC_CFG_1000M_F_PHY    (VSC73XX_MAC_CFG_FDX | \
+                                        VSC73XX_MAC_CFG_GIGA_MODE | \
+                                        VSC73XX_MAC_CFG_TX_IPG_1000M | \
+                                        VSC73XX_MAC_CFG_CLK_SEL_EXT)
+#define VSC73XX_MAC_CFG_100_10M_F_PHY  (VSC73XX_MAC_CFG_FDX | \
+                                        VSC73XX_MAC_CFG_TX_IPG_100_10M | \
+                                        VSC73XX_MAC_CFG_CLK_SEL_EXT)
+#define VSC73XX_MAC_CFG_100_10M_H_PHY  (VSC73XX_MAC_CFG_TX_IPG_100_10M | \
+                                        VSC73XX_MAC_CFG_CLK_SEL_EXT)
+#define VSC73XX_MAC_CFG_1000M_F_RGMII  (VSC73XX_MAC_CFG_FDX | \
+                                        VSC73XX_MAC_CFG_GIGA_MODE | \
+                                        VSC73XX_MAC_CFG_TX_IPG_1000M | \
+                                        VSC73XX_MAC_CFG_CLK_SEL_1000M)
+#define VSC73XX_MAC_CFG_RESET          (VSC73XX_MAC_CFG_PORT_RST | \
+                                        VSC73XX_MAC_CFG_MAC_RX_RST | \
+                                        VSC73XX_MAC_CFG_MAC_TX_RST)
+
+/* Flow control register bits */
+#define VSC73XX_FCCONF_ZERO_PAUSE_EN   BIT(17)
+#define VSC73XX_FCCONF_FLOW_CTRL_OBEY  BIT(16)
+#define VSC73XX_FCCONF_PAUSE_VAL_MASK  GENMASK(15, 0)
+
+/* ADVPORTM advanced port setup register bits */
+#define VSC73XX_ADVPORTM_IFG_PPM       BIT(7)
+#define VSC73XX_ADVPORTM_EXC_COL_CONT  BIT(6)
+#define VSC73XX_ADVPORTM_EXT_PORT      BIT(5)
+#define VSC73XX_ADVPORTM_INV_GTX       BIT(4)
+#define VSC73XX_ADVPORTM_ENA_GTX       BIT(3)
+#define VSC73XX_ADVPORTM_DDR_MODE      BIT(2)
+#define VSC73XX_ADVPORTM_IO_LOOPBACK   BIT(1)
+#define VSC73XX_ADVPORTM_HOST_LOOPBACK BIT(0)
+
+/* CAT_DROP categorizer frame dropping register bits */
+#define VSC73XX_CAT_DROP_DROP_MC_SMAC_ENA      BIT(6)
+#define VSC73XX_CAT_DROP_FWD_CTRL_ENA          BIT(4)
+#define VSC73XX_CAT_DROP_FWD_PAUSE_ENA         BIT(3)
+#define VSC73XX_CAT_DROP_UNTAGGED_ENA          BIT(2)
+#define VSC73XX_CAT_DROP_TAGGED_ENA            BIT(1)
+#define VSC73XX_CAT_DROP_NULL_MAC_ENA          BIT(0)
+
+#define VSC73XX_Q_MISC_CONF_EXTENT_MEM         BIT(31)
+#define VSC73XX_Q_MISC_CONF_EARLY_TX_MASK      GENMASK(4, 1)
+#define VSC73XX_Q_MISC_CONF_EARLY_TX_512       (1 << 1)
+#define VSC73XX_Q_MISC_CONF_MAC_PAUSE_MODE     BIT(0)
+
+/* Frame analyzer block 2 registers */
+#define VSC73XX_STORMLIMIT     0x02
+#define VSC73XX_ADVLEARN       0x03
+#define VSC73XX_IFLODMSK       0x04
+#define VSC73XX_VLANMASK       0x05
+#define VSC73XX_MACHDATA       0x06
+#define VSC73XX_MACLDATA       0x07
+#define VSC73XX_ANMOVED                0x08
+#define VSC73XX_ANAGEFIL       0x09
+#define VSC73XX_ANEVENTS       0x0a
+#define VSC73XX_ANCNTMASK      0x0b
+#define VSC73XX_ANCNTVAL       0x0c
+#define VSC73XX_LEARNMASK      0x0d
+#define VSC73XX_UFLODMASK      0x0e
+#define VSC73XX_MFLODMASK      0x0f
+#define VSC73XX_RECVMASK       0x10
+#define VSC73XX_AGGRCTRL       0x20
+#define VSC73XX_AGGRMSKS       0x30 /* Until 0x3f */
+#define VSC73XX_DSTMASKS       0x40 /* Until 0x7f */
+#define VSC73XX_SRCMASKS       0x80 /* Until 0x87 */
+#define VSC73XX_CAPENAB                0xa0
+#define VSC73XX_MACACCESS      0xb0
+#define VSC73XX_IPMCACCESS     0xb1
+#define VSC73XX_MACTINDX       0xc0
+#define VSC73XX_VLANACCESS     0xd0
+#define VSC73XX_VLANTIDX       0xe0
+#define VSC73XX_AGENCTRL       0xf0
+#define VSC73XX_CAPRST         0xff
+
+#define VSC73XX_MACACCESS_CPU_COPY             BIT(14)
+#define VSC73XX_MACACCESS_FWD_KILL             BIT(13)
+#define VSC73XX_MACACCESS_IGNORE_VLAN          BIT(12)
+#define VSC73XX_MACACCESS_AGED_FLAG            BIT(11)
+#define VSC73XX_MACACCESS_VALID                        BIT(10)
+#define VSC73XX_MACACCESS_LOCKED               BIT(9)
+#define VSC73XX_MACACCESS_DEST_IDX_MASK                GENMASK(8, 3)
+#define VSC73XX_MACACCESS_CMD_MASK             GENMASK(2, 0)
+#define VSC73XX_MACACCESS_CMD_IDLE             0
+#define VSC73XX_MACACCESS_CMD_LEARN            1
+#define VSC73XX_MACACCESS_CMD_FORGET           2
+#define VSC73XX_MACACCESS_CMD_AGE_TABLE                3
+#define VSC73XX_MACACCESS_CMD_FLUSH_TABLE      4
+#define VSC73XX_MACACCESS_CMD_CLEAR_TABLE      5
+#define VSC73XX_MACACCESS_CMD_READ_ENTRY       6
+#define VSC73XX_MACACCESS_CMD_WRITE_ENTRY      7
+
+#define VSC73XX_VLANACCESS_LEARN_DISABLED      BIT(30)
+#define VSC73XX_VLANACCESS_VLAN_MIRROR         BIT(29)
+#define VSC73XX_VLANACCESS_VLAN_SRC_CHECK      BIT(28)
+#define VSC73XX_VLANACCESS_VLAN_PORT_MASK      GENMASK(9, 2)
+#define VSC73XX_VLANACCESS_VLAN_TBL_CMD_MASK   GENMASK(2, 0)
+#define VSC73XX_VLANACCESS_VLAN_TBL_CMD_IDLE   0
+#define VSC73XX_VLANACCESS_VLAN_TBL_CMD_READ_ENTRY     1
+#define VSC73XX_VLANACCESS_VLAN_TBL_CMD_WRITE_ENTRY    2
+#define VSC73XX_VLANACCESS_VLAN_TBL_CMD_CLEAR_TABLE    3
+
+/* MII block 3 registers */
+#define VSC73XX_MII_STAT       0x0
+#define VSC73XX_MII_CMD                0x1
+#define VSC73XX_MII_DATA       0x2
+
+/* Arbiter block 5 registers */
+#define VSC73XX_ARBEMPTY               0x0c
+#define VSC73XX_ARBDISC                        0x0e
+#define VSC73XX_SBACKWDROP             0x12
+#define VSC73XX_DBACKWDROP             0x13
+#define VSC73XX_ARBBURSTPROB           0x15
+
+/* System block 7 registers */
+#define VSC73XX_ICPU_SIPAD             0x01
+#define VSC73XX_GMIIDELAY              0x05
+#define VSC73XX_ICPU_CTRL              0x10
+#define VSC73XX_ICPU_ADDR              0x11
+#define VSC73XX_ICPU_SRAM              0x12
+#define VSC73XX_HWSEM                  0x13
+#define VSC73XX_GLORESET               0x14
+#define VSC73XX_ICPU_MBOX_VAL          0x15
+#define VSC73XX_ICPU_MBOX_SET          0x16
+#define VSC73XX_ICPU_MBOX_CLR          0x17
+#define VSC73XX_CHIPID                 0x18
+#define VSC73XX_GPIO                   0x34
+
+#define VSC73XX_GMIIDELAY_GMII0_GTXDELAY_NONE  0
+#define VSC73XX_GMIIDELAY_GMII0_GTXDELAY_1_4_NS        1
+#define VSC73XX_GMIIDELAY_GMII0_GTXDELAY_1_7_NS        2
+#define VSC73XX_GMIIDELAY_GMII0_GTXDELAY_2_0_NS        3
+
+#define VSC73XX_GMIIDELAY_GMII0_RXDELAY_NONE   (0 << 4)
+#define VSC73XX_GMIIDELAY_GMII0_RXDELAY_1_4_NS (1 << 4)
+#define VSC73XX_GMIIDELAY_GMII0_RXDELAY_1_7_NS (2 << 4)
+#define VSC73XX_GMIIDELAY_GMII0_RXDELAY_2_0_NS (3 << 4)
+
+#define VSC73XX_ICPU_CTRL_WATCHDOG_RST BIT(31)
+#define VSC73XX_ICPU_CTRL_CLK_DIV_MASK GENMASK(12, 8)
+#define VSC73XX_ICPU_CTRL_SRST_HOLD    BIT(7)
+#define VSC73XX_ICPU_CTRL_ICPU_PI_EN   BIT(6)
+#define VSC73XX_ICPU_CTRL_BOOT_EN      BIT(3)
+#define VSC73XX_ICPU_CTRL_EXT_ACC_EN   BIT(2)
+#define VSC73XX_ICPU_CTRL_CLK_EN       BIT(1)
+#define VSC73XX_ICPU_CTRL_SRST         BIT(0)
+
+#define VSC73XX_CHIPID_ID_SHIFT                12
+#define VSC73XX_CHIPID_ID_MASK         0xffff
+#define VSC73XX_CHIPID_REV_SHIFT       28
+#define VSC73XX_CHIPID_REV_MASK                0xf
+#define VSC73XX_CHIPID_ID_7385         0x7385
+#define VSC73XX_CHIPID_ID_7388         0x7388
+#define VSC73XX_CHIPID_ID_7395         0x7395
+#define VSC73XX_CHIPID_ID_7398         0x7398
+
+#define VSC73XX_GLORESET_STROBE                BIT(4)
+#define VSC73XX_GLORESET_ICPU_LOCK     BIT(3)
+#define VSC73XX_GLORESET_MEM_LOCK      BIT(2)
+#define VSC73XX_GLORESET_PHY_RESET     BIT(1)
+#define VSC73XX_GLORESET_MASTER_RESET  BIT(0)
+
+#define VSC73XX_CMD_MODE_READ          0
+#define VSC73XX_CMD_MODE_WRITE         1
+#define VSC73XX_CMD_MODE_SHIFT         4
+#define VSC73XX_CMD_BLOCK_SHIFT                5
+#define VSC73XX_CMD_BLOCK_MASK         0x7
+#define VSC73XX_CMD_SUBBLOCK_MASK      0xf
+
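+/* The clock delay value (3 << 4) | 3 matches the 2.0 ns GTX and RX
+ * delay settings in the GMIIDELAY fields above; presumably it selects
+ * the maximum delay on both clock lines.
+ */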
+#define VSC7385_CLOCK_DELAY            ((3 << 4) | 3)
+#define VSC7385_CLOCK_DELAY_MASK       ((3 << 4) | 3)
+
+#define VSC73XX_ICPU_CTRL_STOP (VSC73XX_ICPU_CTRL_SRST_HOLD | \
+                                VSC73XX_ICPU_CTRL_BOOT_EN | \
+                                VSC73XX_ICPU_CTRL_EXT_ACC_EN)
+
+#define VSC73XX_ICPU_CTRL_START        (VSC73XX_ICPU_CTRL_CLK_DIV_MASK | \
+                                VSC73XX_ICPU_CTRL_BOOT_EN | \
+                                VSC73XX_ICPU_CTRL_CLK_EN | \
+                                VSC73XX_ICPU_CTRL_SRST)
+
+/**
+ * struct vsc73xx - VSC73xx state container
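+ * @dev: device pointer for this switch
+ * @reset: optional RESET GPIO line for the chip
+ * @spi: SPI device used to communicate with the switch
+ * @ds: DSA switch instance
+ * @gc: GPIO chip for the four GPIO lines of the switch
+ * @chipid: chip ID detected at probe (7385, 7388, 7395 or 7398)
+ * @addr: MAC address used in flow control frames
+ * @lock: mutex protecting the SPI traffic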
+ */
+struct vsc73xx {
+       struct device           *dev;
+       struct gpio_desc        *reset;
+       struct spi_device       *spi;
+       struct dsa_switch       *ds;
+       struct gpio_chip        gc;
+       u16                     chipid;
+       u8                      addr[ETH_ALEN];
+       struct mutex            lock; /* Protects SPI traffic */
+};
+
+#define IS_7385(a) ((a)->chipid == VSC73XX_CHIPID_ID_7385)
+#define IS_7388(a) ((a)->chipid == VSC73XX_CHIPID_ID_7388)
+#define IS_7395(a) ((a)->chipid == VSC73XX_CHIPID_ID_7395)
+#define IS_7398(a) ((a)->chipid == VSC73XX_CHIPID_ID_7398)
+#define IS_739X(a) (IS_7395(a) || IS_7398(a))
+
+struct vsc73xx_counter {
+       u8 counter;
+       const char *name;
+};
+
+/* Counters are named according to the MIB standards where applicable.
+ * Some counters are custom, non-standard. The standard counters are
+ * named in accordance with RFC2819, RFC2021 and IEEE Std 802.3-2002 Annex
+ * 30A Counters.
+ */
+static const struct vsc73xx_counter vsc73xx_rx_counters[] = {
+       { 0, "RxEtherStatsPkts" },
+       { 1, "RxBroadcast+MulticastPkts" }, /* non-standard counter */
+       { 2, "RxTotalErrorPackets" }, /* non-standard counter */
+       { 3, "RxEtherStatsBroadcastPkts" },
+       { 4, "RxEtherStatsMulticastPkts" },
+       { 5, "RxEtherStatsPkts64Octets" },
+       { 6, "RxEtherStatsPkts65to127Octets" },
+       { 7, "RxEtherStatsPkts128to255Octets" },
+       { 8, "RxEtherStatsPkts256to511Octets" },
+       { 9, "RxEtherStatsPkts512to1023Octets" },
+       { 10, "RxEtherStatsPkts1024to1518Octets" },
+       { 11, "RxJumboFrames" }, /* non-standard counter */
+       { 12, "RxaPauseMACControlFramesTransmitted" },
+       { 13, "RxFIFODrops" }, /* non-standard counter */
+       { 14, "RxBackwardDrops" }, /* non-standard counter */
+       { 15, "RxClassifierDrops" }, /* non-standard counter */
+       { 16, "RxEtherStatsCRCAlignErrors" },
+       { 17, "RxEtherStatsUndersizePkts" },
+       { 18, "RxEtherStatsOversizePkts" },
+       { 19, "RxEtherStatsFragments" },
+       { 20, "RxEtherStatsJabbers" },
+       { 21, "RxaMACControlFramesReceived" },
+       /* 22-24 are undefined */
+       { 25, "RxaFramesReceivedOK" },
+       { 26, "RxQoSClass0" }, /* non-standard counter */
+       { 27, "RxQoSClass1" }, /* non-standard counter */
+       { 28, "RxQoSClass2" }, /* non-standard counter */
+       { 29, "RxQoSClass3" }, /* non-standard counter */
+};
+
+static const struct vsc73xx_counter vsc73xx_tx_counters[] = {
+       { 0, "TxEtherStatsPkts" },
+       { 1, "TxBroadcast+MulticastPkts" }, /* non-standard counter */
+       { 2, "TxTotalErrorPackets" }, /* non-standard counter */
+       { 3, "TxEtherStatsBroadcastPkts" },
+       { 4, "TxEtherStatsMulticastPkts" },
+       { 5, "TxEtherStatsPkts64Octets" },
+       { 6, "TxEtherStatsPkts65to127Octets" },
+       { 7, "TxEtherStatsPkts128to255Octets" },
+       { 8, "TxEtherStatsPkts256to511Octets" },
+       { 9, "TxEtherStatsPkts512to1023Octets" },
+       { 10, "TxEtherStatsPkts1024to1518Octets" },
+       { 11, "TxJumboFrames" }, /* non-standard counter */
+       { 12, "TxaPauseMACControlFramesTransmitted" },
+       { 13, "TxFIFODrops" }, /* non-standard counter */
+       { 14, "TxDrops" }, /* non-standard counter */
+       { 15, "TxEtherStatsCollisions" },
+       { 16, "TxEtherStatsCRCAlignErrors" },
+       { 17, "TxEtherStatsUndersizePkts" },
+       { 18, "TxEtherStatsOversizePkts" },
+       { 19, "TxEtherStatsFragments" },
+       { 20, "TxEtherStatsJabbers" },
+       /* 21-24 are undefined */
+       { 25, "TxaFramesReceivedOK" },
+       { 26, "TxQoSClass0" }, /* non-standard counter */
+       { 27, "TxQoSClass1" }, /* non-standard counter */
+       { 28, "TxQoSClass2" }, /* non-standard counter */
+       { 29, "TxQoSClass3" }, /* non-standard counter */
+};
+
+static int vsc73xx_is_addr_valid(u8 block, u8 subblock)
+{
+       switch (block) {
+       case VSC73XX_BLOCK_MAC:
+               switch (subblock) {
+               case 0 ... 4:
+               case 6:
+                       return 1;
+               }
+               break;
+
+       case VSC73XX_BLOCK_ANALYZER:
+       case VSC73XX_BLOCK_SYSTEM:
+               switch (subblock) {
+               case 0:
+                       return 1;
+               }
+               break;
+
+       case VSC73XX_BLOCK_MII:
+       case VSC73XX_BLOCK_CAPTURE:
+       case VSC73XX_BLOCK_ARBITER:
+               switch (subblock) {
+               case 0 ... 1:
+                       return 1;
+               }
+               break;
+       }
+
+       return 0;
+}
+
+static u8 vsc73xx_make_addr(u8 mode, u8 block, u8 subblock)
+{
+       u8 ret;
+
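+       /* The command byte places the block in bits 7:5, the
+        * read/write mode bit in bit 4 and the subblock in bits 3:0.
+        * For example, a read (mode 0) of MII block 3, subblock 0
+        * yields (3 << 5) | (0 << 4) | 0 = 0x60.
+        */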
+       ret = (block & VSC73XX_CMD_BLOCK_MASK) << VSC73XX_CMD_BLOCK_SHIFT;
+       ret |= (mode & 1) << VSC73XX_CMD_MODE_SHIFT;
+       ret |= subblock & VSC73XX_CMD_SUBBLOCK_MASK;
+
+       return ret;
+}
+
+static int vsc73xx_read(struct vsc73xx *vsc, u8 block, u8 subblock, u8 reg,
+                       u32 *val)
+{
+       struct spi_transfer t[2];
+       struct spi_message m;
+       u8 cmd[4];
+       u8 buf[4];
+       int ret;
+
+       if (!vsc73xx_is_addr_valid(block, subblock))
+               return -EINVAL;
+
+       spi_message_init(&m);
+
+       memset(&t, 0, sizeof(t));
+
+       t[0].tx_buf = cmd;
+       t[0].len = sizeof(cmd);
+       spi_message_add_tail(&t[0], &m);
+
+       t[1].rx_buf = buf;
+       t[1].len = sizeof(buf);
+       spi_message_add_tail(&t[1], &m);
+
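+       /* A read transaction: a 4-byte command (address byte, register
+        * number, two padding bytes) is clocked out, then the 32-bit
+        * register value is clocked in, most significant byte first.
+        */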
+       cmd[0] = vsc73xx_make_addr(VSC73XX_CMD_MODE_READ, block, subblock);
+       cmd[1] = reg;
+       cmd[2] = 0;
+       cmd[3] = 0;
+
+       mutex_lock(&vsc->lock);
+       ret = spi_sync(vsc->spi, &m);
+       mutex_unlock(&vsc->lock);
+
+       if (ret)
+               return ret;
+
+       *val = (buf[0] << 24) | (buf[1] << 16) | (buf[2] << 8) | buf[3];
+
+       return 0;
+}
+
+static int vsc73xx_write(struct vsc73xx *vsc, u8 block, u8 subblock, u8 reg,
+                        u32 val)
+{
+       struct spi_transfer t[2];
+       struct spi_message m;
+       u8 cmd[2];
+       u8 buf[4];
+       int ret;
+
+       if (!vsc73xx_is_addr_valid(block, subblock))
+               return -EINVAL;
+
+       spi_message_init(&m);
+
+       memset(&t, 0, sizeof(t));
+
+       t[0].tx_buf = cmd;
+       t[0].len = sizeof(cmd);
+       spi_message_add_tail(&t[0], &m);
+
+       t[1].tx_buf = buf;
+       t[1].len = sizeof(buf);
+       spi_message_add_tail(&t[1], &m);
+
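+       /* A write transaction: a 2-byte command (address byte, register
+        * number) followed by the 32-bit register value, most
+        * significant byte first.
+        */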
+       cmd[0] = vsc73xx_make_addr(VSC73XX_CMD_MODE_WRITE, block, subblock);
+       cmd[1] = reg;
+
+       buf[0] = (val >> 24) & 0xff;
+       buf[1] = (val >> 16) & 0xff;
+       buf[2] = (val >> 8) & 0xff;
+       buf[3] = val & 0xff;
+
+       mutex_lock(&vsc->lock);
+       ret = spi_sync(vsc->spi, &m);
+       mutex_unlock(&vsc->lock);
+
+       return ret;
+}
+
+static int vsc73xx_update_bits(struct vsc73xx *vsc, u8 block, u8 subblock,
+                              u8 reg, u32 mask, u32 val)
+{
+       u32 tmp, orig;
+       int ret;
+
+       /* Same read-modify-write algorithm as e.g. regmap */
+       ret = vsc73xx_read(vsc, block, subblock, reg, &orig);
+       if (ret)
+               return ret;
+       tmp = orig & ~mask;
+       tmp |= val & mask;
+       return vsc73xx_write(vsc, block, subblock, reg, tmp);
+}
+
+static int vsc73xx_detect(struct vsc73xx *vsc)
+{
+       bool icpu_si_boot_en;
+       bool icpu_pi_en;
+       u32 val;
+       u32 rev;
+       int ret;
+       u32 id;
+
+       ret = vsc73xx_read(vsc, VSC73XX_BLOCK_SYSTEM, 0,
+                          VSC73XX_ICPU_MBOX_VAL, &val);
+       if (ret) {
+               dev_err(vsc->dev, "unable to read mailbox (%d)\n", ret);
+               return ret;
+       }
+
+       if (val == 0xffffffff) {
+               dev_info(vsc->dev, "chip seems dead, assert reset\n");
+               gpiod_set_value_cansleep(vsc->reset, 1);
+               /* Reset pulse should be 20ns minimum, according to datasheet
+                * table 245, so 10us should be fine
+                */
+               usleep_range(10, 100);
+               gpiod_set_value_cansleep(vsc->reset, 0);
+               /* Wait 20ms according to datasheet table 245 */
+               msleep(20);
+
+               ret = vsc73xx_read(vsc, VSC73XX_BLOCK_SYSTEM, 0,
+                                  VSC73XX_ICPU_MBOX_VAL, &val);
+               if (ret) {
+                       dev_err(vsc->dev, "unable to read mailbox (%d)\n", ret);
+                       return ret;
+               }
+               if (val == 0xffffffff) {
+                       dev_err(vsc->dev, "seems not to help, giving up\n");
+                       return -ENODEV;
+               }
+       }
+
+       ret = vsc73xx_read(vsc, VSC73XX_BLOCK_SYSTEM, 0,
+                          VSC73XX_CHIPID, &val);
+       if (ret) {
+               dev_err(vsc->dev, "unable to read chip id (%d)\n", ret);
+               return ret;
+       }
+
+       id = (val >> VSC73XX_CHIPID_ID_SHIFT) &
+               VSC73XX_CHIPID_ID_MASK;
+       switch (id) {
+       case VSC73XX_CHIPID_ID_7385:
+       case VSC73XX_CHIPID_ID_7388:
+       case VSC73XX_CHIPID_ID_7395:
+       case VSC73XX_CHIPID_ID_7398:
+               break;
+       default:
+               dev_err(vsc->dev, "unsupported chip, id=%04x\n", id);
+               return -ENODEV;
+       }
+
+       vsc->chipid = id;
+       rev = (val >> VSC73XX_CHIPID_REV_SHIFT) &
+               VSC73XX_CHIPID_REV_MASK;
+       dev_info(vsc->dev, "VSC%04X (rev: %d) switch found\n", id, rev);
+
+       ret = vsc73xx_read(vsc, VSC73XX_BLOCK_SYSTEM, 0,
+                          VSC73XX_ICPU_CTRL, &val);
+       if (ret) {
+               dev_err(vsc->dev, "unable to read iCPU control\n");
+               return ret;
+       }
+
+       /* The iCPU can always be used but can boot in different ways.
+        * If it is initially disabled and has no external memory,
+        * we are in control and can do whatever we like, else we
+        * are probably in trouble (we need some way to communicate
+        * with the running firmware) so we bail out for now.
+        */
+       icpu_pi_en = !!(val & VSC73XX_ICPU_CTRL_ICPU_PI_EN);
+       icpu_si_boot_en = !!(val & VSC73XX_ICPU_CTRL_BOOT_EN);
+       if (icpu_si_boot_en && icpu_pi_en) {
+               dev_err(vsc->dev,
+                       "iCPU enabled boots from SI, has external memory\n");
+               dev_err(vsc->dev, "no idea how to deal with this\n");
+               return -ENODEV;
+       }
+       if (icpu_si_boot_en && !icpu_pi_en) {
+               dev_err(vsc->dev,
+                       "iCPU enabled boots from SI, no external memory\n");
+               dev_err(vsc->dev, "no idea how to deal with this\n");
+               return -ENODEV;
+       }
+       if (!icpu_si_boot_en && icpu_pi_en) {
+               dev_err(vsc->dev,
+                       "iCPU enabled, boots from PI external memory\n");
+               dev_err(vsc->dev, "no idea how to deal with this\n");
+               return -ENODEV;
+       }
+       /* !icpu_si_boot_en && !icpu_pi_en */
+       dev_info(vsc->dev, "iCPU disabled, no external memory\n");
+
+       return 0;
+}
+
+static int vsc73xx_phy_read(struct dsa_switch *ds, int phy, int regnum)
+{
+       struct vsc73xx *vsc = ds->priv;
+       u32 cmd;
+       u32 val;
+       int ret;
+
+       /* Setting bit 26 means "read" */
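+       /* The PHY address goes into bits 25:21 and the register
+        * number into bits 20:16 of the MII command word.
+        */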
+       cmd = BIT(26) | (phy << 21) | (regnum << 16);
+       ret = vsc73xx_write(vsc, VSC73XX_BLOCK_MII, 0, 1, cmd);
+       if (ret)
+               return ret;
+       msleep(2);
+       ret = vsc73xx_read(vsc, VSC73XX_BLOCK_MII, 0, 2, &val);
+       if (ret)
+               return ret;
+       if (val & BIT(16)) {
+               dev_err(vsc->dev, "reading reg %02x from phy%d failed\n",
+                       regnum, phy);
+               return -EIO;
+       }
+       val &= 0xFFFFU;
+
+       dev_dbg(vsc->dev, "read reg %02x from phy%d = %04x\n",
+               regnum, phy, val);
+
+       return val;
+}
+
+static int vsc73xx_phy_write(struct dsa_switch *ds, int phy, int regnum,
+                            u16 val)
+{
+       struct vsc73xx *vsc = ds->priv;
+       u32 cmd;
+       int ret;
+
+       /* It was found through tedious experiments that this router
+        * chip really hates to have its PHYs reset. They
+        * never recover if that happens: autonegotiation stops
+        * working after a reset. Just filter out this command.
+        * (Resetting the whole chip is OK.)
+        */
+       if (regnum == 0 && (val & BIT(15))) {
+               dev_info(vsc->dev, "reset PHY - disallowed\n");
+               return 0;
+       }
+
+       cmd = (phy << 21) | (regnum << 16);
+       ret = vsc73xx_write(vsc, VSC73XX_BLOCK_MII, 0, 1, cmd);
+       if (ret)
+               return ret;
+
+       dev_dbg(vsc->dev, "write %04x to reg %02x in phy%d\n",
+               val, regnum, phy);
+       return 0;
+}
+
+static enum dsa_tag_protocol vsc73xx_get_tag_protocol(struct dsa_switch *ds,
+                                                     int port)
+{
+       /* The switch internally uses an 8 byte header with length,
+        * source port, tag, LPA and priority. This is supposedly
+        * only accessible when operating the switch using the internal
+        * CPU or with an external CPU mapping the device in, but not
+        * when operating the switch over SPI and putting frames in/out
+        * on port 6 (the CPU port). So far we must assume that we
+        * cannot access the tag. (See "Internal frame header" section
+        * 3.9.1 in the manual.)
+        */
+       return DSA_TAG_PROTO_NONE;
+}
+
+static int vsc73xx_setup(struct dsa_switch *ds)
+{
+       struct vsc73xx *vsc = ds->priv;
+       int i;
+
+       dev_info(vsc->dev, "set up the switch\n");
+
+       /* Issue RESET */
+       vsc73xx_write(vsc, VSC73XX_BLOCK_SYSTEM, 0, VSC73XX_GLORESET,
+                     VSC73XX_GLORESET_MASTER_RESET);
+       usleep_range(125, 200);
+
+       /* Initialize memory: set up RAM banks 0..15 except 6 and 7.
+        * This "initialization sequence" appears in section 6.6.1 of
+        * both the VSC7385 SparX-G5 and the VSC7395 SparX-G5e
+        * datasheets. No explanation is given for the 0x1010400
+        * magic number.
+        */
+       for (i = 0; i <= 15; i++) {
+               if (i != 6 && i != 7) {
+                       vsc73xx_write(vsc, VSC73XX_BLOCK_MEMINIT,
+                                     2,
+                                     0, 0x1010400 + i);
+                       mdelay(1);
+               }
+       }
+       mdelay(30);
+
+       /* Clear MAC table */
+       vsc73xx_write(vsc, VSC73XX_BLOCK_ANALYZER, 0,
+                     VSC73XX_MACACCESS,
+                     VSC73XX_MACACCESS_CMD_CLEAR_TABLE);
+
+       /* Clear VLAN table */
+       vsc73xx_write(vsc, VSC73XX_BLOCK_ANALYZER, 0,
+                     VSC73XX_VLANACCESS,
+                     VSC73XX_VLANACCESS_VLAN_TBL_CMD_CLEAR_TABLE);
+
+       msleep(40);
+
+       /* Use 20KiB buffers on all ports on the VSC7395 and VSC7398.
+        * The VSC7385 has 16KiB buffers and that is the
+        * default if we don't set this up explicitly.
+        * Port "31" is "all ports".
+        */
+       if (IS_739X(vsc))
+               vsc73xx_write(vsc, VSC73XX_BLOCK_MAC, 0x1f,
+                             VSC73XX_Q_MISC_CONF,
+                             VSC73XX_Q_MISC_CONF_EXTENT_MEM);
+
+       /* Put all ports into reset until enabled */
+       for (i = 0; i < 7; i++) {
+               if (i == 5)
+                       continue;
+               vsc73xx_write(vsc, VSC73XX_BLOCK_MAC, i,
+                             VSC73XX_MAC_CFG, VSC73XX_MAC_CFG_RESET);
+       }
+
+       /* MII delay, set both GTX and RX delay to 2 ns */
+       vsc73xx_write(vsc, VSC73XX_BLOCK_SYSTEM, 0, VSC73XX_GMIIDELAY,
+                     VSC73XX_GMIIDELAY_GMII0_GTXDELAY_2_0_NS |
+                     VSC73XX_GMIIDELAY_GMII0_RXDELAY_2_0_NS);
+       /* Enable reception of frames on all ports. The mask 0x5f
+        * covers ports 0-4 and 6, skipping the nonexistent port 5.
+        */
+       vsc73xx_write(vsc, VSC73XX_BLOCK_ANALYZER, 0, VSC73XX_RECVMASK,
+                     0x5f);
+       /* IP multicast flood mask (table 144) */
+       vsc73xx_write(vsc, VSC73XX_BLOCK_ANALYZER, 0, VSC73XX_IFLODMSK,
+                     0xff);
+
+       mdelay(50);
+
+       /* Release reset from the internal PHYs */
+       vsc73xx_write(vsc, VSC73XX_BLOCK_SYSTEM, 0, VSC73XX_GLORESET,
+                     VSC73XX_GLORESET_PHY_RESET);
+
+       udelay(4);
+
+       return 0;
+}
+
+static void vsc73xx_init_port(struct vsc73xx *vsc, int port)
+{
+       u32 val;
+
+       /* MAC configure, first reset the port and then write defaults */
+       vsc73xx_write(vsc, VSC73XX_BLOCK_MAC,
+                     port,
+                     VSC73XX_MAC_CFG,
+                     VSC73XX_MAC_CFG_RESET);
+
+       /* Bring up the port in 1Gbit mode by default; this will be
+        * adjusted after auto-negotiation on the PHY-facing
+        * ports.
+        */
+       if (port == CPU_PORT)
+               val = VSC73XX_MAC_CFG_1000M_F_RGMII;
+       else
+               val = VSC73XX_MAC_CFG_1000M_F_PHY;
+
+       vsc73xx_write(vsc, VSC73XX_BLOCK_MAC,
+                     port,
+                     VSC73XX_MAC_CFG,
+                     val |
+                     VSC73XX_MAC_CFG_TX_EN |
+                     VSC73XX_MAC_CFG_RX_EN);
+
+       /* Max length, we can do up to 9.6 KiB, so allow that.
+        * According to the application note "VSC7398 Jumbo Frames",
+        * setting the MTU to 9.6 KB does not affect performance on standard
+        * frames, so just enable it. It is clear from the application note
+        * that "9.6 kilobytes" == 9600 bytes.
+        */
+       vsc73xx_write(vsc, VSC73XX_BLOCK_MAC,
+                     port,
+                     VSC73XX_MAXLEN, 9600);
+
+       /* Flow control for the CPU port:
+        * Use a zero delay pause frame when the pause condition ends
+        * Obey pause control frames
+        */
+       vsc73xx_write(vsc, VSC73XX_BLOCK_MAC,
+                     port,
+                     VSC73XX_FCCONF,
+                     VSC73XX_FCCONF_ZERO_PAUSE_EN |
+                     VSC73XX_FCCONF_FLOW_CTRL_OBEY);
+
+       /* Issue pause control frames on PHY facing ports.
+        * Allow early initiation of MAC transmission if the amount
+        * of egress data is below 512 bytes on CPU port.
+        * FIXME: enable 20KiB buffers?
+        */
+       if (port == CPU_PORT)
+               val = VSC73XX_Q_MISC_CONF_EARLY_TX_512;
+       else
+               val = VSC73XX_Q_MISC_CONF_MAC_PAUSE_MODE;
+       val |= VSC73XX_Q_MISC_CONF_EXTENT_MEM;
+       vsc73xx_write(vsc, VSC73XX_BLOCK_MAC,
+                     port,
+                     VSC73XX_Q_MISC_CONF,
+                     val);
+
+       /* Flow control MAC: a MAC address used in flow control frames */
+       val = (vsc->addr[5] << 16) | (vsc->addr[4] << 8) | (vsc->addr[3]);
+       vsc73xx_write(vsc, VSC73XX_BLOCK_MAC,
+                     port,
+                     VSC73XX_FCMACHI,
+                     val);
+       val = (vsc->addr[2] << 16) | (vsc->addr[1] << 8) | (vsc->addr[0]);
+       vsc73xx_write(vsc, VSC73XX_BLOCK_MAC,
+                     port,
+                     VSC73XX_FCMACLO,
+                     val);
+
+       /* Tell the categorizer to forward pause frames, but not other
+        * control frames. Do not drop anything.
+        */
+       vsc73xx_write(vsc, VSC73XX_BLOCK_MAC,
+                     port,
+                     VSC73XX_CAT_DROP,
+                     VSC73XX_CAT_DROP_FWD_PAUSE_ENA);
+
+       /* Clear all counters */
+       vsc73xx_write(vsc, VSC73XX_BLOCK_MAC,
+                     port, VSC73XX_C_RX0, 0);
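+       /* (Writing the first RX counter register appears to clear the
+        * whole per-port counter bank, which is presumably why only
+        * VSC73XX_C_RX0 is written here.)
+        */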
+}
+
+static void vsc73xx_adjust_enable_port(struct vsc73xx *vsc,
+                                      int port, struct phy_device *phydev,
+                                      u32 initval)
+{
+       u32 val = initval;
+       u8 seed;
+
+       /* Reset this port. FIXME: break this out into a subroutine */
+       val |= VSC73XX_MAC_CFG_RESET;
+       vsc73xx_write(vsc, VSC73XX_BLOCK_MAC, port, VSC73XX_MAC_CFG, val);
+
+       /* Load a random seed byte into the MAC config */
+       get_random_bytes(&seed, 1);
+       val |= seed << VSC73XX_MAC_CFG_SEED_OFFSET;
+       val |= VSC73XX_MAC_CFG_SEED_LOAD;
+       val |= VSC73XX_MAC_CFG_WEXC_DIS;
+       vsc73xx_write(vsc, VSC73XX_BLOCK_MAC, port, VSC73XX_MAC_CFG, val);
+
+       /* Flow control for the PHY facing ports:
+        * Use a zero delay pause frame when the pause condition ends
+        * Obey pause control frames
+        * When generating pause frames, use 0xff as pause value
+        */
+       vsc73xx_write(vsc, VSC73XX_BLOCK_MAC, port, VSC73XX_FCCONF,
+                     VSC73XX_FCCONF_ZERO_PAUSE_EN |
+                     VSC73XX_FCCONF_FLOW_CTRL_OBEY |
+                     0xff);
+
+       /* Disallow backward dropping of frames from this port */
+       vsc73xx_update_bits(vsc, VSC73XX_BLOCK_ARBITER, 0,
+                           VSC73XX_SBACKWDROP, BIT(port), 0);
+
+       /* Enable TX, RX, deassert reset, stop loading seed */
+       vsc73xx_update_bits(vsc, VSC73XX_BLOCK_MAC, port,
+                           VSC73XX_MAC_CFG,
+                           VSC73XX_MAC_CFG_RESET | VSC73XX_MAC_CFG_SEED_LOAD |
+                           VSC73XX_MAC_CFG_TX_EN | VSC73XX_MAC_CFG_RX_EN,
+                           VSC73XX_MAC_CFG_TX_EN | VSC73XX_MAC_CFG_RX_EN);
+}
+
+static void vsc73xx_adjust_link(struct dsa_switch *ds, int port,
+                               struct phy_device *phydev)
+{
+       struct vsc73xx *vsc = ds->priv;
+       u32 val;
+
+       /* Special handling of the CPU-facing port */
+       if (port == CPU_PORT) {
+               /* Other ports are already initialized but not this one */
+               vsc73xx_init_port(vsc, CPU_PORT);
+               /* Select the external port for this interface (EXT_PORT)
+                * Enable the GMII GTX external clock
+                * Use double data rate (DDR mode)
+                */
+               vsc73xx_write(vsc, VSC73XX_BLOCK_MAC,
+                             CPU_PORT,
+                             VSC73XX_ADVPORTM,
+                             VSC73XX_ADVPORTM_EXT_PORT |
+                             VSC73XX_ADVPORTM_ENA_GTX |
+                             VSC73XX_ADVPORTM_DDR_MODE);
+       }
+
+       /* This is the MAC configuration that always needs to happen
+        * after a PHY or the CPU port comes up or goes down.
+        */
+       if (!phydev->link) {
+               int maxloop = 10;
+
+               dev_dbg(vsc->dev, "port %d: went down\n",
+                       port);
+
+               /* Disable RX on this port */
+               vsc73xx_update_bits(vsc, VSC73XX_BLOCK_MAC, port,
+                                   VSC73XX_MAC_CFG,
+                                   VSC73XX_MAC_CFG_RX_EN, 0);
+
+               /* Discard packets */
+               vsc73xx_update_bits(vsc, VSC73XX_BLOCK_ARBITER, 0,
+                                   VSC73XX_ARBDISC, BIT(port), BIT(port));
+
+               /* Wait until queue is empty */
+               vsc73xx_read(vsc, VSC73XX_BLOCK_ARBITER, 0,
+                            VSC73XX_ARBEMPTY, &val);
+               while (!(val & BIT(port))) {
+                       msleep(1);
+                       vsc73xx_read(vsc, VSC73XX_BLOCK_ARBITER, 0,
+                                    VSC73XX_ARBEMPTY, &val);
+                       if (--maxloop == 0) {
+                               dev_err(vsc->dev,
+                                       "timeout waiting for block arbiter\n");
+                               /* Continue anyway */
+                               break;
+                       }
+               }
+
+               /* Put this port into reset */
+               vsc73xx_write(vsc, VSC73XX_BLOCK_MAC, port, VSC73XX_MAC_CFG,
+                             VSC73XX_MAC_CFG_RESET);
+
+               /* Accept packets again */
+               vsc73xx_update_bits(vsc, VSC73XX_BLOCK_ARBITER, 0,
+                                   VSC73XX_ARBDISC, BIT(port), 0);
+
+               /* Allow backward dropping of frames from this port */
+               vsc73xx_update_bits(vsc, VSC73XX_BLOCK_ARBITER, 0,
+                                   VSC73XX_SBACKWDROP, BIT(port), BIT(port));
+
+               /* Receive mask (disable forwarding) */
+               vsc73xx_update_bits(vsc, VSC73XX_BLOCK_ANALYZER, 0,
+                                   VSC73XX_RECVMASK, BIT(port), 0);
+
+               return;
+       }
+
+       /* Figure out what speed was negotiated */
+       if (phydev->speed == SPEED_1000) {
+               dev_dbg(vsc->dev, "port %d: 1000 Mbit mode full duplex\n",
+                       port);
+
+               /* Set up default for internal port or external RGMII */
+               if (phydev->interface == PHY_INTERFACE_MODE_RGMII)
+                       val = VSC73XX_MAC_CFG_1000M_F_RGMII;
+               else
+                       val = VSC73XX_MAC_CFG_1000M_F_PHY;
+               vsc73xx_adjust_enable_port(vsc, port, phydev, val);
+       } else if (phydev->speed == SPEED_100) {
+               if (phydev->duplex == DUPLEX_FULL) {
+                       val = VSC73XX_MAC_CFG_100_10M_F_PHY;
+                       dev_dbg(vsc->dev,
+                               "port %d: 100 Mbit full duplex mode\n",
+                               port);
+               } else {
+                       val = VSC73XX_MAC_CFG_100_10M_H_PHY;
+                       dev_dbg(vsc->dev,
+                               "port %d: 100 Mbit half duplex mode\n",
+                               port);
+               }
+               vsc73xx_adjust_enable_port(vsc, port, phydev, val);
+       } else if (phydev->speed == SPEED_10) {
+               if (phydev->duplex == DUPLEX_FULL) {
+                       val = VSC73XX_MAC_CFG_100_10M_F_PHY;
+                       dev_dbg(vsc->dev,
+                               "port %d: 10 Mbit full duplex mode\n",
+                               port);
+               } else {
+                       val = VSC73XX_MAC_CFG_100_10M_H_PHY;
+                       dev_dbg(vsc->dev,
+                               "port %d: 10 Mbit half duplex mode\n",
+                               port);
+               }
+               vsc73xx_adjust_enable_port(vsc, port, phydev, val);
+       } else {
+               dev_err(vsc->dev,
+                       "could not adjust link: unknown speed\n");
+       }
+
+       /* Enable port (forwarding) in the receive mask */
+       vsc73xx_update_bits(vsc, VSC73XX_BLOCK_ANALYZER, 0,
+                           VSC73XX_RECVMASK, BIT(port), BIT(port));
+}
+
+static int vsc73xx_port_enable(struct dsa_switch *ds, int port,
+                              struct phy_device *phy)
+{
+       struct vsc73xx *vsc = ds->priv;
+
+       dev_info(vsc->dev, "enable port %d\n", port);
+       vsc73xx_init_port(vsc, port);
+
+       return 0;
+}
+
+static void vsc73xx_port_disable(struct dsa_switch *ds, int port,
+                                struct phy_device *phy)
+{
+       struct vsc73xx *vsc = ds->priv;
+
+       /* Just put the port into reset */
+       vsc73xx_write(vsc, VSC73XX_BLOCK_MAC, port,
+                     VSC73XX_MAC_CFG, VSC73XX_MAC_CFG_RESET);
+}
+
+static const struct vsc73xx_counter *
+vsc73xx_find_counter(struct vsc73xx *vsc,
+                    u8 counter,
+                    bool tx)
+{
+       const struct vsc73xx_counter *cnts;
+       int num_cnts;
+       int i;
+
+       if (tx) {
+               cnts = vsc73xx_tx_counters;
+               num_cnts = ARRAY_SIZE(vsc73xx_tx_counters);
+       } else {
+               cnts = vsc73xx_rx_counters;
+               num_cnts = ARRAY_SIZE(vsc73xx_rx_counters);
+       }
+
+       for (i = 0; i < num_cnts; i++) {
+               const struct vsc73xx_counter *cnt;
+
+               cnt = &cnts[i];
+               if (cnt->counter == counter)
+                       return cnt;
+       }
+
+       return NULL;
+}
+
+static void vsc73xx_get_strings(struct dsa_switch *ds, int port, u32 stringset,
+                               uint8_t *data)
+{
+       const struct vsc73xx_counter *cnt;
+       struct vsc73xx *vsc = ds->priv;
+       u8 indices[6];
+       int i, j;
+       u32 val;
+       int ret;
+
+       if (stringset != ETH_SS_STATS)
+               return;
+
+       ret = vsc73xx_read(vsc, VSC73XX_BLOCK_MAC, port,
+                          VSC73XX_C_CFG, &val);
+       if (ret)
+               return;
+
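+       /* C_CFG packs six 5-bit counter selectors: three RX selectors
+        * starting at bit 0 and three TX selectors starting at bit 16,
+        * which is what the shifts below pick apart.
+        */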
+       indices[0] = (val & 0x1f); /* RX counter 0 */
+       indices[1] = ((val >> 5) & 0x1f); /* RX counter 1 */
+       indices[2] = ((val >> 10) & 0x1f); /* RX counter 2 */
+       indices[3] = ((val >> 16) & 0x1f); /* TX counter 0 */
+       indices[4] = ((val >> 21) & 0x1f); /* TX counter 1 */
+       indices[5] = ((val >> 26) & 0x1f); /* TX counter 2 */
+
+       /* The first counter is the RX octets */
+       j = 0;
+       strncpy(data + j * ETH_GSTRING_LEN,
+               "RxEtherStatsOctets", ETH_GSTRING_LEN);
+       j++;
+
+       /* Each port supports recording 3 RX counters and 3 TX counters,
+        * figure out what counters we use in this set-up and return the
+        * names of them. The hardware default counters will be number of
+        * packets on RX/TX, combined broadcast+multicast packets RX/TX and
+        * total error packets RX/TX.
+        */
+       for (i = 0; i < 3; i++) {
+               cnt = vsc73xx_find_counter(vsc, indices[i], false);
+               if (cnt)
+                       strncpy(data + j * ETH_GSTRING_LEN,
+                               cnt->name, ETH_GSTRING_LEN);
+               j++;
+       }
+
+       /* TX stats begin with the number of TX octets */
+       strncpy(data + j * ETH_GSTRING_LEN,
+               "TxEtherStatsOctets", ETH_GSTRING_LEN);
+       j++;
+
+       for (i = 3; i < 6; i++) {
+               cnt = vsc73xx_find_counter(vsc, indices[i], true);
+               if (cnt)
+                       strncpy(data + j * ETH_GSTRING_LEN,
+                               cnt->name, ETH_GSTRING_LEN);
+               j++;
+       }
+}
+
+static int vsc73xx_get_sset_count(struct dsa_switch *ds, int port, int sset)
+{
+       /* We only support SS_STATS */
+       if (sset != ETH_SS_STATS)
+               return 0;
+       /* RX and TX octet counters, then 3 RX counters, 3 TX counters */
+       return 8;
+}
+
+static void vsc73xx_get_ethtool_stats(struct dsa_switch *ds, int port,
+                                     uint64_t *data)
+{
+       struct vsc73xx *vsc = ds->priv;
+       u8 regs[] = {
+               VSC73XX_RXOCT,
+               VSC73XX_C_RX0,
+               VSC73XX_C_RX1,
+               VSC73XX_C_RX2,
+               VSC73XX_TXOCT,
+               VSC73XX_C_TX0,
+               VSC73XX_C_TX1,
+               VSC73XX_C_TX2,
+       };
+       u32 val;
+       int ret;
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(regs); i++) {
+               ret = vsc73xx_read(vsc, VSC73XX_BLOCK_MAC, port,
+                                  regs[i], &val);
+               if (ret) {
+                       dev_err(vsc->dev, "error reading counter %d\n", i);
+                       return;
+               }
+               data[i] = val;
+       }
+}
+
+static const struct dsa_switch_ops vsc73xx_ds_ops = {
+       .get_tag_protocol = vsc73xx_get_tag_protocol,
+       .setup = vsc73xx_setup,
+       .phy_read = vsc73xx_phy_read,
+       .phy_write = vsc73xx_phy_write,
+       .adjust_link = vsc73xx_adjust_link,
+       .get_strings = vsc73xx_get_strings,
+       .get_ethtool_stats = vsc73xx_get_ethtool_stats,
+       .get_sset_count = vsc73xx_get_sset_count,
+       .port_enable = vsc73xx_port_enable,
+       .port_disable = vsc73xx_port_disable,
+};
+
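+/* In the GPIO register, bits 3:0 appear to hold the line values and
+ * bits 7:4 the matching direction bits, a set bit meaning output;
+ * this is the layout the accessors below assume.
+ */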
+static int vsc73xx_gpio_get(struct gpio_chip *chip, unsigned int offset)
+{
+       struct vsc73xx *vsc = gpiochip_get_data(chip);
+       u32 val;
+       int ret;
+
+       ret = vsc73xx_read(vsc, VSC73XX_BLOCK_SYSTEM, 0,
+                          VSC73XX_GPIO, &val);
+       if (ret)
+               return ret;
+
+       return !!(val & BIT(offset));
+}
+
+static void vsc73xx_gpio_set(struct gpio_chip *chip, unsigned int offset,
+                            int val)
+{
+       struct vsc73xx *vsc = gpiochip_get_data(chip);
+       u32 tmp = val ? BIT(offset) : 0;
+
+       vsc73xx_update_bits(vsc, VSC73XX_BLOCK_SYSTEM, 0,
+                           VSC73XX_GPIO, BIT(offset), tmp);
+}
+
+static int vsc73xx_gpio_direction_output(struct gpio_chip *chip,
+                                        unsigned int offset, int val)
+{
+       struct vsc73xx *vsc = gpiochip_get_data(chip);
+       u32 tmp = val ? BIT(offset) : 0;
+
+       return vsc73xx_update_bits(vsc, VSC73XX_BLOCK_SYSTEM, 0,
+                                  VSC73XX_GPIO, BIT(offset + 4) | BIT(offset),
+                                  BIT(offset + 4) | tmp);
+}
+
+static int vsc73xx_gpio_direction_input(struct gpio_chip *chip,
+                                       unsigned int offset)
+{
+       struct vsc73xx *vsc = gpiochip_get_data(chip);
+
+       return  vsc73xx_update_bits(vsc, VSC73XX_BLOCK_SYSTEM, 0,
+                                   VSC73XX_GPIO, BIT(offset + 4),
+                                   0);
+}
+
+static int vsc73xx_gpio_get_direction(struct gpio_chip *chip,
+                                     unsigned int offset)
+{
+       struct vsc73xx *vsc = gpiochip_get_data(chip);
+       u32 val;
+       int ret;
+
+       ret = vsc73xx_read(vsc, VSC73XX_BLOCK_SYSTEM, 0,
+                          VSC73XX_GPIO, &val);
+       if (ret)
+               return ret;
+
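+       /* A set direction bit means output, while gpiolib expects
+        * 0 for output and 1 for input, hence the inversion.
+        */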
+       return !(val & BIT(offset + 4));
+}
+
+static int vsc73xx_gpio_probe(struct vsc73xx *vsc)
+{
+       int ret;
+
+       vsc->gc.label = devm_kasprintf(vsc->dev, GFP_KERNEL, "VSC%04x",
+                                      vsc->chipid);
+       vsc->gc.ngpio = 4;
+       vsc->gc.owner = THIS_MODULE;
+       vsc->gc.parent = vsc->dev;
+       vsc->gc.of_node = vsc->dev->of_node;
+       vsc->gc.base = -1;
+       vsc->gc.get = vsc73xx_gpio_get;
+       vsc->gc.set = vsc73xx_gpio_set;
+       vsc->gc.direction_input = vsc73xx_gpio_direction_input;
+       vsc->gc.direction_output = vsc73xx_gpio_direction_output;
+       vsc->gc.get_direction = vsc73xx_gpio_get_direction;
+       vsc->gc.can_sleep = true;
+       ret = devm_gpiochip_add_data(vsc->dev, &vsc->gc, vsc);
+       if (ret) {
+               dev_err(vsc->dev, "unable to register GPIO chip\n");
+               return ret;
+       }
+       return 0;
+}
+
+static int vsc73xx_probe(struct spi_device *spi)
+{
+       struct device *dev = &spi->dev;
+       struct vsc73xx *vsc;
+       int ret;
+
+       vsc = devm_kzalloc(dev, sizeof(*vsc), GFP_KERNEL);
+       if (!vsc)
+               return -ENOMEM;
+
+       spi_set_drvdata(spi, vsc);
+       vsc->spi = spi_dev_get(spi);
+       vsc->dev = dev;
+       mutex_init(&vsc->lock);
+
+       /* Release reset, if any */
+       vsc->reset = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_LOW);
+       if (IS_ERR(vsc->reset)) {
+               dev_err(dev, "failed to get RESET GPIO\n");
+               return PTR_ERR(vsc->reset);
+       }
+       if (vsc->reset)
+               /* Wait 20ms according to datasheet table 245 */
+               msleep(20);
+
+       spi->mode = SPI_MODE_0;
+       spi->bits_per_word = 8;
+       ret = spi_setup(spi);
+       if (ret < 0) {
+               dev_err(dev, "spi setup failed.\n");
+               return ret;
+       }
+
+       ret = vsc73xx_detect(vsc);
+       if (ret) {
+               dev_err(dev, "no chip found (%d)\n", ret);
+               return -ENODEV;
+       }
+
+       eth_random_addr(vsc->addr);
+       dev_info(vsc->dev,
+                "MAC for control frames: %02X:%02X:%02X:%02X:%02X:%02X\n",
+                vsc->addr[0], vsc->addr[1], vsc->addr[2],
+                vsc->addr[3], vsc->addr[4], vsc->addr[5]);
+
+       /* The VSC7395 switch chips have 5+1 ports which means 5
+        * ordinary ports and a sixth CPU port facing the processor
+        * with an RGMII interface. These ports are numbered 0..4
+        * and 6, so they leave a "hole" in the port map for port 5,
+        * which is invalid.
+        *
+        * The VSC7398 has 8 ports, port 7 is again the CPU port.
+        *
+        * We allocate 8 ports and avoid access to the nonexistent
+        * ports.
+        */
+       vsc->ds = dsa_switch_alloc(dev, 8);
+       if (!vsc->ds)
+               return -ENOMEM;
+       vsc->ds->priv = vsc;
+
+       vsc->ds->ops = &vsc73xx_ds_ops;
+       ret = dsa_register_switch(vsc->ds);
+       if (ret) {
+               dev_err(dev, "unable to register switch (%d)\n", ret);
+               return ret;
+       }
+
+       ret = vsc73xx_gpio_probe(vsc);
+       if (ret) {
+               dsa_unregister_switch(vsc->ds);
+               return ret;
+       }
+
+       return 0;
+}
+
+static int vsc73xx_remove(struct spi_device *spi)
+{
+       struct vsc73xx *vsc = spi_get_drvdata(spi);
+
+       dsa_unregister_switch(vsc->ds);
+       gpiod_set_value(vsc->reset, 1);
+
+       return 0;
+}
+
+static const struct of_device_id vsc73xx_of_match[] = {
+       {
+               .compatible = "vitesse,vsc7385",
+       },
+       {
+               .compatible = "vitesse,vsc7388",
+       },
+       {
+               .compatible = "vitesse,vsc7395",
+       },
+       {
+               .compatible = "vitesse,vsc7398",
+       },
+       { },
+};
+MODULE_DEVICE_TABLE(of, vsc73xx_of_match);
+
+static struct spi_driver vsc73xx_driver = {
+       .probe = vsc73xx_probe,
+       .remove = vsc73xx_remove,
+       .driver = {
+               .name = "vsc73xx",
+               .of_match_table = vsc73xx_of_match,
+       },
+};
+module_spi_driver(vsc73xx_driver);
+
+MODULE_AUTHOR("Linus Walleij <linus.walleij@linaro.org>");
+MODULE_DESCRIPTION("Vitesse VSC7385/7388/7395/7398 driver");
+MODULE_LICENSE("GPL v2");
index af766fd61151643d260b24e51b30fd68f5a2d9e8..6fde68aa13a40de376f472df200f36752f6a247e 100644 (file)
@@ -81,7 +81,6 @@ source "drivers/net/ethernet/huawei/Kconfig"
 source "drivers/net/ethernet/i825xx/Kconfig"
 source "drivers/net/ethernet/ibm/Kconfig"
 source "drivers/net/ethernet/intel/Kconfig"
-source "drivers/net/ethernet/neterion/Kconfig"
 source "drivers/net/ethernet/xscale/Kconfig"
 
 config JME
@@ -128,6 +127,7 @@ config FEALNX
          cards. <http://www.myson.com.tw/>
 
 source "drivers/net/ethernet/natsemi/Kconfig"
+source "drivers/net/ethernet/neterion/Kconfig"
 source "drivers/net/ethernet/netronome/Kconfig"
 source "drivers/net/ethernet/ni/Kconfig"
 source "drivers/net/ethernet/8390/Kconfig"
index 8fbfe9ce2fa53a69673671871465e816b2386ba6..b45d5f626b592356d222e9967c3b68a96dfb7c3f 100644 (file)
@@ -20,7 +20,7 @@ obj-$(CONFIG_NET_VENDOR_AQUANTIA) += aquantia/
 obj-$(CONFIG_NET_VENDOR_ARC) += arc/
 obj-$(CONFIG_NET_VENDOR_ATHEROS) += atheros/
 obj-$(CONFIG_NET_VENDOR_AURORA) += aurora/
-obj-$(CONFIG_NET_CADENCE) += cadence/
+obj-$(CONFIG_NET_VENDOR_CADENCE) += cadence/
 obj-$(CONFIG_NET_VENDOR_BROADCOM) += broadcom/
 obj-$(CONFIG_NET_VENDOR_BROCADE) += brocade/
 obj-$(CONFIG_NET_CALXEDA_XGMAC) += calxeda/
@@ -36,7 +36,6 @@ obj-$(CONFIG_NET_VENDOR_DEC) += dec/
 obj-$(CONFIG_NET_VENDOR_DLINK) += dlink/
 obj-$(CONFIG_NET_VENDOR_EMULEX) += emulex/
 obj-$(CONFIG_NET_VENDOR_EZCHIP) += ezchip/
-obj-$(CONFIG_NET_VENDOR_EXAR) += neterion/
 obj-$(CONFIG_NET_VENDOR_FARADAY) += faraday/
 obj-$(CONFIG_NET_VENDOR_FREESCALE) += freescale/
 obj-$(CONFIG_NET_VENDOR_FUJITSU) += fujitsu/
@@ -60,6 +59,7 @@ obj-$(CONFIG_NET_VENDOR_MOXART) += moxa/
 obj-$(CONFIG_NET_VENDOR_MYRI) += myricom/
 obj-$(CONFIG_FEALNX) += fealnx.o
 obj-$(CONFIG_NET_VENDOR_NATSEMI) += natsemi/
+obj-$(CONFIG_NET_VENDOR_NETERION) += neterion/
 obj-$(CONFIG_NET_VENDOR_NETRONOME) += netronome/
 obj-$(CONFIG_NET_VENDOR_NI) += ni/
 obj-$(CONFIG_NET_NETX) += netx-eth.o
@@ -68,7 +68,7 @@ obj-$(CONFIG_NET_VENDOR_NVIDIA) += nvidia/
 obj-$(CONFIG_LPC_ENET) += nxp/
 obj-$(CONFIG_NET_VENDOR_OKI) += oki-semi/
 obj-$(CONFIG_ETHOC) += ethoc.o
-obj-$(CONFIG_NET_PACKET_ENGINE) += packetengines/
+obj-$(CONFIG_NET_VENDOR_PACKET_ENGINES) += packetengines/
 obj-$(CONFIG_NET_VENDOR_PASEMI) += pasemi/
 obj-$(CONFIG_NET_VENDOR_QLOGIC) += qlogic/
 obj-$(CONFIG_NET_VENDOR_QUALCOMM) += qualcomm/
@@ -80,8 +80,7 @@ obj-$(CONFIG_NET_VENDOR_SAMSUNG) += samsung/
 obj-$(CONFIG_NET_VENDOR_SEEQ) += seeq/
 obj-$(CONFIG_NET_VENDOR_SILAN) += silan/
 obj-$(CONFIG_NET_VENDOR_SIS) += sis/
-obj-$(CONFIG_SFC) += sfc/
-obj-$(CONFIG_SFC_FALCON) += sfc/falcon/
+obj-$(CONFIG_NET_VENDOR_SOLARFLARE) += sfc/
 obj-$(CONFIG_NET_VENDOR_SGI) += sgi/
 obj-$(CONFIG_NET_VENDOR_SMSC) += smsc/
 obj-$(CONFIG_NET_VENDOR_SOCIONEXT) += socionext/
index 3872ab96b80a39eecbb1d0b8150a2e8288915e46..097467f44b0d609620ec23455851052fa6efb874 100644 (file)
@@ -802,7 +802,7 @@ static int starfire_init_one(struct pci_dev *pdev,
                int mii_status;
                for (phy = 0; phy < 32 && phy_idx < PHY_CNT; phy++) {
                        mdio_write(dev, phy, MII_BMCR, BMCR_RESET);
-                       mdelay(100);
+                       msleep(100);
                        boguscnt = 1000;
                        while (--boguscnt > 0)
                                if ((mdio_read(dev, phy, MII_BMCR) & BMCR_RESET) == 0)
index 8f71b79b494900fa4c03564e4f88ded8aa8831c5..08945baee48ad45df9f4bbf83afebbe0ab15607c 100644 (file)
@@ -1933,7 +1933,7 @@ static void ace_rx_int(struct net_device *dev, u32 rxretprd, u32 rxretcsm)
        while (idx != rxretprd) {
                struct ring_info *rip;
                struct sk_buff *skb;
-               struct rx_desc *rxdesc, *retdesc;
+               struct rx_desc *retdesc;
                u32 skbidx;
                int bd_flags, desc_type, mapsize;
                u16 csum;
@@ -1959,19 +1959,16 @@ static void ace_rx_int(struct net_device *dev, u32 rxretprd, u32 rxretcsm)
                case 0:
                        rip = &ap->skb->rx_std_skbuff[skbidx];
                        mapsize = ACE_STD_BUFSIZE;
-                       rxdesc = &ap->rx_std_ring[skbidx];
                        std_count++;
                        break;
                case BD_FLG_JUMBO:
                        rip = &ap->skb->rx_jumbo_skbuff[skbidx];
                        mapsize = ACE_JUMBO_BUFSIZE;
-                       rxdesc = &ap->rx_jumbo_ring[skbidx];
                        atomic_dec(&ap->cur_jumbo_bufs);
                        break;
                case BD_FLG_MINI:
                        rip = &ap->skb->rx_mini_skbuff[skbidx];
                        mapsize = ACE_MINI_BUFSIZE;
-                       rxdesc = &ap->rx_mini_ring[skbidx];
                        mini_count++;
                        break;
                default:
index f2af87d70594fca1b3c42085858fb323da295506..c673ac2df65bdf3f9b4d03403be705b581505657 100644 (file)
@@ -2213,7 +2213,8 @@ static void ena_netpoll(struct net_device *netdev)
 #endif /* CONFIG_NET_POLL_CONTROLLER */
 
 static u16 ena_select_queue(struct net_device *dev, struct sk_buff *skb,
-                           void *accel_priv, select_queue_fallback_t fallback)
+                           struct net_device *sb_dev,
+                           select_queue_fallback_t fallback)
 {
        u16 qid;
        /* we suspect that this is good for in-kernel network services that
@@ -2223,7 +2224,7 @@ static u16 ena_select_queue(struct net_device *dev, struct sk_buff *skb,
        if (skb_rx_queue_recorded(skb))
                qid = skb_get_rx_queue(skb);
        else
-               qid = fallback(dev, skb);
+               qid = fallback(dev, skb, NULL);
 
        return qid;
 }
index be198cc0b10c9c7664bdf80e0f93bb3db69e8cf9..f5ad12c109344e9ab7bee31f87eb19c49bc3312a 100644 (file)
@@ -2036,22 +2036,22 @@ static int pcnet32_alloc_ring(struct net_device *dev, const char *name)
        }
 
        lp->tx_dma_addr = kcalloc(lp->tx_ring_size, sizeof(dma_addr_t),
-                                 GFP_ATOMIC);
+                                 GFP_KERNEL);
        if (!lp->tx_dma_addr)
                return -ENOMEM;
 
        lp->rx_dma_addr = kcalloc(lp->rx_ring_size, sizeof(dma_addr_t),
-                                 GFP_ATOMIC);
+                                 GFP_KERNEL);
        if (!lp->rx_dma_addr)
                return -ENOMEM;
 
        lp->tx_skbuff = kcalloc(lp->tx_ring_size, sizeof(struct sk_buff *),
-                               GFP_ATOMIC);
+                               GFP_KERNEL);
        if (!lp->tx_skbuff)
                return -ENOMEM;
 
        lp->rx_skbuff = kcalloc(lp->rx_ring_size, sizeof(struct sk_buff *),
-                               GFP_ATOMIC);
+                               GFP_KERNEL);
        if (!lp->rx_skbuff)
                return -ENOMEM;
 
index cc1e4f820e64f39f28595750a4163bc64e7b1823..533094233659b6e218d87a1eb422a303eec98b65 100644 (file)
@@ -289,7 +289,7 @@ static int xgbe_alloc_pages(struct xgbe_prv_data *pdata,
        struct page *pages = NULL;
        dma_addr_t pages_dma;
        gfp_t gfp;
-       int order, ret;
+       int order;
 
 again:
        order = alloc_order;
@@ -316,10 +316,9 @@ again:
        /* Map the pages */
        pages_dma = dma_map_page(pdata->dev, pages, 0,
                                 PAGE_SIZE << order, DMA_FROM_DEVICE);
-       ret = dma_mapping_error(pdata->dev, pages_dma);
-       if (ret) {
+       if (dma_mapping_error(pdata->dev, pages_dma)) {
                put_page(pages);
-               return ret;
+               return -ENOMEM;
        }
 
        pa->pages = pages;
index f2d8063a2cefd8f7581f0e2182b81b1ce773a92a..08c9fa6ca71f273b5695887005ce6a5025fbc9c5 100644 (file)
@@ -11,6 +11,7 @@
 
 #include "aq_ethtool.h"
 #include "aq_nic.h"
+#include "aq_vec.h"
 
 static void aq_ethtool_get_regs(struct net_device *ndev,
                                struct ethtool_regs *regs, void *p)
@@ -284,6 +285,117 @@ static int aq_ethtool_set_coalesce(struct net_device *ndev,
        return aq_nic_update_interrupt_moderation_settings(aq_nic);
 }
 
+static int aq_ethtool_nway_reset(struct net_device *ndev)
+{
+       struct aq_nic_s *aq_nic = netdev_priv(ndev);
+
+       if (unlikely(!aq_nic->aq_fw_ops->renegotiate))
+               return -EOPNOTSUPP;
+
+       if (netif_running(ndev))
+               return aq_nic->aq_fw_ops->renegotiate(aq_nic->aq_hw);
+
+       return 0;
+}
+
+static void aq_ethtool_get_pauseparam(struct net_device *ndev,
+                                     struct ethtool_pauseparam *pause)
+{
+       struct aq_nic_s *aq_nic = netdev_priv(ndev);
+
+       pause->autoneg = 0;
+
+       if (aq_nic->aq_hw->aq_nic_cfg->flow_control & AQ_NIC_FC_RX)
+               pause->rx_pause = 1;
+       if (aq_nic->aq_hw->aq_nic_cfg->flow_control & AQ_NIC_FC_TX)
+               pause->tx_pause = 1;
+}
+
+static int aq_ethtool_set_pauseparam(struct net_device *ndev,
+                                    struct ethtool_pauseparam *pause)
+{
+       struct aq_nic_s *aq_nic = netdev_priv(ndev);
+       int err = 0;
+
+       if (!aq_nic->aq_fw_ops->set_flow_control)
+               return -EOPNOTSUPP;
+
+       if (pause->autoneg == AUTONEG_ENABLE)
+               return -EOPNOTSUPP;
+
+       if (pause->rx_pause)
+               aq_nic->aq_hw->aq_nic_cfg->flow_control |= AQ_NIC_FC_RX;
+       else
+               aq_nic->aq_hw->aq_nic_cfg->flow_control &= ~AQ_NIC_FC_RX;
+
+       if (pause->tx_pause)
+               aq_nic->aq_hw->aq_nic_cfg->flow_control |= AQ_NIC_FC_TX;
+       else
+               aq_nic->aq_hw->aq_nic_cfg->flow_control &= ~AQ_NIC_FC_TX;
+
+       err = aq_nic->aq_fw_ops->set_flow_control(aq_nic->aq_hw);
+
+       return err;
+}
+
+static void aq_get_ringparam(struct net_device *ndev,
+                            struct ethtool_ringparam *ring)
+{
+       struct aq_nic_s *aq_nic = netdev_priv(ndev);
+       struct aq_nic_cfg_s *aq_nic_cfg = aq_nic_get_cfg(aq_nic);
+
+       ring->rx_pending = aq_nic_cfg->rxds;
+       ring->tx_pending = aq_nic_cfg->txds;
+
+       ring->rx_max_pending = aq_nic_cfg->aq_hw_caps->rxds_max;
+       ring->tx_max_pending = aq_nic_cfg->aq_hw_caps->txds_max;
+}
+
+static int aq_set_ringparam(struct net_device *ndev,
+                           struct ethtool_ringparam *ring)
+{
+       int err = 0;
+       bool ndev_running = false;
+       struct aq_nic_s *aq_nic = netdev_priv(ndev);
+       struct aq_nic_cfg_s *aq_nic_cfg = aq_nic_get_cfg(aq_nic);
+       const struct aq_hw_caps_s *hw_caps = aq_nic_cfg->aq_hw_caps;
+
+       if (ring->rx_mini_pending || ring->rx_jumbo_pending) {
+               err = -EOPNOTSUPP;
+               goto err_exit;
+       }
+
+       if (netif_running(ndev)) {
+               ndev_running = true;
+               dev_close(ndev);
+       }
+
+       aq_nic_free_vectors(aq_nic);
+
+       aq_nic_cfg->rxds = max(ring->rx_pending, hw_caps->rxds_min);
+       aq_nic_cfg->rxds = min(aq_nic_cfg->rxds, hw_caps->rxds_max);
+       aq_nic_cfg->rxds = ALIGN(aq_nic_cfg->rxds, AQ_HW_RXD_MULTIPLE);
+
+       aq_nic_cfg->txds = max(ring->tx_pending, hw_caps->txds_min);
+       aq_nic_cfg->txds = min(aq_nic_cfg->txds, hw_caps->txds_max);
+       aq_nic_cfg->txds = ALIGN(aq_nic_cfg->txds, AQ_HW_TXD_MULTIPLE);
+
+       for (aq_nic->aq_vecs = 0; aq_nic->aq_vecs < aq_nic_cfg->vecs;
+            aq_nic->aq_vecs++) {
+               aq_nic->aq_vec[aq_nic->aq_vecs] =
+                   aq_vec_alloc(aq_nic, aq_nic->aq_vecs, aq_nic_cfg);
+               if (unlikely(!aq_nic->aq_vec[aq_nic->aq_vecs])) {
+                       err = -ENOMEM;
+                       goto err_exit;
+               }
+       }
+       if (ndev_running)
+               err = dev_open(ndev);
+
+err_exit:
+       return err;
+}
+
 const struct ethtool_ops aq_ethtool_ops = {
        .get_link            = aq_ethtool_get_link,
        .get_regs_len        = aq_ethtool_get_regs_len,
@@ -291,6 +403,11 @@ const struct ethtool_ops aq_ethtool_ops = {
        .get_drvinfo         = aq_ethtool_get_drvinfo,
        .get_strings         = aq_ethtool_get_strings,
        .get_rxfh_indir_size = aq_ethtool_get_rss_indir_size,
+       .nway_reset          = aq_ethtool_nway_reset,
+       .get_ringparam       = aq_get_ringparam,
+       .set_ringparam       = aq_set_ringparam,
+       .get_pauseparam      = aq_ethtool_get_pauseparam,
+       .set_pauseparam      = aq_ethtool_set_pauseparam,
        .get_rxfh_key_size   = aq_ethtool_get_rss_key_size,
        .get_rxfh            = aq_ethtool_get_rss,
        .get_rxnfc           = aq_ethtool_get_rxnfc,
index 2c6ebd91a9f2782e87472e497447b60974a7a571..5c00671f248df2087ac8665c08214feda9c744d1 100644 (file)
@@ -24,8 +24,10 @@ struct aq_hw_caps_s {
        u64 link_speed_msk;
        unsigned int hw_priv_flags;
        u32 media_type;
-       u32 rxds;
-       u32 txds;
+       u32 rxds_max;
+       u32 txds_max;
+       u32 rxds_min;
+       u32 txds_min;
        u32 txhwb_alignment;
        u32 irq_mask;
        u32 vecs;
@@ -98,6 +100,9 @@ struct aq_stats_s {
 #define AQ_HW_MEDIA_TYPE_TP    1U
 #define AQ_HW_MEDIA_TYPE_FIBRE 2U
 
+#define AQ_HW_TXD_MULTIPLE 8U
+#define AQ_HW_RXD_MULTIPLE 8U
+
 #define AQ_HW_MULTICAST_ADDRESS_MAX     32U
 
 struct aq_hw_s {
@@ -199,25 +204,30 @@ struct aq_hw_ops {
 
        int (*hw_get_fw_version)(struct aq_hw_s *self, u32 *fw_version);
 
-       int (*hw_deinit)(struct aq_hw_s *self);
-
        int (*hw_set_power)(struct aq_hw_s *self, unsigned int power_state);
 };
 
 struct aq_fw_ops {
        int (*init)(struct aq_hw_s *self);
 
+       int (*deinit)(struct aq_hw_s *self);
+
        int (*reset)(struct aq_hw_s *self);
 
+       int (*renegotiate)(struct aq_hw_s *self);
+
        int (*get_mac_permanent)(struct aq_hw_s *self, u8 *mac);
 
        int (*set_link_speed)(struct aq_hw_s *self, u32 speed);
 
-       int (*set_state)(struct aq_hw_s *self, enum hal_atl_utils_fw_state_e state);
+       int (*set_state)(struct aq_hw_s *self,
+                        enum hal_atl_utils_fw_state_e state);
 
        int (*update_link_status)(struct aq_hw_s *self);
 
        int (*update_stats)(struct aq_hw_s *self);
+
+       int (*set_flow_control)(struct aq_hw_s *self);
 };
 
 #endif /* AQ_HW_H */
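
Several of the aq_fw_ops hooks introduced here are optional: aq_fw_1x_ops
further down sets .set_flow_control to NULL and leaves .renegotiate unset.
A hypothetical caller (helper name invented for illustration) would
therefore guard the op before dispatching:

static int aq_fw_call_set_flow_control(struct aq_hw_s *hw,
                                       const struct aq_fw_ops *ops)
{
        /* FW 1.x provides no flow-control op; report "not supported"
         * instead of dereferencing a NULL function pointer.
         */
        if (!ops->set_flow_control)
                return -EOPNOTSUPP;

        return ops->set_flow_control(hw);
}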
index 7a22d0257e04ccf07ef87cae18d5d4f87630660a..26dc6782b4750f311f8311f1523e6ea98bc08f5b 100644 (file)
@@ -89,8 +89,8 @@ void aq_nic_cfg_start(struct aq_nic_s *self)
        aq_nic_rss_init(self, cfg->num_rss_queues);
 
        /*descriptors */
-       cfg->rxds = min(cfg->aq_hw_caps->rxds, AQ_CFG_RXDS_DEF);
-       cfg->txds = min(cfg->aq_hw_caps->txds, AQ_CFG_TXDS_DEF);
+       cfg->rxds = min(cfg->aq_hw_caps->rxds_max, AQ_CFG_RXDS_DEF);
+       cfg->txds = min(cfg->aq_hw_caps->txds_max, AQ_CFG_TXDS_DEF);
 
        /*rss rings */
        cfg->vecs = min(cfg->aq_hw_caps->vecs, AQ_CFG_VECS_DEF);
@@ -768,10 +768,14 @@ void aq_nic_get_link_ksettings(struct aq_nic_s *self,
                ethtool_link_ksettings_add_link_mode(cmd, advertising,
                                                     100baseT_Full);
 
-       if (self->aq_nic_cfg.flow_control)
+       if (self->aq_nic_cfg.flow_control & AQ_NIC_FC_RX)
                ethtool_link_ksettings_add_link_mode(cmd, advertising,
                                                     Pause);
 
+       if (self->aq_nic_cfg.flow_control & AQ_NIC_FC_TX)
+               ethtool_link_ksettings_add_link_mode(cmd, advertising,
+                                                    Asym_Pause);
+
        if (self->aq_nic_cfg.aq_hw_caps->media_type == AQ_HW_MEDIA_TYPE_FIBRE)
                ethtool_link_ksettings_add_link_mode(cmd, advertising, FIBRE);
        else
@@ -886,7 +890,7 @@ void aq_nic_deinit(struct aq_nic_s *self)
                aq_vec_deinit(aq_vec);
 
        if (self->power_state == AQ_HW_POWER_STATE_D0) {
-               (void)self->aq_hw_ops->hw_deinit(self->aq_hw);
+               (void)self->aq_fw_ops->deinit(self->aq_hw);
        } else {
                (void)self->aq_hw_ops->hw_set_power(self->aq_hw,
                                                   self->power_state);
index 8cc6abadc03b90e88fb58b09a53e7da3702710e5..97addfa6f89569d7d23f8b60f42d67030c55ac0a 100644 (file)
 #include "hw_atl_a0_internal.h"
 
 #define DEFAULT_A0_BOARD_BASIC_CAPABILITIES \
-       .is_64_dma = true, \
-       .msix_irqs = 4U, \
-       .irq_mask = ~0U, \
-       .vecs = HW_ATL_A0_RSS_MAX, \
-       .tcs = HW_ATL_A0_TC_MAX, \
-       .rxd_alignment = 1U, \
-       .rxd_size = HW_ATL_A0_RXD_SIZE, \
-       .rxds = 248U, \
-       .txd_alignment = 1U, \
-       .txd_size = HW_ATL_A0_TXD_SIZE, \
-       .txds = 8U * 1024U, \
-       .txhwb_alignment = 4096U, \
-       .tx_rings = HW_ATL_A0_TX_RINGS, \
-       .rx_rings = HW_ATL_A0_RX_RINGS, \
-       .hw_features = NETIF_F_HW_CSUM | \
-                       NETIF_F_RXHASH | \
-                       NETIF_F_RXCSUM | \
-                       NETIF_F_SG | \
-                       NETIF_F_TSO, \
+       .is_64_dma = true,                \
+       .msix_irqs = 4U,                  \
+       .irq_mask = ~0U,                  \
+       .vecs = HW_ATL_A0_RSS_MAX,        \
+       .tcs = HW_ATL_A0_TC_MAX,          \
+       .rxd_alignment = 1U,              \
+       .rxd_size = HW_ATL_A0_RXD_SIZE,   \
+       .rxds_max = HW_ATL_A0_MAX_RXD,    \
+       .rxds_min = HW_ATL_A0_MIN_RXD,    \
+       .txd_alignment = 1U,              \
+       .txd_size = HW_ATL_A0_TXD_SIZE,   \
+       .txds_max = HW_ATL_A0_MAX_TXD,    \
+       .txds_min = HW_ATL_A0_MIN_TXD,    \
+       .txhwb_alignment = 4096U,         \
+       .tx_rings = HW_ATL_A0_TX_RINGS,   \
+       .rx_rings = HW_ATL_A0_RX_RINGS,   \
+       .hw_features = NETIF_F_HW_CSUM |  \
+                       NETIF_F_RXHASH |  \
+                       NETIF_F_RXCSUM |  \
+                       NETIF_F_SG |      \
+                       NETIF_F_TSO,      \
        .hw_priv_flags = IFF_UNICAST_FLT, \
-       .flow_control = true, \
-       .mtu = HW_ATL_A0_MTU_JUMBO, \
-       .mac_regs_count = 88, \
+       .flow_control = true,             \
+       .mtu = HW_ATL_A0_MTU_JUMBO,       \
+       .mac_regs_count = 88,             \
        .hw_alive_check_addr = 0x10U
 
 const struct aq_hw_caps_s hw_atl_a0_caps_aqc100 = {
@@ -875,7 +877,6 @@ static int hw_atl_a0_hw_ring_rx_stop(struct aq_hw_s *self,
 const struct aq_hw_ops hw_atl_ops_a0 = {
        .hw_set_mac_address   = hw_atl_a0_hw_mac_addr_set,
        .hw_init              = hw_atl_a0_hw_init,
-       .hw_deinit            = hw_atl_utils_hw_deinit,
        .hw_set_power         = hw_atl_utils_hw_set_power,
        .hw_reset             = hw_atl_a0_hw_reset,
        .hw_start             = hw_atl_a0_hw_start,
index 1d8855558d74b902702902ce740d2a92c97f9bd7..3c94cff57876dcb7d59eee357b05be3a02f6f2b7 100644 (file)
 
 #define HW_ATL_A0_FW_VER_EXPECTED 0x01050006U
 
+#define HW_ATL_A0_MIN_RXD \
+       (ALIGN(AQ_CFG_SKB_FRAGS_MAX + 1U, AQ_HW_RXD_MULTIPLE))
+#define HW_ATL_A0_MIN_TXD \
+       (ALIGN(AQ_CFG_SKB_FRAGS_MAX + 1U, AQ_HW_TXD_MULTIPLE))
+
+#define HW_ATL_A0_MAX_RXD 8184U
+#define HW_ATL_A0_MAX_TXD 8184U
+
 #endif /* HW_ATL_A0_INTERNAL_H */
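
A worked example of the new minimum, assuming AQ_CFG_SKB_FRAGS_MAX is 32U
(its definition lives in aq_cfg.h, which this hunk does not show):
ALIGN(32 + 1, 8) rounds 33 up to 40, i.e. the smallest ring that can hold
one maximally fragmented skb plus a head descriptor, padded to the 8-entry
hardware granularity. The arithmetic checks out at compile time (C11):

_Static_assert((((32U + 1U) + 8U - 1U) & ~(8U - 1U)) == 40U,
               "ALIGN(33, 8) == 40");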
index 956860a697970ab427be0357d8541e929a85c489..4809bf4baa34f821317d3156af2fc0ee156a88ba 100644 (file)
 #include "hw_atl_llh_internal.h"
 
 #define DEFAULT_B0_BOARD_BASIC_CAPABILITIES \
-       .is_64_dma = true,      \
-       .msix_irqs = 4U,        \
-       .irq_mask = ~0U,        \
-       .vecs = HW_ATL_B0_RSS_MAX,      \
-       .tcs = HW_ATL_B0_TC_MAX,        \
-       .rxd_alignment = 1U,            \
-       .rxd_size = HW_ATL_B0_RXD_SIZE, \
-       .rxds = 4U * 1024U,             \
-       .txd_alignment = 1U,            \
-       .txd_size = HW_ATL_B0_TXD_SIZE, \
-       .txds = 8U * 1024U,             \
-       .txhwb_alignment = 4096U,       \
-       .tx_rings = HW_ATL_B0_TX_RINGS, \
-       .rx_rings = HW_ATL_B0_RX_RINGS, \
-       .hw_features = NETIF_F_HW_CSUM | \
-                       NETIF_F_RXCSUM | \
-                       NETIF_F_RXHASH | \
-                       NETIF_F_SG |  \
-                       NETIF_F_TSO | \
-                       NETIF_F_LRO,  \
-       .hw_priv_flags = IFF_UNICAST_FLT,   \
-       .flow_control = true,           \
-       .mtu = HW_ATL_B0_MTU_JUMBO,     \
-       .mac_regs_count = 88,           \
+       .is_64_dma = true,                \
+       .msix_irqs = 4U,                  \
+       .irq_mask = ~0U,                  \
+       .vecs = HW_ATL_B0_RSS_MAX,        \
+       .tcs = HW_ATL_B0_TC_MAX,          \
+       .rxd_alignment = 1U,              \
+       .rxd_size = HW_ATL_B0_RXD_SIZE,   \
+       .rxds_max = HW_ATL_B0_MAX_RXD,    \
+       .rxds_min = HW_ATL_B0_MIN_RXD,    \
+       .txd_alignment = 1U,              \
+       .txd_size = HW_ATL_B0_TXD_SIZE,   \
+       .txds_max = HW_ATL_B0_MAX_TXD,    \
+       .txds_min = HW_ATL_B0_MIN_TXD,    \
+       .txhwb_alignment = 4096U,         \
+       .tx_rings = HW_ATL_B0_TX_RINGS,   \
+       .rx_rings = HW_ATL_B0_RX_RINGS,   \
+       .hw_features = NETIF_F_HW_CSUM |  \
+                       NETIF_F_RXCSUM |  \
+                       NETIF_F_RXHASH |  \
+                       NETIF_F_SG |      \
+                       NETIF_F_TSO |     \
+                       NETIF_F_LRO,      \
+       .hw_priv_flags = IFF_UNICAST_FLT, \
+       .flow_control = true,             \
+       .mtu = HW_ATL_B0_MTU_JUMBO,       \
+       .mac_regs_count = 88,             \
        .hw_alive_check_addr = 0x10U
 
 const struct aq_hw_caps_s hw_atl_b0_caps_aqc100 = {
@@ -933,7 +935,6 @@ static int hw_atl_b0_hw_ring_rx_stop(struct aq_hw_s *self,
 const struct aq_hw_ops hw_atl_ops_b0 = {
        .hw_set_mac_address   = hw_atl_b0_hw_mac_addr_set,
        .hw_init              = hw_atl_b0_hw_init,
-       .hw_deinit            = hw_atl_utils_hw_deinit,
        .hw_set_power         = hw_atl_utils_hw_set_power,
        .hw_reset             = hw_atl_b0_hw_reset,
        .hw_start             = hw_atl_b0_hw_start,
index 405d1455c22250bd6f5b7dcce531b269758a305b..28568f5fa74b0f2d72d460c755fad49c09089889 100644 (file)
 #define HW_ATL_INTR_MODER_MAX  0x1FF
 #define HW_ATL_INTR_MODER_MIN  0xFF
 
+#define HW_ATL_B0_MIN_RXD \
+       (ALIGN(AQ_CFG_SKB_FRAGS_MAX + 1U, AQ_HW_RXD_MULTIPLE))
+#define HW_ATL_B0_MIN_TXD \
+       (ALIGN(AQ_CFG_SKB_FRAGS_MAX + 1U, AQ_HW_TXD_MULTIPLE))
+
+#define HW_ATL_B0_MAX_RXD 8184U
+#define HW_ATL_B0_MAX_TXD 8184U
+
 /* HW layer capabilities */
 
 #endif /* HW_ATL_B0_INTERNAL_H */
index e652d86b87d40eb9c0050c7ce525c2a5e3ab2513..c965e65d07db3be832b0edd332eaedfb17976143 100644 (file)
 #define HW_ATL_MPI_CONTROL_ADR  0x0368U
 #define HW_ATL_MPI_STATE_ADR    0x036CU
 
-#define HW_ATL_MPI_STATE_MSK    0x00FFU
-#define HW_ATL_MPI_STATE_SHIFT  0U
-#define HW_ATL_MPI_SPEED_MSK    0xFFFF0000U
-#define HW_ATL_MPI_SPEED_SHIFT  16U
+#define HW_ATL_MPI_STATE_MSK      0x00FFU
+#define HW_ATL_MPI_STATE_SHIFT    0U
+#define HW_ATL_MPI_SPEED_MSK      0x00FF0000U
+#define HW_ATL_MPI_SPEED_SHIFT    16U
+#define HW_ATL_MPI_DIRTY_WAKE_MSK 0x02000000U
 
 #define HW_ATL_MPI_DAISY_CHAIN_STATUS  0x704
 #define HW_ATL_MPI_BOOT_EXIT_CODE      0x388
@@ -525,19 +526,20 @@ static int hw_atl_utils_mpi_set_speed(struct aq_hw_s *self, u32 speed)
 {
        u32 val = aq_hw_read_reg(self, HW_ATL_MPI_CONTROL_ADR);
 
-       val = (val & HW_ATL_MPI_STATE_MSK) | (speed << HW_ATL_MPI_SPEED_SHIFT);
+       val = val & ~HW_ATL_MPI_SPEED_MSK;
+       val |= speed << HW_ATL_MPI_SPEED_SHIFT;
        aq_hw_write_reg(self, HW_ATL_MPI_CONTROL_ADR, val);
 
        return 0;
 }
 
-void hw_atl_utils_mpi_set(struct aq_hw_s *self,
-                         enum hal_atl_utils_fw_state_e state,
-                         u32 speed)
+static int hw_atl_utils_mpi_set_state(struct aq_hw_s *self,
+                                     enum hal_atl_utils_fw_state_e state)
 {
        int err = 0;
        u32 transaction_id = 0;
        struct hw_aq_atl_utils_mbox_header mbox;
+       u32 val = aq_hw_read_reg(self, HW_ATL_MPI_CONTROL_ADR);
 
        if (state == MPI_RESET) {
                hw_atl_utils_mpi_read_mbox(self, &mbox);
@@ -551,21 +553,21 @@ void hw_atl_utils_mpi_set(struct aq_hw_s *self,
                if (err < 0)
                        goto err_exit;
        }
+       /* On interface DEINIT we disable DW (dirty wake): raise the bit.
+        * Otherwise enable DW: clear the bit.
+        */
+       if (state == MPI_DEINIT || state == MPI_POWER)
+               val |= HW_ATL_MPI_DIRTY_WAKE_MSK;
+       else
+               val &= ~HW_ATL_MPI_DIRTY_WAKE_MSK;
 
-       aq_hw_write_reg(self, HW_ATL_MPI_CONTROL_ADR,
-                       (speed << HW_ATL_MPI_SPEED_SHIFT) | state);
-
-err_exit:;
-}
-
-static int hw_atl_utils_mpi_set_state(struct aq_hw_s *self,
-                                     enum hal_atl_utils_fw_state_e state)
-{
-       u32 val = aq_hw_read_reg(self, HW_ATL_MPI_CONTROL_ADR);
+       /* Set new state bits */
+       val = val & ~HW_ATL_MPI_STATE_MSK;
+       val |= state & HW_ATL_MPI_STATE_MSK;
 
-       val = state | (val & HW_ATL_MPI_SPEED_MSK);
        aq_hw_write_reg(self, HW_ATL_MPI_CONTROL_ADR, val);
-       return 0;
+err_exit:
+       return err;
 }
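
The rewritten helper above is a plain read-modify-write of
HW_ATL_MPI_CONTROL_ADR. Isolated as a pure function, using the masks from
this patch (state in bits 7:0, dirty wake at bit 25), the update is:

static unsigned int mpi_ctrl_next(unsigned int val, unsigned int state,
                                  int dirty_wake)
{
        if (dirty_wake)
                val |= 0x02000000U;     /* HW_ATL_MPI_DIRTY_WAKE_MSK */
        else
                val &= ~0x02000000U;

        val &= ~0x00FFU;                /* clear HW_ATL_MPI_STATE_MSK */
        val |= state & 0x00FFU;         /* splice in the new state    */
        return val;
}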
 
 int hw_atl_utils_mpi_get_link_status(struct aq_hw_s *self)
@@ -721,16 +723,18 @@ void hw_atl_utils_hw_chip_features_init(struct aq_hw_s *self, u32 *p)
        *p = chip_features;
 }
 
-int hw_atl_utils_hw_deinit(struct aq_hw_s *self)
+static int hw_atl_fw1x_deinit(struct aq_hw_s *self)
 {
-       hw_atl_utils_mpi_set(self, MPI_DEINIT, 0x0U);
+       hw_atl_utils_mpi_set_speed(self, 0);
+       hw_atl_utils_mpi_set_state(self, MPI_DEINIT);
        return 0;
 }
 
 int hw_atl_utils_hw_set_power(struct aq_hw_s *self,
                              unsigned int power_state)
 {
-       hw_atl_utils_mpi_set(self, MPI_POWER, 0x0U);
+       hw_atl_utils_mpi_set_speed(self, 0);
+       hw_atl_utils_mpi_set_state(self, MPI_POWER);
        return 0;
 }
 
@@ -823,10 +827,12 @@ int hw_atl_utils_get_fw_version(struct aq_hw_s *self, u32 *fw_version)
 
 const struct aq_fw_ops aq_fw_1x_ops = {
        .init = hw_atl_utils_mpi_create,
+       .deinit = hw_atl_fw1x_deinit,
        .reset = NULL,
        .get_mac_permanent = hw_atl_utils_get_mac_permanent,
        .set_link_speed = hw_atl_utils_mpi_set_speed,
        .set_state = hw_atl_utils_mpi_set_state,
        .update_link_status = hw_atl_utils_mpi_get_link_status,
        .update_stats = hw_atl_utils_update_stats,
+       .set_flow_control = NULL,
 };
index cd8f18f39c611f8f709f71c7a1c23da8332a3fa4..b875590efcbddbeb5983f2da99f9785d4298734c 100644 (file)
@@ -239,6 +239,41 @@ enum hw_atl_fw2x_caps_hi {
        CAPS_HI_TRANSACTION_ID,
 };
 
+enum hw_atl_fw2x_ctrl {
+       CTRL_RESERVED1 = 0x00,
+       CTRL_RESERVED2,
+       CTRL_RESERVED3,
+       CTRL_PAUSE,
+       CTRL_ASYMMETRIC_PAUSE,
+       CTRL_RESERVED4,
+       CTRL_RESERVED5,
+       CTRL_RESERVED6,
+       CTRL_1GBASET_FD_EEE,
+       CTRL_2P5GBASET_FD_EEE,
+       CTRL_5GBASET_FD_EEE,
+       CTRL_10GBASET_FD_EEE,
+       CTRL_THERMAL_SHUTDOWN,
+       CTRL_PHY_LOGS,
+       CTRL_EEE_AUTO_DISABLE,
+       CTRL_PFC,
+       CTRL_WAKE_ON_LINK,
+       CTRL_CABLE_DIAG,
+       CTRL_TEMPERATURE,
+       CTRL_DOWNSHIFT,
+       CTRL_PTP_AVB,
+       CTRL_RESERVED7,
+       CTRL_LINK_DROP,
+       CTRL_SLEEP_PROXY,
+       CTRL_WOL,
+       CTRL_MAC_STOP,
+       CTRL_EXT_LOOPBACK,
+       CTRL_INT_LOOPBACK,
+       CTRL_RESERVED8,
+       CTRL_WOL_TIMER,
+       CTRL_STATISTICS,
+       CTRL_FORCE_RECONNECT,
+};
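
These enumerators are bit positions within the 32-bit FW2X control word;
aq_fw2x_renegotiate() later in this patch, for example, sets
BIT(CTRL_FORCE_RECONNECT) in the CONTROL2 register. Reading positions off
the ordering above:

#define BIT(n)  (1U << (n))
/* BIT(CTRL_PAUSE)            == BIT(3)  == 0x00000008 */
/* BIT(CTRL_ASYMMETRIC_PAUSE) == BIT(4)  == 0x00000010 */
/* BIT(CTRL_FORCE_RECONNECT)  == BIT(31) == 0x80000000 */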
+
 struct aq_hw_s;
 struct aq_fw_ops;
 struct aq_hw_caps_s;
index 39cd3a27fe776cdee650fe7e55dfb2bdabd35ac9..e37943760a58b2a88b33de295e73dfbac71fc5bf 100644 (file)
 #define HW_ATL_FW2X_MPI_STATE_ADDR     0x370
 #define HW_ATL_FW2X_MPI_STATE2_ADDR    0x374
 
+static int aq_fw2x_set_link_speed(struct aq_hw_s *self, u32 speed);
+static int aq_fw2x_set_state(struct aq_hw_s *self,
+                            enum hal_atl_utils_fw_state_e state);
+
 static int aq_fw2x_init(struct aq_hw_s *self)
 {
        int err = 0;
@@ -39,6 +43,16 @@ static int aq_fw2x_init(struct aq_hw_s *self)
        return err;
 }
 
+static int aq_fw2x_deinit(struct aq_hw_s *self)
+{
+       int err = aq_fw2x_set_link_speed(self, 0);
+
+       if (!err)
+               err = aq_fw2x_set_state(self, MPI_DEINIT);
+
+       return err;
+}
+
 static enum hw_atl_fw2x_rate link_speed_mask_2fw2x_ratemask(u32 speed)
 {
        enum hw_atl_fw2x_rate rate = 0;
@@ -73,10 +87,38 @@ static int aq_fw2x_set_link_speed(struct aq_hw_s *self, u32 speed)
        return 0;
 }
 
+static void aq_fw2x_set_mpi_flow_control(struct aq_hw_s *self, u32 *mpi_state)
+{
+       if (self->aq_nic_cfg->flow_control & AQ_NIC_FC_RX)
+               *mpi_state |= BIT(CAPS_HI_PAUSE);
+       else
+               *mpi_state &= ~BIT(CAPS_HI_PAUSE);
+
+       if (self->aq_nic_cfg->flow_control & AQ_NIC_FC_TX)
+               *mpi_state |= BIT(CAPS_HI_ASYMMETRIC_PAUSE);
+       else
+               *mpi_state &= ~BIT(CAPS_HI_ASYMMETRIC_PAUSE);
+}
+
 static int aq_fw2x_set_state(struct aq_hw_s *self,
                             enum hal_atl_utils_fw_state_e state)
 {
-       /* No explicit state in 2x fw */
+       u32 mpi_state = aq_hw_read_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR);
+
+       switch (state) {
+       case MPI_INIT:
+               mpi_state &= ~BIT(CAPS_HI_LINK_DROP);
+               aq_fw2x_set_mpi_flow_control(self, &mpi_state);
+               break;
+       case MPI_DEINIT:
+               mpi_state |= BIT(CAPS_HI_LINK_DROP);
+               break;
+       case MPI_RESET:
+       case MPI_POWER:
+               /* No actions */
+               break;
+       }
+       aq_hw_write_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR, mpi_state);
        return 0;
 }
 
@@ -173,12 +215,37 @@ static int aq_fw2x_update_stats(struct aq_hw_s *self)
        return hw_atl_utils_update_stats(self);
 }
 
+static int aq_fw2x_renegotiate(struct aq_hw_s *self)
+{
+       u32 mpi_opts = aq_hw_read_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR);
+
+       mpi_opts |= BIT(CTRL_FORCE_RECONNECT);
+
+       aq_hw_write_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR, mpi_opts);
+
+       return 0;
+}
+
+static int aq_fw2x_set_flow_control(struct aq_hw_s *self)
+{
+       u32 mpi_state = aq_hw_read_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR);
+
+       aq_fw2x_set_mpi_flow_control(self, &mpi_state);
+
+       aq_hw_write_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR, mpi_state);
+
+       return 0;
+}
+
 const struct aq_fw_ops aq_fw_2x_ops = {
        .init = aq_fw2x_init,
+       .deinit = aq_fw2x_deinit,
        .reset = NULL,
+       .renegotiate = aq_fw2x_renegotiate,
        .get_mac_permanent = aq_fw2x_get_mac_permanent,
        .set_link_speed = aq_fw2x_set_link_speed,
        .set_state = aq_fw2x_set_state,
        .update_link_status = aq_fw2x_update_link_status,
        .update_stats = aq_fw2x_update_stats,
+       .set_flow_control   = aq_fw2x_set_flow_control,
 };
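
The RX/TX split above mirrors the ksettings hunk earlier in this patch:
AQ_NIC_FC_RX advertises Pause, AQ_NIC_FC_TX advertises Asym_Pause. A
self-contained sketch of that mapping (the AQ_NIC_FC_* values are assumed
from aq_nic.h, which is not shown here):

#define AQ_NIC_FC_TX 1U         /* assumed values; defined in aq_nic.h */
#define AQ_NIC_FC_RX 2U

struct fc_adv {
        int pause;              /* symmetric pause advertised  */
        int asym_pause;         /* asymmetric pause advertised */
};

static struct fc_adv aq_fc_to_adv(unsigned int fc)
{
        struct fc_adv adv = {
                .pause      = !!(fc & AQ_NIC_FC_RX),
                .asym_pause = !!(fc & AQ_NIC_FC_TX),
        };
        return adv;
}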
index a445de6837a6c8bff1c250d4702612f4795b2477..94efc6477bdcdab9f5be1ddf0162a26617f46bcf 100644 (file)
@@ -12,8 +12,8 @@
 
 #define NIC_MAJOR_DRIVER_VERSION           2
 #define NIC_MINOR_DRIVER_VERSION           0
-#define NIC_BUILD_DRIVER_VERSION           2
-#define NIC_REVISION_DRIVER_VERSION        1
+#define NIC_BUILD_DRIVER_VERSION           3
+#define NIC_REVISION_DRIVER_VERSION        0
 
 #define AQ_CFG_DRV_VERSION_SUFFIX "-kern"
 
index 5e5022fa1d047be078be911bc4f6cd0631f04de7..6d32211349275d260dbc7817c639df71c91b7084 100644 (file)
@@ -1279,7 +1279,6 @@ static void alx_check_link(struct alx_priv *alx)
        struct alx_hw *hw = &alx->hw;
        unsigned long flags;
        int old_speed;
-       u8 old_duplex;
        int err;
 
        /* clear PHY internal interrupt status, otherwise the main
@@ -1288,7 +1287,6 @@ static void alx_check_link(struct alx_priv *alx)
        alx_clear_phy_intr(hw);
 
        old_speed = hw->link_speed;
-       old_duplex = hw->duplex;
        err = alx_read_phy_link(hw);
        if (err < 0)
                goto reset;
index 8ba7f8ff3434000f57f45968d8a70148f4647cc8..392f564d8fd436f271f08040855f333eca0f15d7 100644 (file)
@@ -1,5 +1,6 @@
 config NET_VENDOR_AURORA
        bool "Aurora VLSI devices"
+       default y
        help
          If you have a network (Ethernet) device belonging to this class,
          say Y.
index e94159507847b33962f99d63561301b924fd2dd1..c8d1f8fa4713401321767218423f6a55375519be 100644 (file)
@@ -304,12 +304,10 @@ static int nb8800_poll(struct napi_struct *napi, int budget)
 
 again:
        do {
-               struct nb8800_rx_buf *rxb;
                unsigned int len;
 
                next = (last + 1) % RX_DESC_COUNT;
 
-               rxb = &priv->rx_bufs[next];
                rxd = &priv->rx_descs[next];
 
                if (!rxd->report)
index 4c3bfde6e8de00f2010b1329e05c8b36a16e158f..b7aa8ad96dfb72efef7dce80d257bf413b9365a8 100644 (file)
@@ -61,7 +61,7 @@ config BCM63XX_ENET
 
 config BCMGENET
        tristate "Broadcom GENET internal MAC support"
-       depends on OF && HAS_IOMEM
+       depends on HAS_IOMEM
        select MII
        select PHYLIB
        select FIXED_PHY
@@ -181,7 +181,7 @@ config BGMAC_PLATFORM
 
 config SYSTEMPORT
        tristate "Broadcom SYSTEMPORT internal MAC support"
-       depends on OF
+       depends on HAS_IOMEM
        depends on NET_DSA || !NET_DSA
        select MII
        select PHYLIB
index a1f60f89e05944458e98e7faa2292960368c5ef8..284581c9680e31f1db5aed1cdcec1e65b7596e6c 100644 (file)
@@ -1041,17 +1041,25 @@ static int bcm_sysport_poll(struct napi_struct *napi, int budget)
        return work_done;
 }
 
-static void bcm_sysport_resume_from_wol(struct bcm_sysport_priv *priv)
+static void mpd_enable_set(struct bcm_sysport_priv *priv, bool enable)
 {
        u32 reg;
 
+       reg = umac_readl(priv, UMAC_MPD_CTRL);
+       if (enable)
+               reg |= MPD_EN;
+       else
+               reg &= ~MPD_EN;
+       umac_writel(priv, reg, UMAC_MPD_CTRL);
+}
+
+static void bcm_sysport_resume_from_wol(struct bcm_sysport_priv *priv)
+{
        /* Stop monitoring MPD interrupt */
        intrl2_0_mask_set(priv, INTRL2_0_MPD);
 
        /* Clear the MagicPacket detection logic */
-       reg = umac_readl(priv, UMAC_MPD_CTRL);
-       reg &= ~MPD_EN;
-       umac_writel(priv, reg, UMAC_MPD_CTRL);
+       mpd_enable_set(priv, false);
 
        netif_dbg(priv, wol, priv->netdev, "resumed from WOL\n");
 }
@@ -1102,10 +1110,8 @@ static irqreturn_t bcm_sysport_rx_isr(int irq, void *dev_id)
        if (priv->irq0_stat & INTRL2_0_TX_RING_FULL)
                bcm_sysport_tx_reclaim_all(priv);
 
-       if (priv->irq0_stat & INTRL2_0_MPD) {
+       if (priv->irq0_stat & INTRL2_0_MPD)
                netdev_info(priv->netdev, "Wake-on-LAN interrupt!\n");
-               bcm_sysport_resume_from_wol(priv);
-       }
 
        if (!priv->is_lite)
                goto out;
@@ -2107,7 +2113,7 @@ static const struct ethtool_ops bcm_sysport_ethtool_ops = {
 };
 
 static u16 bcm_sysport_select_queue(struct net_device *dev, struct sk_buff *skb,
-                                   void *accel_priv,
+                                   struct net_device *sb_dev,
                                    select_queue_fallback_t fallback)
 {
        struct bcm_sysport_priv *priv = netdev_priv(dev);
@@ -2116,7 +2122,7 @@ static u16 bcm_sysport_select_queue(struct net_device *dev, struct sk_buff *skb,
        unsigned int q, port;
 
        if (!netdev_uses_dsa(dev))
-               return fallback(dev, skb);
+               return fallback(dev, skb, NULL);
 
        /* DSA tagging layer will have configured the correct queue */
        q = BRCM_TAG_GET_QUEUE(queue);
@@ -2124,7 +2130,7 @@ static u16 bcm_sysport_select_queue(struct net_device *dev, struct sk_buff *skb,
        tx_ring = priv->ring_map[q + port * priv->per_port_num_tx_queues];
 
        if (unlikely(!tx_ring))
-               return fallback(dev, skb);
+               return fallback(dev, skb, NULL);
 
        return tx_ring->index;
 }
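
The fallback(dev, skb, NULL) calls above reflect the tree-wide callback
change this merge carries: ndo_select_queue and its fallback gained a
struct net_device *sb_dev argument in place of the old void *accel_priv,
and drivers with no subordinate device pass NULL. A sketch of the typedef
as assumed from this era's netdevice.h:

typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
                                       struct sk_buff *skb,
                                       struct net_device *sb_dev);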
@@ -2449,9 +2455,7 @@ static int bcm_sysport_suspend_to_wol(struct bcm_sysport_priv *priv)
 
        /* Do not leave the UniMAC RBUF matching only MPD packets */
        if (!timeout) {
-               reg = umac_readl(priv, UMAC_MPD_CTRL);
-               reg &= ~MPD_EN;
-               umac_writel(priv, reg, UMAC_MPD_CTRL);
+               mpd_enable_set(priv, false);
                netif_err(priv, wol, ndev, "failed to enter WOL mode\n");
                return -ETIMEDOUT;
        }
index e6ea8e61f96ddc17c09a7f90f3f37727ca49f7ca..4c94d9218bba9c75cf8183a8b0feca7a9aaba18c 100644 (file)
@@ -236,7 +236,6 @@ static void bgmac_dma_tx_free(struct bgmac *bgmac, struct bgmac_dma_ring *ring)
 {
        struct device *dma_dev = bgmac->dma_dev;
        int empty_slot;
-       bool freed = false;
        unsigned bytes_compl = 0, pkts_compl = 0;
 
        /* The last slot that hardware didn't consume yet */
@@ -279,7 +278,6 @@ static void bgmac_dma_tx_free(struct bgmac *bgmac, struct bgmac_dma_ring *ring)
 
                slot->dma_addr = 0;
                ring->start++;
-               freed = true;
        }
 
        if (!pkts_compl)
index af7b5a4d8ba044800b0eb229d8c989c564515e94..5a727d4729da7348075b75101154cca3cf515073 100644 (file)
@@ -1910,7 +1910,8 @@ void bnx2x_netif_stop(struct bnx2x *bp, int disable_hw)
 }
 
 u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb,
-                      void *accel_priv, select_queue_fallback_t fallback)
+                      struct net_device *sb_dev,
+                      select_queue_fallback_t fallback)
 {
        struct bnx2x *bp = netdev_priv(dev);
 
@@ -1932,7 +1933,8 @@ u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb,
        }
 
        /* select a non-FCoE queue */
-       return fallback(dev, skb) % (BNX2X_NUM_ETH_QUEUES(bp) * bp->max_cos);
+       return fallback(dev, skb, NULL) %
+              (BNX2X_NUM_ETH_QUEUES(bp) * bp->max_cos);
 }
 
 void bnx2x_set_num_queues(struct bnx2x *bp)
index a8ce5c55bbb0ca29b5cca28171cd94733e7260eb..0e508e5defce315f2e5254ca238afe26b523054a 100644 (file)
@@ -497,7 +497,8 @@ int bnx2x_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
 
 /* select_queue callback */
 u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb,
-                      void *accel_priv, select_queue_fallback_t fallback);
+                      struct net_device *sb_dev,
+                      select_queue_fallback_t fallback);
 
 static inline void bnx2x_update_rx_prod(struct bnx2x *bp,
                                        struct bnx2x_fastpath *fp,
index 22243c480a05341238850e71b9bd196bc705a064..98d4c5a3ff21171257765c0eda4333f0a8484143 100644 (file)
@@ -6339,6 +6339,7 @@ int bnx2x_set_led(struct link_params *params,
                 */
                if (!vars->link_up)
                        break;
+               /* else: fall through */
        case LED_MODE_ON:
                if (((params->phy[EXT_PHY1].type ==
                          PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM8727) ||
@@ -12521,11 +12522,13 @@ static void bnx2x_phy_def_cfg(struct link_params *params,
        switch (link_config  & PORT_FEATURE_LINK_SPEED_MASK) {
        case PORT_FEATURE_LINK_SPEED_10M_HALF:
                phy->req_duplex = DUPLEX_HALF;
+               /* fall through */
        case PORT_FEATURE_LINK_SPEED_10M_FULL:
                phy->req_line_speed = SPEED_10;
                break;
        case PORT_FEATURE_LINK_SPEED_100M_HALF:
                phy->req_duplex = DUPLEX_HALF;
+               /* fall through */
        case PORT_FEATURE_LINK_SPEED_100M_FULL:
                phy->req_line_speed = SPEED_100;
                break;
index 57348f2b49a31fd5b1ef5a67d2ba1e7945768ab0..71362b7f60402545c3b4aa2bc391a1e2d3cd7f7f 100644 (file)
@@ -8561,11 +8561,11 @@ int bnx2x_set_int_mode(struct bnx2x *bp)
                               bp->num_queues,
                               1 + bp->num_cnic_queues);
 
-               /* falling through... */
+               /* fall through */
        case BNX2X_INT_MODE_MSI:
                bnx2x_enable_msi(bp);
 
-               /* falling through... */
+               /* fall through */
        case BNX2X_INT_MODE_INTX:
                bp->num_ethernet_queues = 1;
                bp->num_queues = bp->num_ethernet_queues + bp->num_cnic_queues;
index 8baf9d3eb4b1c1a1dd84abe031b7c19c8dbb7370..3f4d2c8da21a3a848b4149758883333522b6f77a 100644 (file)
@@ -3258,7 +3258,7 @@ static int bnx2x_mcast_validate_e2(struct bnx2x *bp,
        /* DEL command deletes all currently configured MACs */
        case BNX2X_MCAST_CMD_DEL:
                o->set_registry_size(o, 0);
-               /* Don't break */
+               /* fall through */
 
        /* RESTORE command will restore the entire multicast configuration */
        case BNX2X_MCAST_CMD_RESTORE:
@@ -3592,7 +3592,7 @@ static int bnx2x_mcast_validate_e1(struct bnx2x *bp,
        /* DEL command deletes all currently configured MACs */
        case BNX2X_MCAST_CMD_DEL:
                o->set_registry_size(o, 0);
-               /* Don't break */
+               /* fall through */
 
        /* RESTORE command will restore the entire multicast configuration */
        case BNX2X_MCAST_CMD_RESTORE:
index dc77bfded8652d7d200a123838cf8e07b5201ff8..62da465377340249af3e7e0671f7802272ba410a 100644 (file)
@@ -1827,6 +1827,7 @@ get_vf:
                DP(BNX2X_MSG_IOV, "got VF [%d:%d] RSS update ramrod\n",
                   vf->abs_vfid, qidx);
                bnx2x_vf_handle_rss_update_eqe(bp, vf);
+               /* fall through */
        case EVENT_RING_OPCODE_VF_FLR:
                /* Do nothing for now */
                return 0;
index 4394c1162be4fde931aa822d69a008b89f54efb8..c612d74451a7fa1730c9e66fa33344f37e844683 100644 (file)
@@ -1727,7 +1727,7 @@ static int bnxt_async_event_process(struct bnxt *bp,
                                            speed);
                }
                set_bit(BNXT_LINK_SPEED_CHNG_SP_EVENT, &bp->sp_event);
-               /* fall thru */
+               /* fall through */
        }
        case ASYNC_EVENT_CMPL_EVENT_ID_LINK_STATUS_CHANGE:
                set_bit(BNXT_LINK_CHNG_SP_EVENT, &bp->sp_event);
@@ -3012,13 +3012,6 @@ static void bnxt_free_hwrm_resources(struct bnxt *bp)
                          bp->hwrm_cmd_resp_dma_addr);
 
        bp->hwrm_cmd_resp_addr = NULL;
-       if (bp->hwrm_dbg_resp_addr) {
-               dma_free_coherent(&pdev->dev, HWRM_DBG_REG_BUF_SIZE,
-                                 bp->hwrm_dbg_resp_addr,
-                                 bp->hwrm_dbg_resp_dma_addr);
-
-               bp->hwrm_dbg_resp_addr = NULL;
-       }
 }
 
 static int bnxt_alloc_hwrm_resources(struct bnxt *bp)
@@ -3030,12 +3023,6 @@ static int bnxt_alloc_hwrm_resources(struct bnxt *bp)
                                                   GFP_KERNEL);
        if (!bp->hwrm_cmd_resp_addr)
                return -ENOMEM;
-       bp->hwrm_dbg_resp_addr = dma_alloc_coherent(&pdev->dev,
-                                                   HWRM_DBG_REG_BUF_SIZE,
-                                                   &bp->hwrm_dbg_resp_dma_addr,
-                                                   GFP_KERNEL);
-       if (!bp->hwrm_dbg_resp_addr)
-               netdev_warn(bp->dev, "fail to alloc debug register dma mem\n");
 
        return 0;
 }
@@ -7991,7 +7978,7 @@ static int bnxt_setup_tc_block(struct net_device *dev,
        switch (f->command) {
        case TC_BLOCK_BIND:
                return tcf_block_cb_register(f->block, bnxt_setup_tc_block_cb,
-                                            bp, bp);
+                                            bp, bp, f->extack);
        case TC_BLOCK_UNBIND:
                tcf_block_cb_unregister(f->block, bnxt_setup_tc_block_cb, bp);
                return 0;
index 91575ef97c8cb119d9407530f4b6f5472d72724c..934aa11c82ebbde8d486b28265821cb61a8cab8b 100644 (file)
@@ -1287,9 +1287,6 @@ struct bnxt {
        dma_addr_t              hwrm_short_cmd_req_dma_addr;
        void                    *hwrm_cmd_resp_addr;
        dma_addr_t              hwrm_cmd_resp_dma_addr;
-       void                    *hwrm_dbg_resp_addr;
-       dma_addr_t              hwrm_dbg_resp_dma_addr;
-#define HWRM_DBG_REG_BUF_SIZE  128
 
        struct rx_port_stats    *hw_rx_port_stats;
        struct tx_port_stats    *hw_tx_port_stats;
index 402fa32f7a8802140513f7cf12501f51e7ca9d0f..7bd96ab4f7c5793e763b7cc9ba135e7242c10827 100644 (file)
@@ -21,16 +21,99 @@ static const struct devlink_ops bnxt_dl_ops = {
 #endif /* CONFIG_BNXT_SRIOV */
 };
 
+static const struct bnxt_dl_nvm_param nvm_params[] = {
+       {DEVLINK_PARAM_GENERIC_ID_ENABLE_SRIOV, NVM_OFF_ENABLE_SRIOV,
+        BNXT_NVM_SHARED_CFG, 1},
+};
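
Reading the single row registered above:

/* .id       = DEVLINK_PARAM_GENERIC_ID_ENABLE_SRIOV
 * .offset   = NVM_OFF_ENABLE_SRIOV (HWRM option_num 401)
 * .dir_type = BNXT_NVM_SHARED_CFG  (no per-port/per-function index)
 * .num_bits = 1                    (marshalled through val->vbool)
 */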
+
+static int bnxt_hwrm_nvm_req(struct bnxt *bp, u32 param_id, void *msg,
+                            int msg_len, union devlink_param_value *val)
+{
+       struct hwrm_nvm_variable_input *req = msg;
+       void *data_addr = NULL, *buf = NULL;
+       struct bnxt_dl_nvm_param nvm_param;
+       int bytesize, idx = 0, rc, i;
+       dma_addr_t data_dma_addr;
+
+       /* Get/Set NVM CFG parameter is supported only on PFs */
+       if (BNXT_VF(bp))
+               return -EPERM;
+
+       for (i = 0; i < ARRAY_SIZE(nvm_params); i++) {
+               if (nvm_params[i].id == param_id) {
+                       nvm_param = nvm_params[i];
+                       break;
+               }
+       }
+
+       if (i == ARRAY_SIZE(nvm_params))
+               return -EOPNOTSUPP;     /* don't use nvm_param uninitialized */
+
+       if (nvm_param.dir_type == BNXT_NVM_PORT_CFG)
+               idx = bp->pf.port_id;
+       else if (nvm_param.dir_type == BNXT_NVM_FUNC_CFG)
+               idx = bp->pf.fw_fid - BNXT_FIRST_PF_FID;
+
+       bytesize = roundup(nvm_param.num_bits, BITS_PER_BYTE) / BITS_PER_BYTE;
+       if (nvm_param.num_bits == 1)
+               buf = &val->vbool;
+
+       data_addr = dma_zalloc_coherent(&bp->pdev->dev, bytesize,
+                                       &data_dma_addr, GFP_KERNEL);
+       if (!data_addr)
+               return -ENOMEM;
+
+       req->data_addr = cpu_to_le64(data_dma_addr);
+       req->data_len = cpu_to_le16(nvm_param.num_bits);
+       req->option_num = cpu_to_le16(nvm_param.offset);
+       req->index_0 = cpu_to_le16(idx);
+       if (idx)
+               req->dimensions = cpu_to_le16(1);
+
+       if (req->req_type == HWRM_NVM_SET_VARIABLE)
+               memcpy(data_addr, buf, bytesize);
+
+       rc = hwrm_send_message(bp, msg, msg_len, HWRM_CMD_TIMEOUT);
+       if (!rc && req->req_type == HWRM_NVM_GET_VARIABLE)
+               memcpy(buf, data_addr, bytesize);
+
+       dma_free_coherent(&bp->pdev->dev, bytesize, data_addr, data_dma_addr);
+       if (rc)
+               return -EIO;
+       return 0;
+}
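
For the one-bit parameter this driver registers, the DMA buffer sizing above
reduces to a single byte. A quick user-space check of the expression, with
roundup and BITS_PER_BYTE spelled out to match the kernel macros:

#include <stdio.h>

#define BITS_PER_BYTE   8
#define roundup(x, y)   ((((x) + (y) - 1) / (y)) * (y))

int main(void)
{
        int num_bits = 1;       /* enable_sriov occupies one NVM bit */
        int bytesize = roundup(num_bits, BITS_PER_BYTE) / BITS_PER_BYTE;

        printf("%d\n", bytesize);       /* prints 1 */
        return 0;
}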
+
+static int bnxt_dl_nvm_param_get(struct devlink *dl, u32 id,
+                                struct devlink_param_gset_ctx *ctx)
+{
+       struct hwrm_nvm_get_variable_input req = {0};
+       struct bnxt *bp = bnxt_get_bp_from_dl(dl);
+
+       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_NVM_GET_VARIABLE, -1, -1);
+       return bnxt_hwrm_nvm_req(bp, id, &req, sizeof(req), &ctx->val);
+}
+
+static int bnxt_dl_nvm_param_set(struct devlink *dl, u32 id,
+                                struct devlink_param_gset_ctx *ctx)
+{
+       struct hwrm_nvm_set_variable_input req = {0};
+       struct bnxt *bp = bnxt_get_bp_from_dl(dl);
+
+       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_NVM_SET_VARIABLE, -1, -1);
+       return bnxt_hwrm_nvm_req(bp, id, &req, sizeof(req), &ctx->val);
+}
+
+static const struct devlink_param bnxt_dl_params[] = {
+       DEVLINK_PARAM_GENERIC(ENABLE_SRIOV,
+                             BIT(DEVLINK_PARAM_CMODE_PERMANENT),
+                             bnxt_dl_nvm_param_get, bnxt_dl_nvm_param_set,
+                             NULL),
+};
+
 int bnxt_dl_register(struct bnxt *bp)
 {
        struct devlink *dl;
        int rc;
 
-       if (!pci_find_ext_capability(bp->pdev, PCI_EXT_CAP_ID_SRIOV))
-               return 0;
-
-       if (bp->hwrm_spec_code < 0x10803) {
-               netdev_warn(bp->dev, "Firmware does not support SR-IOV E-Switch SWITCHDEV mode.\n");
+       if (bp->hwrm_spec_code < 0x10600) {
+               netdev_warn(bp->dev, "Firmware does not support NVM params\n");
                return -ENOTSUPP;
        }
 
@@ -41,16 +124,34 @@ int bnxt_dl_register(struct bnxt *bp)
        }
 
        bnxt_link_bp_to_dl(bp, dl);
-       bp->eswitch_mode = DEVLINK_ESWITCH_MODE_LEGACY;
+
+       /* Add switchdev eswitch mode setting, if SRIOV supported */
+       if (pci_find_ext_capability(bp->pdev, PCI_EXT_CAP_ID_SRIOV) &&
+           bp->hwrm_spec_code > 0x10803)
+               bp->eswitch_mode = DEVLINK_ESWITCH_MODE_LEGACY;
+
        rc = devlink_register(dl, &bp->pdev->dev);
        if (rc) {
-               bnxt_link_bp_to_dl(bp, NULL);
-               devlink_free(dl);
                netdev_warn(bp->dev, "devlink_register failed. rc=%d", rc);
-               return rc;
+               goto err_dl_free;
+       }
+
+       rc = devlink_params_register(dl, bnxt_dl_params,
+                                    ARRAY_SIZE(bnxt_dl_params));
+       if (rc) {
+               netdev_warn(bp->dev, "devlink_params_register failed. rc=%d",
+                           rc);
+               goto err_dl_unreg;
        }
 
        return 0;
+
+err_dl_unreg:
+       devlink_unregister(dl);
+err_dl_free:
+       bnxt_link_bp_to_dl(bp, NULL);
+       devlink_free(dl);
+       return rc;
 }
 
 void bnxt_dl_unregister(struct bnxt *bp)
@@ -60,6 +161,8 @@ void bnxt_dl_unregister(struct bnxt *bp)
        if (!dl)
                return;
 
+       devlink_params_unregister(dl, bnxt_dl_params,
+                                 ARRAY_SIZE(bnxt_dl_params));
        devlink_unregister(dl);
        devlink_free(dl);
 }
index e92a35d8b64204da64e240047a0cb914cc5cb354..2f68dc048390b84300cefd73a974fe3826bb3fdb 100644 (file)
@@ -33,6 +33,21 @@ static inline void bnxt_link_bp_to_dl(struct bnxt *bp, struct devlink *dl)
        }
 }
 
+#define NVM_OFF_ENABLE_SRIOV           401
+
+enum bnxt_nvm_dir_type {
+       BNXT_NVM_SHARED_CFG = 40,
+       BNXT_NVM_PORT_CFG,
+       BNXT_NVM_FUNC_CFG,
+};
+
+struct bnxt_dl_nvm_param {
+       u16 id;
+       u16 offset;
+       u16 dir_type;
+       u16 num_bits;
+};
+
 int bnxt_dl_register(struct bnxt *bp);
 void bnxt_dl_unregister(struct bnxt *bp);
 
index 0fe0ea8dce6c7bd3fd64c9dea98daeb3529c9147..c75d7fa6dab66262783a6e79755c85ddaeb76645 100644 (file)
@@ -6201,6 +6201,19 @@ struct hwrm_nvm_install_update_cmd_err {
        u8      unused_0[7];
 };
 
+struct hwrm_nvm_variable_input {
+       __le16  req_type;
+       __le16  cmpl_ring;
+       __le16  seq_id;
+       __le16  target_id;
+       __le64  resp_addr;
+       __le64  data_addr;
+       __le16  data_len;
+       __le16  option_num;
+       __le16  dimensions;
+       __le16  index_0;
+};
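
The request above packs to 32 bytes: four 16-bit header words, two 64-bit
addresses, four 16-bit payload words. A user-space shadow of the layout with
a compile-time size check, purely for illustration:

#include <stdint.h>

struct nvm_variable_input_shadow {
        uint16_t req_type, cmpl_ring, seq_id, target_id;
        uint64_t resp_addr, data_addr;
        uint16_t data_len, option_num, dimensions, index_0;
} __attribute__((packed));

_Static_assert(sizeof(struct nvm_variable_input_shadow) == 32,
               "HWRM NVM variable request is 32 bytes");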
+
 /* hwrm_nvm_get_variable_input (size:320b/40B) */
 struct hwrm_nvm_get_variable_input {
        __le16  req_type;
index a64910892c25e9526c313fa86a7dec0e1b05bf6f..f560845c5a9ddb8126161d266590b81cd6101b59 100644 (file)
@@ -956,9 +956,13 @@ static int bnxt_vf_validate_set_mac(struct bnxt *bp, struct bnxt_vf_info *vf)
        } else if (is_valid_ether_addr(vf->vf_mac_addr)) {
                if (ether_addr_equal((const u8 *)req->l2_addr, vf->vf_mac_addr))
                        mac_ok = true;
-       } else if (bp->hwrm_spec_code < 0x10202) {
-               mac_ok = true;
        } else {
+               /* There are two cases:
+                * 1. If firmware spec < 0x10202, the VF MAC address is not
+                *    forwarded to the PF, so it doesn't have to match.
+                * 2. If firmware spec >= 0x10202 and the PF has not assigned
+                *    a valid MAC address, allow the VF to modify its own MAC.
+                */
                mac_ok = true;
        }
        if (mac_ok)
index 491bd40a254d8dad8810d983505b69efe2d011b1..139d96c5a02355f557d586f5edcab1712c048036 100644 (file)
@@ -1568,22 +1568,16 @@ void bnxt_tc_flow_stats_work(struct bnxt *bp)
 int bnxt_tc_setup_flower(struct bnxt *bp, u16 src_fid,
                         struct tc_cls_flower_offload *cls_flower)
 {
-       int rc = 0;
-
        switch (cls_flower->command) {
        case TC_CLSFLOWER_REPLACE:
-               rc = bnxt_tc_add_flow(bp, src_fid, cls_flower);
-               break;
-
+               return bnxt_tc_add_flow(bp, src_fid, cls_flower);
        case TC_CLSFLOWER_DESTROY:
-               rc = bnxt_tc_del_flow(bp, cls_flower);
-               break;
-
+               return bnxt_tc_del_flow(bp, cls_flower);
        case TC_CLSFLOWER_STATS:
-               rc = bnxt_tc_get_flow_stats(bp, cls_flower);
-               break;
+               return bnxt_tc_get_flow_stats(bp, cls_flower);
+       default:
+               return -EOPNOTSUPP;
        }
-       return rc;
 }
 
 static const struct rhashtable_params bnxt_tc_flow_ht_params = {
index 05d4059059062463ee8fa38d16a9a708f96cb411..e31f5d803c1305f71159ca6e2c8bc5ed5f94afa2 100644 (file)
@@ -173,7 +173,7 @@ static int bnxt_vf_rep_setup_tc_block(struct net_device *dev,
        case TC_BLOCK_BIND:
                return tcf_block_cb_register(f->block,
                                             bnxt_vf_rep_setup_tc_block_cb,
-                                            vf_rep, vf_rep);
+                                            vf_rep, vf_rep, f->extack);
        case TC_BLOCK_UNBIND:
                tcf_block_cb_unregister(f->block,
                                        bnxt_vf_rep_setup_tc_block_cb, vf_rep);
@@ -543,9 +543,14 @@ int bnxt_dl_eswitch_mode_set(struct devlink *devlink, u16 mode)
                break;
 
        case DEVLINK_ESWITCH_MODE_SWITCHDEV:
+               if (bp->hwrm_spec_code < 0x10803) {
+                       netdev_warn(bp->dev, "FW does not support SRIOV E-Switch SWITCHDEV mode\n");
+                       rc = -ENOTSUPP;
+                       goto done;
+               }
+
                if (pci_num_vf(bp->pdev) == 0) {
-                       netdev_info(bp->dev,
-                                   "Enable VFs before setting switchdev mode");
+                       netdev_info(bp->dev, "Enable VFs before setting switchdev mode");
                        rc = -EPERM;
                        goto done;
                }
index 1f0e872d06675878c059ab54980c177937630ef6..0584d07c8c33c53a972a64d37e79984e7eb95499 100644 (file)
@@ -219,7 +219,6 @@ int bnxt_xdp(struct net_device *dev, struct netdev_bpf *xdp)
                rc = bnxt_xdp_set(bp, xdp->prog);
                break;
        case XDP_QUERY_PROG:
-               xdp->prog_attached = !!bp->xdp_prog;
                xdp->prog_id = bp->xdp_prog ? bp->xdp_prog->aux->id : 0;
                rc = 0;
                break;
index 4fd829b5e65d14b56337e63fc480dd72c8420eeb..d83233ae4a15f318ea89acb5da927e71501c2a5e 100644 (file)
@@ -2562,7 +2562,6 @@ static void cnic_bnx2x_delete_wait(struct cnic_dev *dev, u32 start_cid)
 
 static int cnic_bnx2x_fcoe_fw_destroy(struct cnic_dev *dev, struct kwqe *kwqe)
 {
-       struct fcoe_kwqe_destroy *req;
        union l5cm_specific_data l5_data;
        struct cnic_local *cp = dev->cnic_priv;
        struct bnx2x *bp = netdev_priv(dev->netdev);
@@ -2571,7 +2570,6 @@ static int cnic_bnx2x_fcoe_fw_destroy(struct cnic_dev *dev, struct kwqe *kwqe)
 
        cnic_bnx2x_delete_wait(dev, MAX_ISCSI_TBL_SZ);
 
-       req = (struct fcoe_kwqe_destroy *) kwqe;
        cid = BNX2X_HW_CID(bp, cp->fcoe_init_cid);
 
        memset(&l5_data, 0, sizeof(l5_data));
@@ -4090,7 +4088,7 @@ static void cnic_cm_free_mem(struct cnic_dev *dev)
 {
        struct cnic_local *cp = dev->cnic_priv;
 
-       kfree(cp->csk_tbl);
+       kvfree(cp->csk_tbl);
        cp->csk_tbl = NULL;
        cnic_free_id_tbl(&cp->csk_port_tbl);
 }
@@ -4100,8 +4098,8 @@ static int cnic_cm_alloc_mem(struct cnic_dev *dev)
        struct cnic_local *cp = dev->cnic_priv;
        u32 port_id;
 
-       cp->csk_tbl = kcalloc(MAX_CM_SK_TBL_SZ, sizeof(struct cnic_sock),
-                             GFP_KERNEL);
+       cp->csk_tbl = kvcalloc(MAX_CM_SK_TBL_SZ, sizeof(struct cnic_sock),
+                              GFP_KERNEL);
        if (!cp->csk_tbl)
                return -ENOMEM;
 
@@ -5091,13 +5089,12 @@ static int cnic_start_bnx2x_hw(struct cnic_dev *dev)
        struct cnic_local *cp = dev->cnic_priv;
        struct bnx2x *bp = netdev_priv(dev->netdev);
        struct cnic_eth_dev *ethdev = cp->ethdev;
-       int func, ret;
+       int ret;
        u32 pfid;
 
        dev->stats_addr = ethdev->addr_drv_info_to_mcp;
        cp->func = bp->pf_num;
 
-       func = CNIC_FUNC(cp);
        pfid = bp->pfid;
 
        ret = cnic_init_id_tbl(&cp->cid_tbl, MAX_ISCSI_TBL_SZ,
index aa1374d0af9313dfdbf6a7f8dfeea92e2fee7013..d8dad07f826a01eb4941f2ecdbe203a4f693ebd8 100644 (file)
@@ -725,6 +725,7 @@ static int tg3_ape_lock(struct tg3 *tp, int locknum)
        case TG3_APE_LOCK_GPIO:
                if (tg3_asic_rev(tp) == ASIC_REV_5761)
                        return 0;
+               /* else: fall through */
        case TG3_APE_LOCK_GRC:
        case TG3_APE_LOCK_MEM:
                if (!tp->pci_fn)
@@ -785,6 +786,7 @@ static void tg3_ape_unlock(struct tg3 *tp, int locknum)
        case TG3_APE_LOCK_GPIO:
                if (tg3_asic_rev(tp) == ASIC_REV_5761)
                        return;
+               /* else: fall through */
        case TG3_APE_LOCK_GRC:
        case TG3_APE_LOCK_MEM:
                if (!tp->pci_fn)
@@ -10719,28 +10721,40 @@ static int tg3_reset_hw(struct tg3 *tp, bool reset_phy)
        switch (limit) {
        case 16:
                tw32(MAC_RCV_RULE_15,  0); tw32(MAC_RCV_VALUE_15,  0);
+               /* fall through */
        case 15:
                tw32(MAC_RCV_RULE_14,  0); tw32(MAC_RCV_VALUE_14,  0);
+               /* fall through */
        case 14:
                tw32(MAC_RCV_RULE_13,  0); tw32(MAC_RCV_VALUE_13,  0);
+               /* fall through */
        case 13:
                tw32(MAC_RCV_RULE_12,  0); tw32(MAC_RCV_VALUE_12,  0);
+               /* fall through */
        case 12:
                tw32(MAC_RCV_RULE_11,  0); tw32(MAC_RCV_VALUE_11,  0);
+               /* fall through */
        case 11:
                tw32(MAC_RCV_RULE_10,  0); tw32(MAC_RCV_VALUE_10,  0);
+               /* fall through */
        case 10:
                tw32(MAC_RCV_RULE_9,  0); tw32(MAC_RCV_VALUE_9,  0);
+               /* fall through */
        case 9:
                tw32(MAC_RCV_RULE_8,  0); tw32(MAC_RCV_VALUE_8,  0);
+               /* fall through */
        case 8:
                tw32(MAC_RCV_RULE_7,  0); tw32(MAC_RCV_VALUE_7,  0);
+               /* fall through */
        case 7:
                tw32(MAC_RCV_RULE_6,  0); tw32(MAC_RCV_VALUE_6,  0);
+               /* fall through */
        case 6:
                tw32(MAC_RCV_RULE_5,  0); tw32(MAC_RCV_VALUE_5,  0);
+               /* fall through */
        case 5:
                tw32(MAC_RCV_RULE_4,  0); tw32(MAC_RCV_VALUE_4,  0);
+               /* fall through */
        case 4:
                /* tw32(MAC_RCV_RULE_3,  0); tw32(MAC_RCV_VALUE_3,  0); */
        case 3:
index 427d65a1a1261095a402b833660c83da0113e4c7..b9984015ca8c1a680d6576dad3864aa2c7f99854 100644 (file)
@@ -2,7 +2,7 @@
 # Atmel device configuration
 #
 
-config NET_CADENCE
+config NET_VENDOR_CADENCE
        bool "Cadence devices"
        depends on HAS_IOMEM
        default y
@@ -16,7 +16,7 @@ config NET_CADENCE
          the remaining Atmel network card questions. If you say Y, you will be
          asked for your specific card in the following questions.
 
-if NET_CADENCE
+if NET_VENDOR_CADENCE
 
 config MACB
        tristate "Cadence MACB/GEM support"
@@ -48,4 +48,4 @@ config MACB_PCI
          To compile this driver as a module, choose M here: the module
          will be called macb_pci.
 
-endif # NET_CADENCE
+endif # NET_VENDOR_CADENCE
index 92d88c5f76fb8b68e9f8b35ada37d4a77d68f739..5f03199a3acf28940d78e515820d7af51e3cee4b 100644 (file)
@@ -4,7 +4,6 @@
 
 config NET_VENDOR_CAVIUM
        bool "Cavium ethernet drivers"
-       depends on PCI
        default y
        ---help---
          Select this option if you want to enable Cavium network support.
@@ -36,7 +35,7 @@ config        THUNDER_NIC_BGX
        tristate "Thunder MAC interface driver (BGX)"
        depends on 64BIT && PCI
        select PHYLIB
-       select MDIO_THUNDER
+       select MDIO_THUNDER if PCI
        select THUNDER_NIC_RGX
        ---help---
          This driver supports programming and controlling of MAC
@@ -46,7 +45,7 @@ config        THUNDER_NIC_RGX
        tristate "Thunder MAC interface driver (RGX)"
        depends on 64BIT && PCI
        select PHYLIB
-       select MDIO_THUNDER
+       select MDIO_THUNDER if PCI
        ---help---
          This driver supports configuring XCV block of RGX interface
          present on CN81XX chip.
@@ -67,6 +66,7 @@ config LIQUIDIO
        tristate "Cavium LiquidIO support"
        depends on 64BIT && PCI
        depends on MAY_USE_DEVLINK
+       depends on PCI
        imply PTP_1588_CLOCK
        select FW_LOADER
        select LIBCRC32C
index 929d485a3a2fea6b7f13f389c9e90cda183da737..9f4f3c1d504341d6387ee8be598a0a02c3c1fd68 100644 (file)
@@ -493,6 +493,9 @@ static void cn23xx_pf_setup_global_output_regs(struct octeon_device *oct)
        for (q_no = srn; q_no < ern; q_no++) {
                reg_val = octeon_read_csr(oct, CN23XX_SLI_OQ_PKT_CONTROL(q_no));
 
+               /* clear IPTR */
+               reg_val &= ~CN23XX_PKT_OUTPUT_CTL_IPTR;
+
                /* set DPTR */
                reg_val |= CN23XX_PKT_OUTPUT_CTL_DPTR;
 
@@ -1414,50 +1417,6 @@ int validate_cn23xx_pf_config_info(struct octeon_device *oct,
        return 0;
 }
 
-void cn23xx_dump_iq_regs(struct octeon_device *oct)
-{
-       u32 regval, q_no;
-
-       dev_dbg(&oct->pci_dev->dev, "SLI_IQ_DOORBELL_0 [0x%x]: 0x%016llx\n",
-               CN23XX_SLI_IQ_DOORBELL(0),
-               CVM_CAST64(octeon_read_csr64
-                       (oct, CN23XX_SLI_IQ_DOORBELL(0))));
-
-       dev_dbg(&oct->pci_dev->dev, "SLI_IQ_BASEADDR_0 [0x%x]: 0x%016llx\n",
-               CN23XX_SLI_IQ_BASE_ADDR64(0),
-               CVM_CAST64(octeon_read_csr64
-                       (oct, CN23XX_SLI_IQ_BASE_ADDR64(0))));
-
-       dev_dbg(&oct->pci_dev->dev, "SLI_IQ_FIFO_RSIZE_0 [0x%x]: 0x%016llx\n",
-               CN23XX_SLI_IQ_SIZE(0),
-               CVM_CAST64(octeon_read_csr64(oct, CN23XX_SLI_IQ_SIZE(0))));
-
-       dev_dbg(&oct->pci_dev->dev, "SLI_CTL_STATUS [0x%x]: 0x%016llx\n",
-               CN23XX_SLI_CTL_STATUS,
-               CVM_CAST64(octeon_read_csr64(oct, CN23XX_SLI_CTL_STATUS)));
-
-       for (q_no = 0; q_no < CN23XX_MAX_INPUT_QUEUES; q_no++) {
-               dev_dbg(&oct->pci_dev->dev, "SLI_PKT[%d]_INPUT_CTL [0x%x]: 0x%016llx\n",
-                       q_no, CN23XX_SLI_IQ_PKT_CONTROL64(q_no),
-                       CVM_CAST64(octeon_read_csr64
-                               (oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no))));
-       }
-
-       pci_read_config_dword(oct->pci_dev, CN23XX_CONFIG_PCIE_DEVCTL, &regval);
-       dev_dbg(&oct->pci_dev->dev, "Config DevCtl [0x%x]: 0x%08x\n",
-               CN23XX_CONFIG_PCIE_DEVCTL, regval);
-
-       dev_dbg(&oct->pci_dev->dev, "SLI_PRT[%d]_CFG [0x%llx]: 0x%016llx\n",
-               oct->pcie_port, CN23XX_DPI_SLI_PRTX_CFG(oct->pcie_port),
-               CVM_CAST64(lio_pci_readq(
-                       oct, CN23XX_DPI_SLI_PRTX_CFG(oct->pcie_port))));
-
-       dev_dbg(&oct->pci_dev->dev, "SLI_S2M_PORT[%d]_CTL [0x%x]: 0x%016llx\n",
-               oct->pcie_port, CN23XX_SLI_S2M_PORTX_CTL(oct->pcie_port),
-               CVM_CAST64(octeon_read_csr64(
-                       oct, CN23XX_SLI_S2M_PORTX_CTL(oct->pcie_port))));
-}
-
 int cn23xx_fw_loaded(struct octeon_device *oct)
 {
        u64 val;
index 9338a00083788059736edefd472710c00b30f9af..962bb62933db3900cbdc8501ebf54d3c87acf5f0 100644 (file)
@@ -165,6 +165,9 @@ static void cn23xx_vf_setup_global_output_regs(struct octeon_device *oct)
                reg_val =
                    octeon_read_csr(oct, CN23XX_VF_SLI_OQ_PKT_CONTROL(q_no));
 
+               /* clear IPTR */
+               reg_val &= ~CN23XX_PKT_OUTPUT_CTL_IPTR;
+
                /* set DPTR */
                reg_val |= CN23XX_PKT_OUTPUT_CTL_DPTR;
 
@@ -379,7 +382,7 @@ void cn23xx_vf_ask_pf_to_do_flr(struct octeon_device *oct)
        mbox_cmd.recv_len = 0;
        mbox_cmd.recv_status = 0;
        mbox_cmd.fn = NULL;
-       mbox_cmd.fn_arg = 0;
+       mbox_cmd.fn_arg = NULL;
 
        octeon_mbox_write(oct, &mbox_cmd);
 }
@@ -679,33 +682,3 @@ int cn23xx_setup_octeon_vf_device(struct octeon_device *oct)
 
        return 0;
 }
-
-void cn23xx_dump_vf_iq_regs(struct octeon_device *oct)
-{
-       u32 regval, q_no;
-
-       dev_dbg(&oct->pci_dev->dev, "SLI_IQ_DOORBELL_0 [0x%x]: 0x%016llx\n",
-               CN23XX_VF_SLI_IQ_DOORBELL(0),
-               CVM_CAST64(octeon_read_csr64(
-                                       oct, CN23XX_VF_SLI_IQ_DOORBELL(0))));
-
-       dev_dbg(&oct->pci_dev->dev, "SLI_IQ_BASEADDR_0 [0x%x]: 0x%016llx\n",
-               CN23XX_VF_SLI_IQ_BASE_ADDR64(0),
-               CVM_CAST64(octeon_read_csr64(
-                       oct, CN23XX_VF_SLI_IQ_BASE_ADDR64(0))));
-
-       dev_dbg(&oct->pci_dev->dev, "SLI_IQ_FIFO_RSIZE_0 [0x%x]: 0x%016llx\n",
-               CN23XX_VF_SLI_IQ_SIZE(0),
-               CVM_CAST64(octeon_read_csr64(oct, CN23XX_VF_SLI_IQ_SIZE(0))));
-
-       for (q_no = 0; q_no < oct->sriov_info.rings_per_vf; q_no++) {
-               dev_dbg(&oct->pci_dev->dev, "SLI_PKT[%d]_INPUT_CTL [0x%x]: 0x%016llx\n",
-                       q_no, CN23XX_VF_SLI_IQ_PKT_CONTROL64(q_no),
-                       CVM_CAST64(octeon_read_csr64(
-                               oct, CN23XX_VF_SLI_IQ_PKT_CONTROL64(q_no))));
-       }
-
-       pci_read_config_dword(oct->pci_dev, CN23XX_CONFIG_PCIE_DEVCTL, &regval);
-       dev_dbg(&oct->pci_dev->dev, "Config DevCtl [0x%x]: 0x%08x\n",
-               CN23XX_CONFIG_PCIE_DEVCTL, regval);
-}
index 7e8454d3b1ad3f382f778c27058695c34b9f13cb..8ef87a76692b285485d0bbaec74dc483d8762e81 100644 (file)
@@ -687,7 +687,7 @@ static void lio_sync_octeon_time(struct work_struct *work)
        lt = (struct lio_time *)sc->virtdptr;
 
        /* Get time of the day */
-       getnstimeofday64(&ts);
+       ktime_get_real_ts64(&ts);
        lt->sec = ts.tv_sec;
        lt->nsec = ts.tv_nsec;
        octeon_swap_8B_data((u64 *)lt, (sizeof(struct lio_time)) / 8);
@@ -2631,7 +2631,7 @@ static int liquidio_vlan_rx_kill_vid(struct net_device *netdev,
 
        ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
        if (ret < 0) {
-               dev_err(&oct->pci_dev->dev, "Add VLAN filter failed in core (ret: 0x%x)\n",
+               dev_err(&oct->pci_dev->dev, "Del VLAN filter failed in core (ret: 0x%x)\n",
                        ret);
        }
        return ret;
@@ -2909,7 +2909,7 @@ static int liquidio_set_vf_vlan(struct net_device *netdev, int vfidx,
            vfidx + 1; /* vfidx is 0 based, but vf_num (param2) is 1 based */
        nctrl.ncmd.s.more = 0;
        nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
-       nctrl.cb_fn = 0;
+       nctrl.cb_fn = NULL;
        nctrl.wait_time = LIO_CMD_WAIT_TM;
 
        octnet_send_nic_ctrl_pkt(oct, &nctrl);
@@ -3068,7 +3068,7 @@ static int liquidio_set_vf_link_state(struct net_device *netdev, int vfidx,
        nctrl.ncmd.s.param2 = linkstate;
        nctrl.ncmd.s.more = 0;
        nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
-       nctrl.cb_fn = 0;
+       nctrl.cb_fn = NULL;
        nctrl.wait_time = LIO_CMD_WAIT_TM;
 
        octnet_send_nic_ctrl_pkt(oct, &nctrl);
@@ -3302,7 +3302,9 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
 {
        struct lio *lio = NULL;
        struct net_device *netdev;
-       u8 mac[6], i, j, *fw_ver;
+       u8 mac[6], i, j, *fw_ver, *micro_ver;
+       unsigned long micro;
+       u32 cur_ver;
        struct octeon_soft_command *sc;
        struct liquidio_if_cfg_context *ctx;
        struct liquidio_if_cfg_resp *resp;
@@ -3432,6 +3434,14 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
                                 fw_ver);
                }
 
+               /* extract micro version field; point past '<maj>.<min>.' */
+               micro_ver = fw_ver + strlen(LIQUIDIO_BASE_VERSION) + 1;
+               if (kstrtoul(micro_ver, 10, &micro) != 0)
+                       micro = 0;
+               octeon_dev->fw_info.ver.maj = LIQUIDIO_BASE_MAJOR_VERSION;
+               octeon_dev->fw_info.ver.min = LIQUIDIO_BASE_MINOR_VERSION;
+               octeon_dev->fw_info.ver.rev = micro;
+
                octeon_swap_8B_data((u64 *)(&resp->cfg_info),
                                    (sizeof(struct liquidio_if_cfg_info)) >> 3);
 
@@ -3572,9 +3582,8 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
                for (j = 0; j < octeon_dev->sriov_info.max_vfs; j++) {
                        u8 vfmac[ETH_ALEN];
 
-                       random_ether_addr(&vfmac[0]);
-                       if (__liquidio_set_vf_mac(netdev, j,
-                                                 &vfmac[0], false)) {
+                       eth_random_addr(vfmac);
+                       if (__liquidio_set_vf_mac(netdev, j, vfmac, false)) {
                                dev_err(&octeon_dev->pci_dev->dev,
                                        "Error setting VF%d MAC address\n",
                                        j);
@@ -3675,7 +3684,19 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
                        OCTEON_CN2350_25GB_SUBSYS_ID ||
                    octeon_dev->subsystem_id ==
                        OCTEON_CN2360_25GB_SUBSYS_ID) {
-                       liquidio_get_speed(lio);
+                       cur_ver = OCT_FW_VER(octeon_dev->fw_info.ver.maj,
+                                            octeon_dev->fw_info.ver.min,
+                                            octeon_dev->fw_info.ver.rev);
+
+                       /* speed control unsupported in f/w older than 1.7.2 */
+                       if (cur_ver < OCT_FW_VER(1, 7, 2)) {
+                               dev_info(&octeon_dev->pci_dev->dev,
+                                        "speed setting not supported by f/w.");
+                               octeon_dev->speed_setting = 25;
+                               octeon_dev->no_speed_setting = 1;
+                       } else {
+                               liquidio_get_speed(lio);
+                       }
 
                        if (octeon_dev->speed_setting == 0) {
                                octeon_dev->speed_setting = 25;
index 7fa0212873aceaf876dd144a2d5e06dd199659b6..b77835724dc84d037c88bcb9ef7153db8f1f6e48 100644 (file)
@@ -1693,7 +1693,7 @@ liquidio_vlan_rx_kill_vid(struct net_device *netdev,
 
        ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
        if (ret < 0) {
-               dev_err(&oct->pci_dev->dev, "Add VLAN filter failed in core (ret: 0x%x)\n",
+               dev_err(&oct->pci_dev->dev, "Del VLAN filter failed in core (ret: 0x%x)\n",
                        ret);
        }
        return ret;
index 7f97ae48efed74a306ec4ec0851761435ae4344d..0cc2338d8d2a81216d7a91e0d48ad704b7921fbd 100644 (file)
@@ -902,7 +902,7 @@ int octeon_download_firmware(struct octeon_device *oct, const u8 *data,
         *
         * Octeon always uses UTC time, so timezone information is not sent.
         */
-       getnstimeofday64(&ts);
+       ktime_get_real_ts64(&ts);
        ret = snprintf(boottime, MAX_BOOTTIME_SIZE,
                       " time_sec=%lld time_nsec=%ld",
                       (s64)ts.tv_sec, ts.tv_nsec);
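getnstimeofday64() and its replacement ktime_get_real_ts64() both fill a struct timespec64 with wall-clock (UTC) time; the newer name belongs to the y2038-safe timekeeping API the older one was deprecated in favor of. The userspace analogue of what the call produces:

#include <stdio.h>
#include <time.h>

int main(void)
{
        struct timespec ts;

        /* Same semantics as the kernel call: CLOCK_REALTIME seconds
         * plus nanoseconds, i.e. UTC wall-clock time.
         */
        clock_gettime(CLOCK_REALTIME, &ts);
        printf(" time_sec=%lld time_nsec=%ld\n",
               (long long)ts.tv_sec, ts.tv_nsec);
        return 0;
}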
index 94a4ed88d6188ca4ed44ab5ebd2673832d33c254..d99ca6ba23a4f4e9f182e7e491fec6066a2f9a04 100644 (file)
@@ -288,8 +288,17 @@ struct oct_fw_info {
         */
        u32 app_mode;
        char   liquidio_firmware_version[32];
+       /* Fields extracted from legacy string 'liquidio_firmware_version' */
+       struct {
+               u8  maj;
+               u8  min;
+               u8  rev;
+       } ver;
 };
 
+#define OCT_FW_VER(maj, min, rev) \
+       (((u32)(maj) << 16) | ((u32)(min) << 8) | ((u32)(rev)))
+
 /* wrappers around work structs */
 struct cavium_wk {
        struct delayed_work work;
index 1f2e75da28f833c2f6973670fa5d113184b606fc..d5d9e47daa4bedfde9e79b13cea25453d2a67a29 100644 (file)
@@ -110,8 +110,8 @@ int octeon_init_instr_queue(struct octeon_device *oct,
 
        memset(iq->request_list, 0, sizeof(*iq->request_list) * num_descs);
 
-       dev_dbg(&oct->pci_dev->dev, "IQ[%d]: base: %p basedma: %llx count: %d\n",
-               iq_no, iq->base_addr, iq->base_addr_dma, iq->max_count);
+       dev_dbg(&oct->pci_dev->dev, "IQ[%d]: base: %p basedma: %pad count: %d\n",
+               iq_no, iq->base_addr, &iq->base_addr_dma, iq->max_count);
 
        iq->txpciq.u64 = txpciq.u64;
        iq->fill_threshold = (u32)conf->db_min;
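dma_addr_t is 32 or 64 bits depending on kernel config, so handing it straight to %llx is a varargs type mismatch on 32-bit builds; the %pad specifier sidesteps that by taking the address of the variable and reading exactly sizeof(dma_addr_t) bytes, which is why the argument becomes &iq->base_addr_dma. A reduced model of the hazard (the typedef width is chosen for illustration only):

#include <stdint.h>
#include <stdio.h>

typedef uint32_t dma_addr_t;    /* width is config-dependent in the kernel */

static void print_dma(const dma_addr_t *addr)
{
        /* Passing the raw value to "%llx" would read 64 bits off a
         * 32-bit argument; going through a pointer (as %pad does) or
         * an explicit cast keeps the varargs contract intact.
         */
        printf("basedma: %llx\n", (unsigned long long)*addr);
}

int main(void)
{
        dma_addr_t a = 0xdeadbeef;

        print_dma(&a);
        return 0;
}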
index 135766c4296b737c7ffbf34026d61e3f0cf9d13d..768f584f8392732b19d6e889ed456a3c6de8e809 100644 (file)
@@ -1848,7 +1848,6 @@ static int nicvf_xdp(struct net_device *netdev, struct netdev_bpf *xdp)
        case XDP_SETUP_PROG:
                return nicvf_xdp_setup(nic, xdp->prog);
        case XDP_QUERY_PROG:
-               xdp->prog_attached = !!nic->xdp_prog;
                xdp->prog_id = nic->xdp_prog ? nic->xdp_prog->aux->id : 0;
                return 0;
        default:
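The deleted prog_attached assignment reflects a tree-wide cleanup in this merge: userspace now infers XDP attachment from prog_id alone, with 0 meaning no program. A sketch of the convention (stub types, not the real struct netdev_bpf):

struct prog_stub {
        unsigned int id;        /* nonzero, assigned at load time */
};

struct netdev_bpf_stub {
        unsigned int prog_id;   /* 0 doubles as "nothing attached" */
};

static int xdp_query(const struct prog_stub *xdp_prog,
                     struct netdev_bpf_stub *xdp)
{
        /* The old boolean prog_attached field was redundant with this. */
        xdp->prog_id = xdp_prog ? xdp_prog->id : 0;
        return 0;
}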
index 3c5057868ab3a94758529375b4125da02a47fb8f..36d25883d12306cdb1270518e24dbaaf0d7dde5f 100644 (file)
@@ -120,6 +120,8 @@ struct cudbg_mem_desc {
        u32 idx;
 };
 
+#define CUDBG_MEMINFO_REV 1
+
 struct cudbg_meminfo {
        struct cudbg_mem_desc avail[4];
        struct cudbg_mem_desc mem[ARRAY_SIZE(cudbg_region) + 3];
@@ -137,6 +139,9 @@ struct cudbg_meminfo {
        u32 port_alloc[4];
        u32 loopback_used[NCHAN];
        u32 loopback_alloc[NCHAN];
+       u32 p_structs_free_cnt;
+       u32 free_rx_cnt;
+       u32 free_tx_cnt;
 };
 
 struct cudbg_cim_pif_la {
@@ -281,12 +286,18 @@ struct cudbg_tid_data {
 
 #define CUDBG_NUM_ULPTX 11
 #define CUDBG_NUM_ULPTX_READ 512
+#define CUDBG_NUM_ULPTX_ASIC 6
+#define CUDBG_NUM_ULPTX_ASIC_READ 128
+
+#define CUDBG_ULPTX_LA_REV 1
 
 struct cudbg_ulptx_la {
        u32 rdptr[CUDBG_NUM_ULPTX];
        u32 wrptr[CUDBG_NUM_ULPTX];
        u32 rddata[CUDBG_NUM_ULPTX];
        u32 rd_data[CUDBG_NUM_ULPTX][CUDBG_NUM_ULPTX_READ];
+       u32 rdptr_asic[CUDBG_NUM_ULPTX_ASIC_READ];
+       u32 rddata_asic[CUDBG_NUM_ULPTX_ASIC_READ][CUDBG_NUM_ULPTX_ASIC];
 };
 
 #define CUDBG_CHAC_PBT_ADDR 0x2800
index 0afcfe99bff304acaf2a701d2e61db40e6629074..d97e0d7e541afde772cf669a98f58d9a5a6eb626 100644 (file)
@@ -349,6 +349,11 @@ int cudbg_fill_meminfo(struct adapter *padap,
        meminfo_buff->up_extmem2_hi = hi;
 
        lo = t4_read_reg(padap, TP_PMM_RX_MAX_PAGE_A);
+       for (i = 0, meminfo_buff->free_rx_cnt = 0; i < 2; i++)
+               meminfo_buff->free_rx_cnt +=
+                       FREERXPAGECOUNT_G(t4_read_reg(padap,
+                                                     TP_FLM_FREE_RX_CNT_A));
+
        meminfo_buff->rx_pages_data[0] =  PMRXMAXPAGE_G(lo);
        meminfo_buff->rx_pages_data[1] =
                t4_read_reg(padap, TP_PMM_RX_PAGE_SIZE_A) >> 10;
@@ -356,6 +361,11 @@ int cudbg_fill_meminfo(struct adapter *padap,
 
        lo = t4_read_reg(padap, TP_PMM_TX_MAX_PAGE_A);
        hi = t4_read_reg(padap, TP_PMM_TX_PAGE_SIZE_A);
+       for (i = 0, meminfo_buff->free_tx_cnt = 0; i < 4; i++)
+               meminfo_buff->free_tx_cnt +=
+                       FREETXPAGECOUNT_G(t4_read_reg(padap,
+                                                     TP_FLM_FREE_TX_CNT_A));
+
        meminfo_buff->tx_pages_data[0] = PMTXMAXPAGE_G(lo);
        meminfo_buff->tx_pages_data[1] =
                hi >= (1 << 20) ? (hi >> 20) : (hi >> 10);
@@ -364,6 +374,8 @@ int cudbg_fill_meminfo(struct adapter *padap,
        meminfo_buff->tx_pages_data[3] = 1 << PMTXNUMCHN_G(lo);
 
        meminfo_buff->p_structs = t4_read_reg(padap, TP_CMM_MM_MAX_PSTRUCT_A);
+       meminfo_buff->p_structs_free_cnt =
+               FREEPSTRUCTCOUNT_G(t4_read_reg(padap, TP_FLM_FREE_PS_CNT_A));
 
        for (i = 0; i < 4; i++) {
                if (CHELSIO_CHIP_VERSION(padap->params.chip) > CHELSIO_T5)
@@ -1465,14 +1477,23 @@ int cudbg_collect_meminfo(struct cudbg_init *pdbg_init,
        struct adapter *padap = pdbg_init->adap;
        struct cudbg_buffer temp_buff = { 0 };
        struct cudbg_meminfo *meminfo_buff;
+       struct cudbg_ver_hdr *ver_hdr;
        int rc;
 
-       rc = cudbg_get_buff(pdbg_init, dbg_buff, sizeof(struct cudbg_meminfo),
+       rc = cudbg_get_buff(pdbg_init, dbg_buff,
+                           sizeof(struct cudbg_ver_hdr) +
+                           sizeof(struct cudbg_meminfo),
                            &temp_buff);
        if (rc)
                return rc;
 
-       meminfo_buff = (struct cudbg_meminfo *)temp_buff.data;
+       ver_hdr = (struct cudbg_ver_hdr *)temp_buff.data;
+       ver_hdr->signature = CUDBG_ENTITY_SIGNATURE;
+       ver_hdr->revision = CUDBG_MEMINFO_REV;
+       ver_hdr->size = sizeof(struct cudbg_meminfo);
+
+       meminfo_buff = (struct cudbg_meminfo *)(temp_buff.data +
+                                               sizeof(*ver_hdr));
        rc = cudbg_fill_meminfo(padap, meminfo_buff);
        if (rc) {
                cudbg_err->sys_err = rc;
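Both collectors in this file now reserve room for a cudbg_ver_hdr ahead of the payload and point the payload pointer just past it, so a dump reader can check signature, revision and size before decoding a layout that grew new fields (CUDBG_MEMINFO_REV / CUDBG_ULPTX_LA_REV above). A sketch of the resulting buffer layout, with simplified types:

#include <stdint.h>
#include <string.h>

struct ver_hdr {
        uint32_t signature;     /* entity magic */
        uint32_t revision;      /* payload layout revision */
        uint32_t size;          /* payload size in bytes */
};

/* Header first, payload immediately after; returns the payload start. */
static void *emit_versioned(void *buf, uint32_t sig, uint32_t rev,
                            const void *payload, uint32_t len)
{
        struct ver_hdr *hdr = buf;

        hdr->signature = sig;
        hdr->revision = rev;
        hdr->size = len;
        memcpy(hdr + 1, payload, len);
        return hdr + 1;
}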
@@ -2586,15 +2607,24 @@ int cudbg_collect_ulptx_la(struct cudbg_init *pdbg_init,
        struct adapter *padap = pdbg_init->adap;
        struct cudbg_buffer temp_buff = { 0 };
        struct cudbg_ulptx_la *ulptx_la_buff;
+       struct cudbg_ver_hdr *ver_hdr;
        u32 i, j;
        int rc;
 
-       rc = cudbg_get_buff(pdbg_init, dbg_buff, sizeof(struct cudbg_ulptx_la),
+       rc = cudbg_get_buff(pdbg_init, dbg_buff,
+                           sizeof(struct cudbg_ver_hdr) +
+                           sizeof(struct cudbg_ulptx_la),
                            &temp_buff);
        if (rc)
                return rc;
 
-       ulptx_la_buff = (struct cudbg_ulptx_la *)temp_buff.data;
+       ver_hdr = (struct cudbg_ver_hdr *)temp_buff.data;
+       ver_hdr->signature = CUDBG_ENTITY_SIGNATURE;
+       ver_hdr->revision = CUDBG_ULPTX_LA_REV;
+       ver_hdr->size = sizeof(struct cudbg_ulptx_la);
+
+       ulptx_la_buff = (struct cudbg_ulptx_la *)(temp_buff.data +
+                                                 sizeof(*ver_hdr));
        for (i = 0; i < CUDBG_NUM_ULPTX; i++) {
                ulptx_la_buff->rdptr[i] = t4_read_reg(padap,
                                                      ULP_TX_LA_RDPTR_0_A +
@@ -2610,6 +2640,25 @@ int cudbg_collect_ulptx_la(struct cudbg_init *pdbg_init,
                                t4_read_reg(padap,
                                            ULP_TX_LA_RDDATA_0_A + 0x10 * i);
        }
+
+       for (i = 0; i < CUDBG_NUM_ULPTX_ASIC_READ; i++) {
+               t4_write_reg(padap, ULP_TX_ASIC_DEBUG_CTRL_A, 0x1);
+               ulptx_la_buff->rdptr_asic[i] =
+                               t4_read_reg(padap, ULP_TX_ASIC_DEBUG_CTRL_A);
+               ulptx_la_buff->rddata_asic[i][0] =
+                               t4_read_reg(padap, ULP_TX_ASIC_DEBUG_0_A);
+               ulptx_la_buff->rddata_asic[i][1] =
+                               t4_read_reg(padap, ULP_TX_ASIC_DEBUG_1_A);
+               ulptx_la_buff->rddata_asic[i][2] =
+                               t4_read_reg(padap, ULP_TX_ASIC_DEBUG_2_A);
+               ulptx_la_buff->rddata_asic[i][3] =
+                               t4_read_reg(padap, ULP_TX_ASIC_DEBUG_3_A);
+               ulptx_la_buff->rddata_asic[i][4] =
+                               t4_read_reg(padap, ULP_TX_ASIC_DEBUG_4_A);
+               ulptx_la_buff->rddata_asic[i][5] =
+                               t4_read_reg(padap, PM_RX_BASE_ADDR);
+       }
+
        return cudbg_write_and_release_buff(pdbg_init, &temp_buff, dbg_buff);
 }
 
index 0dbe2d9e22d60d12a2e04770210d2c56fe4279ce..3da9299cd786e399b31852bbbf9544a9fc57c8fb 100644 (file)
@@ -46,6 +46,7 @@
 #include <linux/spinlock.h>
 #include <linux/timer.h>
 #include <linux/vmalloc.h>
+#include <linux/rhashtable.h>
 #include <linux/etherdevice.h>
 #include <linux/net_tstamp.h>
 #include <linux/ptp_clock_kernel.h>
@@ -319,6 +320,21 @@ struct vpd_params {
        u8 na[MACADDR_LEN + 1];
 };
 
+/* Maximum resources provisioned for a PCI PF.
+ */
+struct pf_resources {
+       unsigned int nvi;               /* N virtual interfaces */
+       unsigned int neq;               /* N egress Qs */
+       unsigned int nethctrl;          /* N egress ETH or CTRL Qs */
+       unsigned int niqflint;          /* N ingress Qs/w free list(s) & intr */
+       unsigned int niq;               /* N ingress Qs */
+       unsigned int tc;                /* PCI-E traffic class */
+       unsigned int pmask;             /* port access rights mask */
+       unsigned int nexactf;           /* N exact MPS filters */
+       unsigned int r_caps;            /* read capabilities */
+       unsigned int wx_caps;           /* write/execute capabilities */
+};
+
 struct pci_params {
        unsigned int vpd_cap_addr;
        unsigned char speed;
@@ -346,6 +362,7 @@ struct adapter_params {
        struct sge_params sge;
        struct tp_params  tp;
        struct vpd_params vpd;
+       struct pf_resources pfres;
        struct pci_params pci;
        struct devlog_params devlog;
        enum pcie_memwin drv_memwin;
@@ -521,6 +538,15 @@ enum {
        MAX_INGQ = MAX_ETH_QSETS + INGQ_EXTRAS,
 };
 
+enum {
+       PRIV_FLAG_PORT_TX_VM_BIT,
+};
+
+#define PRIV_FLAG_PORT_TX_VM           BIT(PRIV_FLAG_PORT_TX_VM_BIT)
+
+#define PRIV_FLAGS_ADAP                        0
+#define PRIV_FLAGS_PORT                        PRIV_FLAG_PORT_TX_VM
+
 struct adapter;
 struct sge_rspq;
 
@@ -557,6 +583,7 @@ struct port_info {
        struct hwtstamp_config tstamp_config;
        bool ptp_enable;
        struct sched_table *sched_tbl;
+       u32 eth_flags;
 };
 
 struct dentry;
@@ -867,6 +894,7 @@ struct adapter {
        unsigned int flags;
        unsigned int adap_idx;
        enum chip_type chip;
+       u32 eth_flags;
 
        int msg_enable;
        __be16 vxlan_port;
@@ -956,6 +984,7 @@ struct adapter {
        struct chcr_stats_debug chcr_stats;
 
        /* TC flower offload */
+       bool tc_flower_initialized;
        struct rhashtable flower_tbl;
        struct rhashtable_params flower_ht_params;
        struct timer_list flower_stats_timer;
@@ -1333,7 +1362,7 @@ void t4_os_link_changed(struct adapter *adap, int port_id, int link_stat);
 void t4_free_sge_resources(struct adapter *adap);
 void t4_free_ofld_rxqs(struct adapter *adap, int n, struct sge_ofld_rxq *q);
 irq_handler_t t4_intr_handler(struct adapter *adap);
-netdev_tx_t t4_eth_xmit(struct sk_buff *skb, struct net_device *dev);
+netdev_tx_t t4_start_xmit(struct sk_buff *skb, struct net_device *dev);
 int t4_ethrx_handler(struct sge_rspq *q, const __be64 *rsp,
                     const struct pkt_gl *gl);
 int t4_mgmt_tx(struct adapter *adap, struct sk_buff *skb);
@@ -1555,6 +1584,7 @@ int t4_eeprom_ptov(unsigned int phys_addr, unsigned int fn, unsigned int sz);
 int t4_seeprom_wp(struct adapter *adapter, bool enable);
 int t4_get_raw_vpd_params(struct adapter *adapter, struct vpd_params *p);
 int t4_get_vpd_params(struct adapter *adapter, struct vpd_params *p);
+int t4_get_pfres(struct adapter *adapter);
 int t4_read_flash(struct adapter *adapter, unsigned int addr,
                  unsigned int nwords, u32 *data, int byte_oriented);
 int t4_load_fw(struct adapter *adapter, const u8 *fw_data, unsigned int size);
index 8d751efcb90e58b5161a491921d204ce197870c2..5f01c0a7fd98d90406128e971633d29c16c7d7b2 100644 (file)
@@ -224,7 +224,8 @@ static u32 cxgb4_get_entity_length(struct adapter *adap, u32 entity)
                len = sizeof(struct cudbg_tp_la) + TPLA_SIZE * sizeof(u64);
                break;
        case CUDBG_MEMINFO:
-               len = sizeof(struct cudbg_meminfo);
+               len = sizeof(struct cudbg_ver_hdr) +
+                     sizeof(struct cudbg_meminfo);
                break;
        case CUDBG_CIM_PIF_LA:
                len = sizeof(struct cudbg_cim_pif_la);
@@ -273,7 +274,8 @@ static u32 cxgb4_get_entity_length(struct adapter *adap, u32 entity)
                }
                break;
        case CUDBG_ULPTX_LA:
-               len = sizeof(struct cudbg_ulptx_la);
+               len = sizeof(struct cudbg_ver_hdr) +
+                     sizeof(struct cudbg_ulptx_la);
                break;
        case CUDBG_UP_CIM_INDIRECT:
                n = 0;
index c301aaf79d647d2c0f2a1b08ebc1823d6079377f..6f312e03432fcadf7592757756e23728377e2cc1 100644 (file)
@@ -2414,6 +2414,44 @@ static const struct file_operations rss_vf_config_debugfs_fops = {
        .release = seq_release_private
 };
 
+static int resources_show(struct seq_file *seq, void *v)
+{
+       struct adapter *adapter = seq->private;
+       struct pf_resources *pfres = &adapter->params.pfres;
+
+       #define S(desc, fmt, var) \
+               seq_printf(seq, "%-60s " fmt "\n", \
+                          desc " (" #var "):", pfres->var)
+
+       S("Virtual Interfaces", "%d", nvi);
+       S("Egress Queues", "%d", neq);
+       S("Ethernet Control", "%d", nethctrl);
+       S("Ingress Queues/w Free Lists/Interrupts", "%d", niqflint);
+       S("Ingress Queues", "%d", niq);
+       S("Traffic Class", "%d", tc);
+       S("Port Access Rights Mask", "%#x", pmask);
+       S("MAC Address Filters", "%d", nexactf);
+       S("Firmware Command Read Capabilities", "%#x", r_caps);
+       S("Firmware Command Write/Execute Capabilities", "%#x", wx_caps);
+
+       #undef S
+
+       return 0;
+}
+
+static int resources_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, resources_show, inode->i_private);
+}
+
+static const struct file_operations resources_debugfs_fops = {
+       .owner   = THIS_MODULE,
+       .open    = resources_open,
+       .read    = seq_read,
+       .llseek  = seq_lseek,
+       .release = seq_release,
+};
+
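The S macro in resources_show() uses the preprocessor's stringizing operator so every row carries both a human-readable label and the literal pfres member it was read from. The same trick in isolation:

#include <stdio.h>

struct res { unsigned int nvi, neq; };

#define S(desc, fmt, var) \
        printf("%-60s " fmt "\n", desc " (" #var "):", r.var)

int main(void)
{
        struct res r = { .nvi = 4, .neq = 18 };

        S("Virtual Interfaces", "%d", nvi);     /* "... (nvi): 4"  */
        S("Egress Queues", "%d", neq);          /* "... (neq): 18" */
        return 0;
}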
 /**
  * ethqset2pinfo - return port_info of an Ethernet Queue Set
  * @adap: the adapter
@@ -2436,16 +2474,64 @@ static inline struct port_info *ethqset2pinfo(struct adapter *adap, int qset)
        return NULL;
 }
 
+static int sge_qinfo_uld_txq_entries(const struct adapter *adap, int uld)
+{
+       const struct sge_uld_txq_info *utxq_info = adap->sge.uld_txq_info[uld];
+
+       if (!utxq_info)
+               return 0;
+
+       return DIV_ROUND_UP(utxq_info->ntxq, 4);
+}
+
+static int sge_qinfo_uld_rspq_entries(const struct adapter *adap, int uld,
+                                     bool ciq)
+{
+       const struct sge_uld_rxq_info *urxq_info = adap->sge.uld_rxq_info[uld];
+
+       if (!urxq_info)
+               return 0;
+
+       return ciq ? DIV_ROUND_UP(urxq_info->nciq, 4) :
+                    DIV_ROUND_UP(urxq_info->nrxq, 4);
+}
+
+static int sge_qinfo_uld_rxq_entries(const struct adapter *adap, int uld)
+{
+       return sge_qinfo_uld_rspq_entries(adap, uld, false);
+}
+
+static int sge_qinfo_uld_ciq_entries(const struct adapter *adap, int uld)
+{
+       return sge_qinfo_uld_rspq_entries(adap, uld, true);
+}
+
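Each helper reports how many seq_file records its queue class needs, four queues per record, hence DIV_ROUND_UP(..., 4); sge_qinfo_show() then walks the classes by subtracting each class's record count from the running index r. The arithmetic in miniature:

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
        /* 10 queues at 4 per record need 3 records: 0-3, 4-7, 8-9 */
        int entries = DIV_ROUND_UP(10, 4);
        int r = 2;                      /* third record */
        int base_qset = r * 4;          /* first queue on this record: 8 */
        int n = 10 - 4 * r;             /* min(4, remainder) queues: 2 */

        printf("%d records; record %d covers qsets %d..%d\n",
               entries, r, base_qset, base_qset + n - 1);
        return 0;
}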
 static int sge_qinfo_show(struct seq_file *seq, void *v)
 {
+       int uld_rxq_entries[CXGB4_ULD_MAX] = { 0 };
+       int uld_ciq_entries[CXGB4_ULD_MAX] = { 0 };
+       int uld_txq_entries[CXGB4_TX_MAX] = { 0 };
+       const struct sge_uld_txq_info *utxq_info;
+       const struct sge_uld_rxq_info *urxq_info;
        struct adapter *adap = seq->private;
-       int eth_entries = DIV_ROUND_UP(adap->sge.ethqsets, 4);
-       int ofld_entries = DIV_ROUND_UP(adap->sge.ofldqsets, 4);
-       int ctrl_entries = DIV_ROUND_UP(MAX_CTRL_QUEUES, 4);
-       int i, r = (uintptr_t)v - 1;
-       int ofld_idx = r - eth_entries;
-       int ctrl_idx =  ofld_idx - ofld_entries;
-       int fq_idx =  ctrl_idx - ctrl_entries;
+       int i, n, r = (uintptr_t)v - 1;
+       int eth_entries, ctrl_entries;
+       struct sge *s = &adap->sge;
+
+       eth_entries = DIV_ROUND_UP(adap->sge.ethqsets, 4);
+       ctrl_entries = DIV_ROUND_UP(MAX_CTRL_QUEUES, 4);
+
+       mutex_lock(&uld_mutex);
+       if (s->uld_txq_info)
+               for (i = 0; i < ARRAY_SIZE(uld_txq_entries); i++)
+                       uld_txq_entries[i] = sge_qinfo_uld_txq_entries(adap, i);
+
+       if (s->uld_rxq_info) {
+               for (i = 0; i < ARRAY_SIZE(uld_rxq_entries); i++) {
+                       uld_rxq_entries[i] = sge_qinfo_uld_rxq_entries(adap, i);
+                       uld_ciq_entries[i] = sge_qinfo_uld_ciq_entries(adap, i);
+               }
+       }
 
        if (r)
                seq_putc(seq, '\n');
@@ -2467,9 +2553,10 @@ do { \
 
        if (r < eth_entries) {
                int base_qset = r * 4;
-               const struct sge_eth_rxq *rx = &adap->sge.ethrxq[base_qset];
-               const struct sge_eth_txq *tx = &adap->sge.ethtxq[base_qset];
-               int n = min(4, adap->sge.ethqsets - 4 * r);
+               const struct sge_eth_rxq *rx = &s->ethrxq[base_qset];
+               const struct sge_eth_txq *tx = &s->ethtxq[base_qset];
+
+               n = min(4, s->ethqsets - 4 * r);
 
                S("QType:", "Ethernet");
                S("Interface:",
@@ -2494,8 +2581,7 @@ do { \
                R("RspQ CIDX:", rspq.cidx);
                R("RspQ Gen:", rspq.gen);
                S3("u", "Intr delay:", qtimer_val(adap, &rx[i].rspq));
-               S3("u", "Intr pktcnt:",
-                  adap->sge.counter_val[rx[i].rspq.pktcnt_idx]);
+               S3("u", "Intr pktcnt:", s->counter_val[rx[i].rspq.pktcnt_idx]);
                R("FL ID:", fl.cntxt_id);
                R("FL size:", fl.size - 8);
                R("FL pend:", fl.pend_cred);
@@ -2520,9 +2606,196 @@ do { \
                RL("FLLow:", fl.low);
                RL("FLStarving:", fl.starving);
 
-       } else if (ctrl_idx < ctrl_entries) {
-               const struct sge_ctrl_txq *tx = &adap->sge.ctrlq[ctrl_idx * 4];
-               int n = min(4, adap->params.nports - 4 * ctrl_idx);
+               goto unlock;
+       }
+
+       r -= eth_entries;
+       if (r < uld_txq_entries[CXGB4_TX_OFLD]) {
+               const struct sge_uld_txq *tx;
+
+               utxq_info = s->uld_txq_info[CXGB4_TX_OFLD];
+               tx = &utxq_info->uldtxq[r * 4];
+               n = min(4, utxq_info->ntxq - 4 * r);
+
+               S("QType:", "OFLD-TXQ");
+               T("TxQ ID:", q.cntxt_id);
+               T("TxQ size:", q.size);
+               T("TxQ inuse:", q.in_use);
+               T("TxQ CIDX:", q.cidx);
+               T("TxQ PIDX:", q.pidx);
+
+               goto unlock;
+       }
+
+       r -= uld_txq_entries[CXGB4_TX_OFLD];
+       if (r < uld_rxq_entries[CXGB4_ULD_RDMA]) {
+               const struct sge_ofld_rxq *rx;
+
+               urxq_info = s->uld_rxq_info[CXGB4_ULD_RDMA];
+               rx = &urxq_info->uldrxq[r * 4];
+               n = min(4, urxq_info->nrxq - 4 * r);
+
+               S("QType:", "RDMA-CPL");
+               S("Interface:",
+                 rx[i].rspq.netdev ? rx[i].rspq.netdev->name : "N/A");
+               R("RspQ ID:", rspq.abs_id);
+               R("RspQ size:", rspq.size);
+               R("RspQE size:", rspq.iqe_len);
+               R("RspQ CIDX:", rspq.cidx);
+               R("RspQ Gen:", rspq.gen);
+               S3("u", "Intr delay:", qtimer_val(adap, &rx[i].rspq));
+               S3("u", "Intr pktcnt:", s->counter_val[rx[i].rspq.pktcnt_idx]);
+               R("FL ID:", fl.cntxt_id);
+               R("FL size:", fl.size - 8);
+               R("FL pend:", fl.pend_cred);
+               R("FL avail:", fl.avail);
+               R("FL PIDX:", fl.pidx);
+               R("FL CIDX:", fl.cidx);
+
+               goto unlock;
+       }
+
+       r -= uld_rxq_entries[CXGB4_ULD_RDMA];
+       if (r < uld_ciq_entries[CXGB4_ULD_RDMA]) {
+               const struct sge_ofld_rxq *rx;
+               int ciq_idx = 0;
+
+               urxq_info = s->uld_rxq_info[CXGB4_ULD_RDMA];
+               ciq_idx = urxq_info->nrxq + (r * 4);
+               rx = &urxq_info->uldrxq[ciq_idx];
+               n = min(4, urxq_info->nciq - 4 * r);
+
+               S("QType:", "RDMA-CIQ");
+               S("Interface:",
+                 rx[i].rspq.netdev ? rx[i].rspq.netdev->name : "N/A");
+               R("RspQ ID:", rspq.abs_id);
+               R("RspQ size:", rspq.size);
+               R("RspQE size:", rspq.iqe_len);
+               R("RspQ CIDX:", rspq.cidx);
+               R("RspQ Gen:", rspq.gen);
+               S3("u", "Intr delay:", qtimer_val(adap, &rx[i].rspq));
+               S3("u", "Intr pktcnt:", s->counter_val[rx[i].rspq.pktcnt_idx]);
+
+               goto unlock;
+       }
+
+       r -= uld_ciq_entries[CXGB4_ULD_RDMA];
+       if (r < uld_rxq_entries[CXGB4_ULD_ISCSI]) {
+               const struct sge_ofld_rxq *rx;
+
+               urxq_info = s->uld_rxq_info[CXGB4_ULD_ISCSI];
+               rx = &urxq_info->uldrxq[r * 4];
+               n = min(4, urxq_info->nrxq - 4 * r);
+
+               S("QType:", "iSCSI");
+               R("RspQ ID:", rspq.abs_id);
+               R("RspQ size:", rspq.size);
+               R("RspQE size:", rspq.iqe_len);
+               R("RspQ CIDX:", rspq.cidx);
+               R("RspQ Gen:", rspq.gen);
+               S3("u", "Intr delay:", qtimer_val(adap, &rx[i].rspq));
+               S3("u", "Intr pktcnt:", s->counter_val[rx[i].rspq.pktcnt_idx]);
+               R("FL ID:", fl.cntxt_id);
+               R("FL size:", fl.size - 8);
+               R("FL pend:", fl.pend_cred);
+               R("FL avail:", fl.avail);
+               R("FL PIDX:", fl.pidx);
+               R("FL CIDX:", fl.cidx);
+
+               goto unlock;
+       }
+
+       r -= uld_rxq_entries[CXGB4_ULD_ISCSI];
+       if (r < uld_rxq_entries[CXGB4_ULD_ISCSIT]) {
+               const struct sge_ofld_rxq *rx;
+
+               urxq_info = s->uld_rxq_info[CXGB4_ULD_ISCSIT];
+               rx = &urxq_info->uldrxq[r * 4];
+               n = min(4, urxq_info->nrxq - 4 * r);
+
+               S("QType:", "iSCSIT");
+               R("RspQ ID:", rspq.abs_id);
+               R("RspQ size:", rspq.size);
+               R("RspQE size:", rspq.iqe_len);
+               R("RspQ CIDX:", rspq.cidx);
+               R("RspQ Gen:", rspq.gen);
+               S3("u", "Intr delay:", qtimer_val(adap, &rx[i].rspq));
+               S3("u", "Intr pktcnt:", s->counter_val[rx[i].rspq.pktcnt_idx]);
+               R("FL ID:", fl.cntxt_id);
+               R("FL size:", fl.size - 8);
+               R("FL pend:", fl.pend_cred);
+               R("FL avail:", fl.avail);
+               R("FL PIDX:", fl.pidx);
+               R("FL CIDX:", fl.cidx);
+
+               goto unlock;
+       }
+
+       r -= uld_rxq_entries[CXGB4_ULD_ISCSIT];
+       if (r < uld_rxq_entries[CXGB4_ULD_TLS]) {
+               const struct sge_ofld_rxq *rx;
+
+               urxq_info = s->uld_rxq_info[CXGB4_ULD_TLS];
+               rx = &urxq_info->uldrxq[r * 4];
+               n = min(4, urxq_info->nrxq - 4 * r);
+
+               S("QType:", "TLS");
+               R("RspQ ID:", rspq.abs_id);
+               R("RspQ size:", rspq.size);
+               R("RspQE size:", rspq.iqe_len);
+               R("RspQ CIDX:", rspq.cidx);
+               R("RspQ Gen:", rspq.gen);
+               S3("u", "Intr delay:", qtimer_val(adap, &rx[i].rspq));
+               S3("u", "Intr pktcnt:", s->counter_val[rx[i].rspq.pktcnt_idx]);
+               R("FL ID:", fl.cntxt_id);
+               R("FL size:", fl.size - 8);
+               R("FL pend:", fl.pend_cred);
+               R("FL avail:", fl.avail);
+               R("FL PIDX:", fl.pidx);
+               R("FL CIDX:", fl.cidx);
+
+               goto unlock;
+       }
+
+       r -= uld_rxq_entries[CXGB4_ULD_TLS];
+       if (r < uld_txq_entries[CXGB4_TX_CRYPTO]) {
+               const struct sge_ofld_rxq *rx;
+               const struct sge_uld_txq *tx;
+
+               utxq_info = s->uld_txq_info[CXGB4_TX_CRYPTO];
+               urxq_info = s->uld_rxq_info[CXGB4_ULD_CRYPTO];
+               tx = &utxq_info->uldtxq[r * 4];
+               rx = &urxq_info->uldrxq[r * 4];
+               n = min(4, utxq_info->ntxq - 4 * r);
+
+               S("QType:", "Crypto");
+               T("TxQ ID:", q.cntxt_id);
+               T("TxQ size:", q.size);
+               T("TxQ inuse:", q.in_use);
+               T("TxQ CIDX:", q.cidx);
+               T("TxQ PIDX:", q.pidx);
+               R("RspQ ID:", rspq.abs_id);
+               R("RspQ size:", rspq.size);
+               R("RspQE size:", rspq.iqe_len);
+               R("RspQ CIDX:", rspq.cidx);
+               R("RspQ Gen:", rspq.gen);
+               S3("u", "Intr delay:", qtimer_val(adap, &rx[i].rspq));
+               S3("u", "Intr pktcnt:", s->counter_val[rx[i].rspq.pktcnt_idx]);
+               R("FL ID:", fl.cntxt_id);
+               R("FL size:", fl.size - 8);
+               R("FL pend:", fl.pend_cred);
+               R("FL avail:", fl.avail);
+               R("FL PIDX:", fl.pidx);
+               R("FL CIDX:", fl.cidx);
+
+               goto unlock;
+       }
+
+       r -= uld_txq_entries[CXGB4_TX_CRYPTO];
+       if (r < ctrl_entries) {
+               const struct sge_ctrl_txq *tx = &s->ctrlq[r * 4];
+
+               n = min(4, adap->params.nports - 4 * r);
 
                S("QType:", "Control");
                T("TxQ ID:", q.cntxt_id);
@@ -2532,8 +2805,13 @@ do { \
                T("TxQ PIDX:", q.pidx);
                TL("TxQFull:", q.stops);
                TL("TxQRestarts:", q.restarts);
-       } else if (fq_idx == 0) {
-               const struct sge_rspq *evtq = &adap->sge.fw_evtq;
+
+               goto unlock;
+       }
+
+       r -= ctrl_entries;
+       if (r < 1) {
+               const struct sge_rspq *evtq = &s->fw_evtq;
 
                seq_printf(seq, "%-12s %16s\n", "QType:", "FW event queue");
                seq_printf(seq, "%-12s %16u\n", "RspQ ID:", evtq->abs_id);
@@ -2544,8 +2822,13 @@ do { \
                seq_printf(seq, "%-12s %16u\n", "Intr delay:",
                           qtimer_val(adap, evtq));
                seq_printf(seq, "%-12s %16u\n", "Intr pktcnt:",
-                          adap->sge.counter_val[evtq->pktcnt_idx]);
+                          s->counter_val[evtq->pktcnt_idx]);
+
+               goto unlock;
        }
+
+unlock:
+       mutex_unlock(&uld_mutex);
 #undef R
 #undef RL
 #undef T
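Every queue-class branch above now ends in goto unlock rather than falling through an if/else ladder, which guarantees uld_mutex is dropped on every path while keeping each class's block self-contained. The shape of the pattern, reduced to userspace:

#include <pthread.h>

static pthread_mutex_t uld_mutex = PTHREAD_MUTEX_INITIALIZER;

static int show(int r)
{
        int ret = 0;

        pthread_mutex_lock(&uld_mutex);
        if (r < 4) {
                /* ...emit records for class A... */
                goto unlock;
        }
        r -= 4;
        if (r < 2) {
                /* ...emit records for class B... */
                goto unlock;
        }
        ret = -1;                       /* index past the last class */
unlock:
        pthread_mutex_unlock(&uld_mutex);
        return ret;
}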
@@ -2559,8 +2842,21 @@ do { \
 
 static int sge_queue_entries(const struct adapter *adap)
 {
+       int tot_uld_entries = 0;
+       int i;
+
+       mutex_lock(&uld_mutex);
+       for (i = 0; i < CXGB4_TX_MAX; i++)
+               tot_uld_entries += sge_qinfo_uld_txq_entries(adap, i);
+
+       for (i = 0; i < CXGB4_ULD_MAX; i++) {
+               tot_uld_entries += sge_qinfo_uld_rxq_entries(adap, i);
+               tot_uld_entries += sge_qinfo_uld_ciq_entries(adap, i);
+       }
+       mutex_unlock(&uld_mutex);
+
        return DIV_ROUND_UP(adap->sge.ethqsets, 4) +
-              DIV_ROUND_UP(adap->sge.ofldqsets, 4) +
+              tot_uld_entries +
               DIV_ROUND_UP(MAX_CTRL_QUEUES, 4) + 1;
 }
 
@@ -2851,15 +3147,17 @@ static int meminfo_show(struct seq_file *seq, void *v)
        mem_region_show(seq, "uP Extmem2:", meminfo.up_extmem2_lo,
                        meminfo.up_extmem2_hi);
 
-       seq_printf(seq, "\n%u Rx pages of size %uKiB for %u channels\n",
-                  meminfo.rx_pages_data[0], meminfo.rx_pages_data[1],
-                  meminfo.rx_pages_data[2]);
+       seq_printf(seq, "\n%u Rx pages (%u free) of size %uKiB for %u channels\n",
+                  meminfo.rx_pages_data[0], meminfo.free_rx_cnt,
+                  meminfo.rx_pages_data[1], meminfo.rx_pages_data[2]);
 
-       seq_printf(seq, "%u Tx pages of size %u%ciB for %u channels\n",
-                  meminfo.tx_pages_data[0], meminfo.tx_pages_data[1],
-                  meminfo.tx_pages_data[2], meminfo.tx_pages_data[3]);
+       seq_printf(seq, "%u Tx pages (%u free) of size %u%ciB for %u channels\n",
+                  meminfo.tx_pages_data[0], meminfo.free_tx_cnt,
+                  meminfo.tx_pages_data[1], meminfo.tx_pages_data[2],
+                  meminfo.tx_pages_data[3]);
 
-       seq_printf(seq, "%u p-structs\n\n", meminfo.p_structs);
+       seq_printf(seq, "%u p-structs (%u free)\n\n",
+                  meminfo.p_structs, meminfo.p_structs_free_cnt);
 
        for (i = 0; i < 4; i++)
                /* For T6 these are MAC buffer groups */
@@ -2924,6 +3222,169 @@ static const struct file_operations chcr_stats_debugfs_fops = {
         .llseek  = seq_lseek,
         .release = single_release,
 };
+
+#define PRINT_ADAP_STATS(string, value) \
+       seq_printf(seq, "%-25s %-20llu\n", (string), \
+                  (unsigned long long)(value))
+
+#define PRINT_CH_STATS(string, value) \
+do { \
+       seq_printf(seq, "%-25s ", (string)); \
+       for (i = 0; i < adap->params.arch.nchan; i++) \
+               seq_printf(seq, "%-20llu ", \
+                          (unsigned long long)stats.value[i]); \
+       seq_printf(seq, "\n"); \
+} while (0)
+
+#define PRINT_CH_STATS2(string, value) \
+do { \
+       seq_printf(seq, "%-25s ", (string)); \
+       for (i = 0; i < adap->params.arch.nchan; i++) \
+               seq_printf(seq, "%-20llu ", \
+                          (unsigned long long)stats[i].value); \
+       seq_printf(seq, "\n"); \
+} while (0)
+
+static void show_tcp_stats(struct seq_file *seq)
+{
+       struct adapter *adap = seq->private;
+       struct tp_tcp_stats v4, v6;
+
+       spin_lock(&adap->stats_lock);
+       t4_tp_get_tcp_stats(adap, &v4, &v6, false);
+       spin_unlock(&adap->stats_lock);
+
+       PRINT_ADAP_STATS("tcp_ipv4_out_rsts:", v4.tcp_out_rsts);
+       PRINT_ADAP_STATS("tcp_ipv4_in_segs:", v4.tcp_in_segs);
+       PRINT_ADAP_STATS("tcp_ipv4_out_segs:", v4.tcp_out_segs);
+       PRINT_ADAP_STATS("tcp_ipv4_retrans_segs:", v4.tcp_retrans_segs);
+       PRINT_ADAP_STATS("tcp_ipv6_out_rsts:", v6.tcp_out_rsts);
+       PRINT_ADAP_STATS("tcp_ipv6_in_segs:", v6.tcp_in_segs);
+       PRINT_ADAP_STATS("tcp_ipv6_out_segs:", v6.tcp_out_segs);
+       PRINT_ADAP_STATS("tcp_ipv6_retrans_segs:", v6.tcp_retrans_segs);
+}
+
+static void show_ddp_stats(struct seq_file *seq)
+{
+       struct adapter *adap = seq->private;
+       struct tp_usm_stats stats;
+
+       spin_lock(&adap->stats_lock);
+       t4_get_usm_stats(adap, &stats, false);
+       spin_unlock(&adap->stats_lock);
+
+       PRINT_ADAP_STATS("usm_ddp_frames:", stats.frames);
+       PRINT_ADAP_STATS("usm_ddp_octets:", stats.octets);
+       PRINT_ADAP_STATS("usm_ddp_drops:", stats.drops);
+}
+
+static void show_rdma_stats(struct seq_file *seq)
+{
+       struct adapter *adap = seq->private;
+       struct tp_rdma_stats stats;
+
+       spin_lock(&adap->stats_lock);
+       t4_tp_get_rdma_stats(adap, &stats, false);
+       spin_unlock(&adap->stats_lock);
+
+       PRINT_ADAP_STATS("rdma_no_rqe_mod_defer:", stats.rqe_dfr_mod);
+       PRINT_ADAP_STATS("rdma_no_rqe_pkt_defer:", stats.rqe_dfr_pkt);
+}
+
+static void show_tp_err_adapter_stats(struct seq_file *seq)
+{
+       struct adapter *adap = seq->private;
+       struct tp_err_stats stats;
+
+       spin_lock(&adap->stats_lock);
+       t4_tp_get_err_stats(adap, &stats, false);
+       spin_unlock(&adap->stats_lock);
+
+       PRINT_ADAP_STATS("tp_err_ofld_no_neigh:", stats.ofld_no_neigh);
+       PRINT_ADAP_STATS("tp_err_ofld_cong_defer:", stats.ofld_cong_defer);
+}
+
+static void show_cpl_stats(struct seq_file *seq)
+{
+       struct adapter *adap = seq->private;
+       struct tp_cpl_stats stats;
+       u8 i;
+
+       spin_lock(&adap->stats_lock);
+       t4_tp_get_cpl_stats(adap, &stats, false);
+       spin_unlock(&adap->stats_lock);
+
+       PRINT_CH_STATS("tp_cpl_requests:", req);
+       PRINT_CH_STATS("tp_cpl_responses:", rsp);
+}
+
+static void show_tp_err_channel_stats(struct seq_file *seq)
+{
+       struct adapter *adap = seq->private;
+       struct tp_err_stats stats;
+       u8 i;
+
+       spin_lock(&adap->stats_lock);
+       t4_tp_get_err_stats(adap, &stats, false);
+       spin_unlock(&adap->stats_lock);
+
+       PRINT_CH_STATS("tp_mac_in_errs:", mac_in_errs);
+       PRINT_CH_STATS("tp_hdr_in_errs:", hdr_in_errs);
+       PRINT_CH_STATS("tp_tcp_in_errs:", tcp_in_errs);
+       PRINT_CH_STATS("tp_tcp6_in_errs:", tcp6_in_errs);
+       PRINT_CH_STATS("tp_tnl_cong_drops:", tnl_cong_drops);
+       PRINT_CH_STATS("tp_tnl_tx_drops:", tnl_tx_drops);
+       PRINT_CH_STATS("tp_ofld_vlan_drops:", ofld_vlan_drops);
+       PRINT_CH_STATS("tp_ofld_chan_drops:", ofld_chan_drops);
+}
+
+static void show_fcoe_stats(struct seq_file *seq)
+{
+       struct adapter *adap = seq->private;
+       struct tp_fcoe_stats stats[NCHAN];
+       u8 i;
+
+       spin_lock(&adap->stats_lock);
+       for (i = 0; i < adap->params.arch.nchan; i++)
+               t4_get_fcoe_stats(adap, i, &stats[i], false);
+       spin_unlock(&adap->stats_lock);
+
+       PRINT_CH_STATS2("fcoe_octets_ddp", octets_ddp);
+       PRINT_CH_STATS2("fcoe_frames_ddp", frames_ddp);
+       PRINT_CH_STATS2("fcoe_frames_drop", frames_drop);
+}
+
+#undef PRINT_CH_STATS2
+#undef PRINT_CH_STATS
+#undef PRINT_ADAP_STATS
+
+static int tp_stats_show(struct seq_file *seq, void *v)
+{
+       struct adapter *adap = seq->private;
+
+       seq_puts(seq, "\n--------Adapter Stats--------\n");
+       show_tcp_stats(seq);
+       show_ddp_stats(seq);
+       show_rdma_stats(seq);
+       show_tp_err_adapter_stats(seq);
+
+       seq_puts(seq, "\n-------- Channel Stats --------\n");
+       if (adap->params.arch.nchan == NCHAN)
+               seq_printf(seq, "%-25s %-20s %-20s %-20s %-20s\n",
+                          " ", "channel 0", "channel 1",
+                          "channel 2", "channel 3");
+       else
+               seq_printf(seq, "%-25s %-20s %-20s\n",
+                          " ", "channel 0", "channel 1");
+       show_cpl_stats(seq);
+       show_tp_err_channel_stats(seq);
+       show_fcoe_stats(seq);
+
+       return 0;
+}
+
+DEFINE_SIMPLE_DEBUGFS_FILE(tp_stats);
+
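PRINT_CH_STATS and PRINT_CH_STATS2 intentionally reference the caller's local stats and i variables, which is why each show_*() helper above declares its own u8 i; the macros are only meaningful expanded inside those scopes. A compilable reduction:

#include <stdio.h>

#define NCHAN 4

/* Deliberately unhygienic, like the driver macro: expands against the
 * caller's local 'stats' struct and loop counter 'i'.
 */
#define PRINT_CH_STATS(string, value) \
do { \
        printf("%-25s ", (string)); \
        for (i = 0; i < NCHAN; i++) \
                printf("%-20llu ", (unsigned long long)stats.value[i]); \
        printf("\n"); \
} while (0)

struct cpl_stats { unsigned long req[NCHAN], rsp[NCHAN]; };

int main(void)
{
        struct cpl_stats stats = { .req = {1, 2, 3, 4}, .rsp = {4, 3, 2, 1} };
        unsigned char i;        /* matches the driver's 'u8 i' */

        PRINT_CH_STATS("tp_cpl_requests:", req);
        PRINT_CH_STATS("tp_cpl_responses:", rsp);
        return 0;
}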
 /* Add an array of Debug FS files.
  */
 void add_debugfs_files(struct adapter *adap,
@@ -2973,6 +3434,7 @@ int t4_setup_debugfs(struct adapter *adap)
                { "rss_key", &rss_key_debugfs_fops, 0400, 0 },
                { "rss_pf_config", &rss_pf_config_debugfs_fops, 0400, 0 },
                { "rss_vf_config", &rss_vf_config_debugfs_fops, 0400, 0 },
+               { "resources", &resources_debugfs_fops, 0400, 0 },
                { "sge_qinfo", &sge_qinfo_debugfs_fops, 0400, 0 },
                { "ibq_tp0",  &cim_ibq_fops, 0400, 0 },
                { "ibq_tp1",  &cim_ibq_fops, 0400, 1 },
@@ -2999,6 +3461,7 @@ int t4_setup_debugfs(struct adapter *adap)
                { "blocked_fl", &blocked_fl_fops, 0600, 0 },
                { "meminfo", &meminfo_fops, 0400, 0 },
                { "crypto", &chcr_stats_debugfs_fops, 0400, 0 },
+               { "tp_stats", &tp_stats_debugfs_fops, 0400, 0 },
        };
 
        /* Debug FS nodes common to all T5 and later adapters.
index f7eef93ffc87d440fcf067d83e1f082a256f38b1..d07230c892a546d6e2a31f7cddb2f51116cd9035 100644 (file)
@@ -115,42 +115,10 @@ static char adapter_stats_strings[][ETH_GSTRING_LEN] = {
        "db_drop                ",
        "db_full                ",
        "db_empty               ",
-       "tcp_ipv4_out_rsts      ",
-       "tcp_ipv4_in_segs       ",
-       "tcp_ipv4_out_segs      ",
-       "tcp_ipv4_retrans_segs  ",
-       "tcp_ipv6_out_rsts      ",
-       "tcp_ipv6_in_segs       ",
-       "tcp_ipv6_out_segs      ",
-       "tcp_ipv6_retrans_segs  ",
-       "usm_ddp_frames         ",
-       "usm_ddp_octets         ",
-       "usm_ddp_drops          ",
-       "rdma_no_rqe_mod_defer  ",
-       "rdma_no_rqe_pkt_defer  ",
-       "tp_err_ofld_no_neigh   ",
-       "tp_err_ofld_cong_defer ",
        "write_coal_success     ",
        "write_coal_fail        ",
 };
 
-static char channel_stats_strings[][ETH_GSTRING_LEN] = {
-       "--------Channel--------- ",
-       "tp_cpl_requests        ",
-       "tp_cpl_responses       ",
-       "tp_mac_in_errs         ",
-       "tp_hdr_in_errs         ",
-       "tp_tcp_in_errs         ",
-       "tp_tcp6_in_errs        ",
-       "tp_tnl_cong_drops      ",
-       "tp_tnl_tx_drops        ",
-       "tp_ofld_vlan_drops     ",
-       "tp_ofld_chan_drops     ",
-       "fcoe_octets_ddp        ",
-       "fcoe_frames_ddp        ",
-       "fcoe_frames_drop       ",
-};
-
 static char loopback_stats_strings[][ETH_GSTRING_LEN] = {
        "-------Loopback----------- ",
        "octets_ok              ",
@@ -177,14 +145,19 @@ static char loopback_stats_strings[][ETH_GSTRING_LEN] = {
        "bg3_frames_trunc       ",
 };
 
+static const char cxgb4_priv_flags_strings[][ETH_GSTRING_LEN] = {
+       [PRIV_FLAG_PORT_TX_VM_BIT] = "port_tx_vm_wr",
+};
+
 static int get_sset_count(struct net_device *dev, int sset)
 {
        switch (sset) {
        case ETH_SS_STATS:
                return ARRAY_SIZE(stats_strings) +
                       ARRAY_SIZE(adapter_stats_strings) +
-                      ARRAY_SIZE(channel_stats_strings) +
                       ARRAY_SIZE(loopback_stats_strings);
+       case ETH_SS_PRIV_FLAGS:
+               return ARRAY_SIZE(cxgb4_priv_flags_strings);
        default:
                return -EOPNOTSUPP;
        }
@@ -235,6 +208,7 @@ static void get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
                         FW_HDR_FW_VER_MINOR_G(exprom_vers),
                         FW_HDR_FW_VER_MICRO_G(exprom_vers),
                         FW_HDR_FW_VER_BUILD_G(exprom_vers));
+       info->n_priv_flags = ARRAY_SIZE(cxgb4_priv_flags_strings);
 }
 
 static void get_strings(struct net_device *dev, u32 stringset, u8 *data)
@@ -245,11 +219,11 @@ static void get_strings(struct net_device *dev, u32 stringset, u8 *data)
                memcpy(data, adapter_stats_strings,
                       sizeof(adapter_stats_strings));
                data += sizeof(adapter_stats_strings);
-               memcpy(data, channel_stats_strings,
-                      sizeof(channel_stats_strings));
-               data += sizeof(channel_stats_strings);
                memcpy(data, loopback_stats_strings,
                       sizeof(loopback_stats_strings));
+       } else if (stringset == ETH_SS_PRIV_FLAGS) {
+               memcpy(data, cxgb4_priv_flags_strings,
+                      sizeof(cxgb4_priv_flags_strings));
        }
 }
 
@@ -270,41 +244,10 @@ struct adapter_stats {
        u64 db_drop;
        u64 db_full;
        u64 db_empty;
-       u64 tcp_v4_out_rsts;
-       u64 tcp_v4_in_segs;
-       u64 tcp_v4_out_segs;
-       u64 tcp_v4_retrans_segs;
-       u64 tcp_v6_out_rsts;
-       u64 tcp_v6_in_segs;
-       u64 tcp_v6_out_segs;
-       u64 tcp_v6_retrans_segs;
-       u64 frames;
-       u64 octets;
-       u64 drops;
-       u64 rqe_dfr_mod;
-       u64 rqe_dfr_pkt;
-       u64 ofld_no_neigh;
-       u64 ofld_cong_defer;
        u64 wc_success;
        u64 wc_fail;
 };
 
-struct channel_stats {
-       u64 cpl_req;
-       u64 cpl_rsp;
-       u64 mac_in_errs;
-       u64 hdr_in_errs;
-       u64 tcp_in_errs;
-       u64 tcp6_in_errs;
-       u64 tnl_cong_drops;
-       u64 tnl_tx_drops;
-       u64 ofld_vlan_drops;
-       u64 ofld_chan_drops;
-       u64 octets_ddp;
-       u64 frames_ddp;
-       u64 frames_drop;
-};
-
 static void collect_sge_port_stats(const struct adapter *adap,
                                   const struct port_info *p,
                                   struct queue_port_stats *s)
@@ -327,45 +270,14 @@ static void collect_sge_port_stats(const struct adapter *adap,
 
 static void collect_adapter_stats(struct adapter *adap, struct adapter_stats *s)
 {
-       struct tp_tcp_stats v4, v6;
-       struct tp_rdma_stats rdma_stats;
-       struct tp_err_stats err_stats;
-       struct tp_usm_stats usm_stats;
        u64 val1, val2;
 
        memset(s, 0, sizeof(*s));
 
-       spin_lock(&adap->stats_lock);
-       t4_tp_get_tcp_stats(adap, &v4, &v6, false);
-       t4_tp_get_rdma_stats(adap, &rdma_stats, false);
-       t4_get_usm_stats(adap, &usm_stats, false);
-       t4_tp_get_err_stats(adap, &err_stats, false);
-       spin_unlock(&adap->stats_lock);
-
        s->db_drop = adap->db_stats.db_drop;
        s->db_full = adap->db_stats.db_full;
        s->db_empty = adap->db_stats.db_empty;
 
-       s->tcp_v4_out_rsts = v4.tcp_out_rsts;
-       s->tcp_v4_in_segs = v4.tcp_in_segs;
-       s->tcp_v4_out_segs = v4.tcp_out_segs;
-       s->tcp_v4_retrans_segs = v4.tcp_retrans_segs;
-       s->tcp_v6_out_rsts = v6.tcp_out_rsts;
-       s->tcp_v6_in_segs = v6.tcp_in_segs;
-       s->tcp_v6_out_segs = v6.tcp_out_segs;
-       s->tcp_v6_retrans_segs = v6.tcp_retrans_segs;
-
-       if (is_offload(adap)) {
-               s->frames = usm_stats.frames;
-               s->octets = usm_stats.octets;
-               s->drops = usm_stats.drops;
-               s->rqe_dfr_mod = rdma_stats.rqe_dfr_mod;
-               s->rqe_dfr_pkt = rdma_stats.rqe_dfr_pkt;
-       }
-
-       s->ofld_no_neigh = err_stats.ofld_no_neigh;
-       s->ofld_cong_defer = err_stats.ofld_cong_defer;
-
        if (!is_t4(adap->params.chip)) {
                int v;
 
@@ -379,36 +291,6 @@ static void collect_adapter_stats(struct adapter *adap, struct adapter_stats *s)
        }
 }
 
-static void collect_channel_stats(struct adapter *adap, struct channel_stats *s,
-                                 u8 i)
-{
-       struct tp_cpl_stats cpl_stats;
-       struct tp_err_stats err_stats;
-       struct tp_fcoe_stats fcoe_stats;
-
-       memset(s, 0, sizeof(*s));
-
-       spin_lock(&adap->stats_lock);
-       t4_tp_get_cpl_stats(adap, &cpl_stats, false);
-       t4_tp_get_err_stats(adap, &err_stats, false);
-       t4_get_fcoe_stats(adap, i, &fcoe_stats, false);
-       spin_unlock(&adap->stats_lock);
-
-       s->cpl_req = cpl_stats.req[i];
-       s->cpl_rsp = cpl_stats.rsp[i];
-       s->mac_in_errs = err_stats.mac_in_errs[i];
-       s->hdr_in_errs = err_stats.hdr_in_errs[i];
-       s->tcp_in_errs = err_stats.tcp_in_errs[i];
-       s->tcp6_in_errs = err_stats.tcp6_in_errs[i];
-       s->tnl_cong_drops = err_stats.tnl_cong_drops[i];
-       s->tnl_tx_drops = err_stats.tnl_tx_drops[i];
-       s->ofld_vlan_drops = err_stats.ofld_vlan_drops[i];
-       s->ofld_chan_drops = err_stats.ofld_chan_drops[i];
-       s->octets_ddp = fcoe_stats.octets_ddp;
-       s->frames_ddp = fcoe_stats.frames_ddp;
-       s->frames_drop = fcoe_stats.frames_drop;
-}
-
 static void get_stats(struct net_device *dev, struct ethtool_stats *stats,
                      u64 *data)
 {
@@ -428,11 +310,6 @@ static void get_stats(struct net_device *dev, struct ethtool_stats *stats,
        collect_adapter_stats(adapter, (struct adapter_stats *)data);
        data += sizeof(struct adapter_stats) / sizeof(u64);
 
-       *data++ = (u64)pi->port_id;
-       collect_channel_stats(adapter, (struct channel_stats *)data,
-                             pi->port_id);
-       data += sizeof(struct channel_stats) / sizeof(u64);
-
        *data++ = (u64)pi->port_id;
        memset(&s, 0, sizeof(s));
        t4_get_lb_stats(adapter, pi->port_id, &s);
@@ -751,13 +628,10 @@ static int get_link_ksettings(struct net_device *dev,
        fw_caps_to_lmm(pi->port_type, pi->link_cfg.lpacaps,
                       link_ksettings->link_modes.lp_advertising);
 
-       if (netif_carrier_ok(dev)) {
-               base->speed = pi->link_cfg.speed;
-               base->duplex = DUPLEX_FULL;
-       } else {
-               base->speed = SPEED_UNKNOWN;
-               base->duplex = DUPLEX_UNKNOWN;
-       }
+       base->speed = (netif_carrier_ok(dev)
+                      ? pi->link_cfg.speed
+                      : SPEED_UNKNOWN);
+       base->duplex = DUPLEX_FULL;
 
        if (pi->link_cfg.fc & PAUSE_RX) {
                if (pi->link_cfg.fc & PAUSE_TX) {
@@ -1499,6 +1373,36 @@ static int cxgb4_get_module_eeprom(struct net_device *dev,
                         offset, len, &data[eprom->len - len]);
 }
 
+static u32 cxgb4_get_priv_flags(struct net_device *netdev)
+{
+       struct port_info *pi = netdev_priv(netdev);
+       struct adapter *adapter = pi->adapter;
+
+       return (adapter->eth_flags | pi->eth_flags);
+}
+
+/**
+ *     set_flags - set/unset specified flags if passed in new_flags
+ *     @cur_flags: pointer to current flags
+ *     @new_flags: new incoming flags
+ *     @flags: set of flags to set/unset
+ */
+static inline void set_flags(u32 *cur_flags, u32 new_flags, u32 flags)
+{
+       *cur_flags = (*cur_flags & ~flags) | (new_flags & flags);
+}
+
+static int cxgb4_set_priv_flags(struct net_device *netdev, u32 flags)
+{
+       struct port_info *pi = netdev_priv(netdev);
+       struct adapter *adapter = pi->adapter;
+
+       set_flags(&adapter->eth_flags, flags, PRIV_FLAGS_ADAP);
+       set_flags(&pi->eth_flags, flags, PRIV_FLAGS_PORT);
+
+       return 0;
+}
+
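set_flags() is a read-modify-write on a bitmask: bits named in the mask are taken from new_flags, all others are preserved, and the separate PRIV_FLAGS_ADAP / PRIV_FLAGS_PORT masks route each flag to the right owner. A worked example (the flag value is taken from the BIT() definition added in the header hunk):

#include <stdio.h>

#define PORT_TX_VM      0x1U    /* BIT(PRIV_FLAG_PORT_TX_VM_BIT) */

static void set_flags(unsigned *cur, unsigned new_flags, unsigned mask)
{
        *cur = (*cur & ~mask) | (new_flags & mask);
}

int main(void)
{
        unsigned port_flags = 0;

        set_flags(&port_flags, PORT_TX_VM, PORT_TX_VM); /* set */
        printf("%#x\n", port_flags);                    /* 0x1 */
        set_flags(&port_flags, 0, PORT_TX_VM);          /* clear */
        printf("%#x\n", port_flags);                    /* 0 */
        return 0;
}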
 static const struct ethtool_ops cxgb_ethtool_ops = {
        .get_link_ksettings = get_link_ksettings,
        .set_link_ksettings = set_link_ksettings,
@@ -1535,6 +1439,8 @@ static const struct ethtool_ops cxgb_ethtool_ops = {
        .get_dump_data     = get_dump_data,
        .get_module_info   = cxgb4_get_module_info,
        .get_module_eeprom = cxgb4_get_module_eeprom,
+       .get_priv_flags    = cxgb4_get_priv_flags,
+       .set_priv_flags    = cxgb4_set_priv_flags,
 };
 
 void cxgb4_set_ethtool_ops(struct net_device *netdev)
index a8926e97935eb01ecdea10cfe760b13d3409634c..0f7ce71205e6846a29041ea5fdd26b54bc6264e8 100644 (file)
@@ -554,10 +554,9 @@ static int fwevtq_handler(struct sge_rspq *q, const __be64 *rsp,
 
                        dev = q->adap->port[q->adap->chan_map[port]];
                        dcbxdis = (action == FW_PORT_ACTION_GET_PORT_INFO
-                                  ? !!(pcmd->u.info.dcbxdis_pkd &
-                                       FW_PORT_CMD_DCBXDIS_F)
-                                  : !!(pcmd->u.info32.lstatus32_to_cbllen32 &
-                                       FW_PORT_CMD_DCBXDIS32_F));
+                         ? !!(pcmd->u.info.dcbxdis_pkd & FW_PORT_CMD_DCBXDIS_F)
+                         : !!(be32_to_cpu(pcmd->u.info32.lstatus32_to_cbllen32)
+                              & FW_PORT_CMD_DCBXDIS32_F));
                        state_input = (dcbxdis
                                       ? CXGB4_DCB_INPUT_FW_DISABLED
                                       : CXGB4_DCB_INPUT_FW_ENABLED);
@@ -924,12 +923,14 @@ static int setup_sge_queues(struct adapter *adap)
                     QUEUENUMBER_V(s->ethrxq[0].rspq.abs_id));
        return 0;
 freeout:
+       dev_err(adap->pdev_dev, "Can't allocate queues, err=%d\n", -err);
        t4_free_sge_resources(adap);
        return err;
 }
 
 static u16 cxgb_select_queue(struct net_device *dev, struct sk_buff *skb,
-                            void *accel_priv, select_queue_fallback_t fallback)
+                            struct net_device *sb_dev,
+                            select_queue_fallback_t fallback)
 {
        int txq;
 
@@ -971,7 +972,7 @@ static u16 cxgb_select_queue(struct net_device *dev, struct sk_buff *skb,
                return txq;
        }
 
-       return fallback(dev, skb) % dev->real_num_tx_queues;
+       return fallback(dev, skb, NULL) % dev->real_num_tx_queues;
 }
 
 static int closest_timer(const struct sge *s, int time)
@@ -3016,7 +3017,7 @@ static int cxgb_setup_tc_block(struct net_device *dev,
        switch (f->command) {
        case TC_BLOCK_BIND:
                return tcf_block_cb_register(f->block, cxgb_setup_tc_block_cb,
-                                            pi, dev);
+                                            pi, dev, f->extack);
        case TC_BLOCK_UNBIND:
                tcf_block_cb_unregister(f->block, cxgb_setup_tc_block_cb, pi);
                return 0;
@@ -3219,7 +3220,7 @@ static netdev_features_t cxgb_fix_features(struct net_device *dev,
 static const struct net_device_ops cxgb4_netdev_ops = {
        .ndo_open             = cxgb_open,
        .ndo_stop             = cxgb_close,
-       .ndo_start_xmit       = t4_eth_xmit,
+       .ndo_start_xmit       = t4_start_xmit,
        .ndo_select_queue     = cxgb_select_queue,
        .ndo_get_stats64      = cxgb_get_stats,
        .ndo_set_rx_mode      = cxgb_set_rxmode,
@@ -3538,6 +3539,16 @@ static int adap_init1(struct adapter *adap, struct fw_caps_config_cmd *c)
        u32 v;
        int ret;
 
+       /* Now that we've successfully configured and initialized the adapter
+        * we can ask the Firmware what resources it has provisioned for us.
+        */
+       ret = t4_get_pfres(adap);
+       if (ret) {
+               dev_err(adap->pdev_dev,
+                       "Unable to retrieve resource provisioning information\n");
+               return ret;
+       }
+
        /* get device capabilities */
        memset(c, 0, sizeof(*c));
        c->op_to_write = htonl(FW_CMD_OP_V(FW_CAPS_CONFIG_CMD) |
@@ -4172,32 +4183,6 @@ static int adap_init0(struct adapter *adap)
                        goto bye;
        }
 
-       /*
-        * Grab VPD parameters.  This should be done after we establish a
-        * connection to the firmware since some of the VPD parameters
-        * (notably the Core Clock frequency) are retrieved via requests to
-        * the firmware.  On the other hand, we need these fairly early on
-        * so we do this right after getting ahold of the firmware.
-        */
-       ret = t4_get_vpd_params(adap, &adap->params.vpd);
-       if (ret < 0)
-               goto bye;
-
-       /*
-        * Find out what ports are available to us.  Note that we need to do
-        * this before calling adap_init0_no_config() since it needs nports
-        * and portvec ...
-        */
-       v =
-           FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) |
-           FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_PORTVEC);
-       ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 1, &v, &port_vec);
-       if (ret < 0)
-               goto bye;
-
-       adap->params.nports = hweight32(port_vec);
-       adap->params.portvec = port_vec;
-
        /* If the firmware is initialized already, emit a simple note to that
         * effect. Otherwise, it's time to try initializing the adapter.
         */
@@ -4248,6 +4233,45 @@ static int adap_init0(struct adapter *adap)
                }
        }
 
+       /* Now that we've successfully configured and initialized the adapter
+        * (or found it already initialized), we can ask the Firmware what
+        * resources it has provisioned for us.
+        */
+       ret = t4_get_pfres(adap);
+       if (ret) {
+               dev_err(adap->pdev_dev,
+                       "Unable to retrieve resource provisioning information\n");
+               goto bye;
+       }
+
+       /* Grab VPD parameters.  This should be done after we establish a
+        * connection to the firmware since some of the VPD parameters
+        * (notably the Core Clock frequency) are retrieved via requests to
+        * the firmware.  On the other hand, we need these fairly early on
+        * so we do this right after getting ahold of the firmware.
+        *
+        * We need to do this after initializing the adapter because someone
+        * could have FLASHed a new VPD which won't be read by the firmware
+        * until we do the RESET ...
+        */
+       ret = t4_get_vpd_params(adap, &adap->params.vpd);
+       if (ret < 0)
+               goto bye;
+
+       /* Find out what ports are available to us.  Note that we need to do
+        * this before calling adap_init0_no_config() since it needs nports
+        * and portvec ...
+        */
+       v =
+           FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) |
+           FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_PORTVEC);
+       ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 1, &v, &port_vec);
+       if (ret < 0)
+               goto bye;
+
+       adap->params.nports = hweight32(port_vec);
+       adap->params.portvec = port_vec;
+
        /* Give the SGE code a chance to pull in anything that it needs ...
         * Note that this must be called after we retrieve our VPD parameters
         * in order to know how to convert core ticks to seconds, etc.
@@ -4799,10 +4823,12 @@ static inline bool is_x_10g_port(const struct link_config *lc)
  * of ports we found and the number of available CPUs.  Most settings can be
  * modified by the admin prior to actual use.
  */
-static void cfg_queues(struct adapter *adap)
+static int cfg_queues(struct adapter *adap)
 {
        struct sge *s = &adap->sge;
-       int i = 0, n10g = 0, qidx = 0;
+       int i, n10g = 0, qidx = 0;
+       int niqflint, neq, avail_eth_qsets;
+       int max_eth_qsets = 32;
 #ifndef CONFIG_CHELSIO_T4_DCB
        int q10g = 0;
 #endif
@@ -4814,16 +4840,46 @@ static void cfg_queues(struct adapter *adap)
                adap->params.crypto = 0;
        }
 
-       n10g += is_x_10g_port(&adap2pinfo(adap, i)->link_cfg);
+       /* Calculate the number of Ethernet Queue Sets available based on
+        * resources provisioned for us.  We always have an Asynchronous
+        * Firmware Event Ingress Queue.  If we're operating in MSI or Legacy
+        * IRQ Pin Interrupt mode, then we'll also have a Forwarded Interrupt
+        * Ingress Queue.  Meanwhile, we need two Egress Queues for each
+        * Queue Set: one for the Free List and one for the Ethernet TX Queue.
+        *
+        * Note that we should also take into account all of the various
+        * Offload Queues.  But, in any situation where we're operating in
+        * a Resource Constrained Provisioning environment, doing any Offload
+        * at all is problematic ...
+        */
+       niqflint = adap->params.pfres.niqflint - 1;
+       if (!(adap->flags & USING_MSIX))
+               niqflint--;
+       neq = adap->params.pfres.neq / 2;
+       avail_eth_qsets = min(niqflint, neq);
+
+       if (avail_eth_qsets > max_eth_qsets)
+               avail_eth_qsets = max_eth_qsets;
+
+       if (avail_eth_qsets < adap->params.nports) {
+               dev_err(adap->pdev_dev, "avail_eth_qsets=%d < nports=%d\n",
+                       avail_eth_qsets, adap->params.nports);
+               return -ENOMEM;
+       }
+
+       /* Count the number of 10Gb/s or better ports */
+       for_each_port(adap, i)
+               n10g += is_x_10g_port(&adap2pinfo(adap, i)->link_cfg);
+
 #ifdef CONFIG_CHELSIO_T4_DCB
        /* For Data Center Bridging support we need to be able to support up
         * to 8 Traffic Priorities; each of which will be assigned to its
         * own TX Queue in order to prevent Head-Of-Line Blocking.
         */
-       if (adap->params.nports * 8 > MAX_ETH_QSETS) {
-               dev_err(adap->pdev_dev, "MAX_ETH_QSETS=%d < %d!\n",
-                       MAX_ETH_QSETS, adap->params.nports * 8);
-               BUG_ON(1);
+       if (adap->params.nports * 8 > avail_eth_qsets) {
+               dev_err(adap->pdev_dev, "DCB avail_eth_qsets=%d < %d!\n",
+                       avail_eth_qsets, adap->params.nports * 8);
+               return -ENOMEM;
        }
 
        for_each_port(adap, i) {
@@ -4839,7 +4895,7 @@ static void cfg_queues(struct adapter *adap)
         * per 10G port.
         */
        if (n10g)
-               q10g = (MAX_ETH_QSETS - (adap->params.nports - n10g)) / n10g;
+               q10g = (avail_eth_qsets - (adap->params.nports - n10g)) / n10g;
        if (q10g > netif_get_num_default_rss_queues())
                q10g = netif_get_num_default_rss_queues();
 
@@ -4890,6 +4946,8 @@ static void cfg_queues(struct adapter *adap)
 
        init_rspq(adap, &s->fw_evtq, 0, 1, 1024, 64);
        init_rspq(adap, &s->intrq, 0, 1, 512, 64);
+
+       return 0;
 }
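The provisioning arithmetic above can be checked in isolation. A standalone sketch, assuming one ingress queue reserved for firmware events, one more when MSI-X is not in use, two egress queues per queue set, and a hypothetical cap of 32 queue sets mirroring max_eth_qsets:

#include <stdio.h>

#define MAX_ETH_QSETS   32      /* hypothetical cap, mirroring max_eth_qsets */

static int avail_eth_qsets(int niqflint, int neq, int using_msix)
{
        int iq = niqflint - 1;          /* reserve the FW event ingress queue */
        int eq;

        if (!using_msix)
                iq--;                   /* reserve the forwarded-interrupt IQ */
        eq = neq / 2;                   /* one free list + one TX queue per set */

        iq = iq < eq ? iq : eq;
        return iq > MAX_ETH_QSETS ? MAX_ETH_QSETS : iq;
}

int main(void)
{
        /* hypothetical PF provisioning: 68 IQs, 40 EQs, MSI-X enabled */
        printf("avail_eth_qsets = %d\n", avail_eth_qsets(68, 40, 1)); /* 20 */
        return 0;
}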
 
 /*
@@ -5630,10 +5688,15 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
                }
        }
 
+       if (!(adapter->flags & FW_OK))
+               goto fw_attach_fail;
+
        /* Configure queues and allocate tables now, they can be needed as
         * soon as the first register_netdev completes.
         */
-       cfg_queues(adapter);
+       err = cfg_queues(adapter);
+       if (err)
+               goto out_free_dev;
 
        adapter->smt = t4_init_smt();
        if (!adapter->smt) {
@@ -5705,7 +5768,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
                if (t4_read_reg(adapter, LE_DB_CONFIG_A) & HASHEN_F) {
                        u32 hash_base, hash_reg;
 
-                       if (chip <= CHELSIO_T5) {
+                       if (chip_ver <= CHELSIO_T5) {
                                hash_reg = LE_DB_TID_HASHBASE_A;
                                hash_base = t4_read_reg(adapter, hash_reg);
                                adapter->tids.hash_base = hash_base / 4;
@@ -5740,6 +5803,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
                goto out_free_dev;
        }
 
+fw_attach_fail:
        /*
         * The card is now ready to go.  If any errors occur during device
         * registration we do not fail the whole card but rather proceed only
index 3ddd2c4acf6846e38697fde2f09f1b3aa300dbe6..623f73dd7738dbbb01b8f49649a4507d641b1d9a 100644 (file)
@@ -874,6 +874,9 @@ int cxgb4_init_tc_flower(struct adapter *adap)
 {
        int ret;
 
+       if (adap->tc_flower_initialized)
+               return -EEXIST;
+
        adap->flower_ht_params = cxgb4_tc_flower_ht_params;
        ret = rhashtable_init(&adap->flower_tbl, &adap->flower_ht_params);
        if (ret)
@@ -882,13 +885,18 @@ int cxgb4_init_tc_flower(struct adapter *adap)
        INIT_WORK(&adap->flower_stats_work, ch_flower_stats_handler);
        timer_setup(&adap->flower_stats_timer, ch_flower_stats_cb, 0);
        mod_timer(&adap->flower_stats_timer, jiffies + STATS_CHECK_PERIOD);
+       adap->tc_flower_initialized = true;
        return 0;
 }
 
 void cxgb4_cleanup_tc_flower(struct adapter *adap)
 {
+       if (!adap->tc_flower_initialized)
+               return;
+
        if (adap->flower_stats_timer.function)
                del_timer_sync(&adap->flower_stats_timer);
        cancel_work_sync(&adap->flower_stats_work);
        rhashtable_destroy(&adap->flower_tbl);
+       adap->tc_flower_initialized = false;
 }
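The tc_flower_initialized flag makes init/cleanup an idempotent pair: a second init returns -EEXIST and a second cleanup is a no-op. A minimal sketch of the same guard pattern, with the resource setup and teardown elided as hypothetical placeholders:

#include <stdbool.h>
#include <stdio.h>

#define EEXIST 17

struct state {
        bool initialized;
};

static int my_init(struct state *s)
{
        if (s->initialized)
                return -EEXIST;         /* refuse double init */
        /* ... allocate tables, arm timers (elided) ... */
        s->initialized = true;
        return 0;
}

static void my_cleanup(struct state *s)
{
        if (!s->initialized)
                return;                 /* cleanup is safe to call anytime */
        /* ... tear down in reverse order (elided) ... */
        s->initialized = false;
}

int main(void)
{
        struct state s = { false };

        printf("init #1 -> %d\n", my_init(&s)); /* 0 */
        printf("init #2 -> %d\n", my_init(&s)); /* -17 (-EEXIST) */
        my_cleanup(&s);
        my_cleanup(&s);                         /* harmless no-op */
        return 0;
}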
index 9148abb7994c8d9d91a75c01c286daa36ce9cd1b..7fc656680299703439d2e3bb590eaed762cd2afd 100644 (file)
@@ -539,6 +539,9 @@ void t4_cleanup_sched(struct adapter *adap)
                struct port_info *pi = netdev2pinfo(adap->port[j]);
 
                s = pi->sched_tbl;
+               if (!s)
+                       continue;
+
                for (i = 0; i < s->sched_size; i++) {
                        struct sched_class *e;
 
index 395e2a0e8d7f6235a36ae9ec73ebd5141b2f24ce..6807bc3a44fb7fad1fd6c229bafef6b5a8c063a1 100644 (file)
@@ -1288,13 +1288,13 @@ static inline void t6_fill_tnl_lso(struct sk_buff *skb,
 }
 
 /**
- *     t4_eth_xmit - add a packet to an Ethernet Tx queue
+ *     cxgb4_eth_xmit - add a packet to an Ethernet Tx queue
  *     @skb: the packet
  *     @dev: the egress net device
  *
  *     Add a packet to an SGE Ethernet Tx queue.  Runs with softirqs disabled.
  */
-netdev_tx_t t4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
 {
        u32 wr_mid, ctrl0, op;
        u64 cntrl, *end, *sgl;
@@ -1547,6 +1547,374 @@ out_free:       dev_kfree_skb_any(skb);
        return NETDEV_TX_OK;
 }
 
+/* Constants ... */
+enum {
+       /* Egress Queue sizes, producer and consumer indices are all in units
+        * of Egress Context Units bytes.  Note that as far as the hardware is
+        * concerned, the free list is an Egress Queue (the host produces free
+        * buffers which the hardware consumes) and free list entries are
+        * 64-bit PCI DMA addresses.
+        */
+       EQ_UNIT = SGE_EQ_IDXSIZE,
+       FL_PER_EQ_UNIT = EQ_UNIT / sizeof(__be64),
+       TXD_PER_EQ_UNIT = EQ_UNIT / sizeof(__be64),
+
+       T4VF_ETHTXQ_MAX_HDR = (sizeof(struct fw_eth_tx_pkt_vm_wr) +
+                              sizeof(struct cpl_tx_pkt_lso_core) +
+                              sizeof(struct cpl_tx_pkt_core)) / sizeof(__be64),
+};
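These unit conversions are easy to verify on their own. A tiny sketch, assuming a hypothetical 64-byte egress context unit standing in for SGE_EQ_IDXSIZE:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        /* hypothetical: egress context unit of 64 bytes (SGE_EQ_IDXSIZE) */
        const unsigned int eq_unit = 64;
        const unsigned int fl_per_eq_unit = eq_unit / sizeof(uint64_t);

        /* 8 free-list DMA addresses (or TX doublewords) per EQ unit */
        printf("FL_PER_EQ_UNIT = %u\n", fl_per_eq_unit);
        return 0;
}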
+
+/**
+ *     t4vf_is_eth_imm - can an Ethernet packet be sent as immediate data?
+ *     @skb: the packet
+ *
+ *     Returns whether an Ethernet packet is small enough to fit completely as
+ *     immediate data.
+ */
+static inline int t4vf_is_eth_imm(const struct sk_buff *skb)
+{
+       /* The VF Driver uses the FW_ETH_TX_PKT_VM_WR firmware Work Request
+        * which does not accommodate immediate data.  We could dike out all
+        * of the support code for immediate data but that would tie our hands
+        * too much if we ever want to enhance the firmware.  It would also
+        * create more differences between the PF and VF Drivers.
+        */
+       return false;
+}
+
+/**
+ *     t4vf_calc_tx_flits - calculate the number of flits for a packet TX WR
+ *     @skb: the packet
+ *
+ *     Returns the number of flits needed for a TX Work Request for the
+ *     given Ethernet packet, including the needed WR and CPL headers.
+ */
+static inline unsigned int t4vf_calc_tx_flits(const struct sk_buff *skb)
+{
+       unsigned int flits;
+
+       /* If the skb is small enough, we can pump it out as a work request
+        * with only immediate data.  In that case we just have to have the
+        * TX Packet header plus the skb data in the Work Request.
+        */
+       if (t4vf_is_eth_imm(skb))
+               return DIV_ROUND_UP(skb->len + sizeof(struct cpl_tx_pkt),
+                                   sizeof(__be64));
+
+       /* Otherwise, we're going to have to construct a Scatter/Gather List
+        * of the skb body and fragments.  We also include the flits necessary
+        * for the TX Packet Work Request and CPL.  We always have a firmware
+        * Write Header (incorporated as part of the cpl_tx_pkt_lso and
+        * cpl_tx_pkt structures), followed by either a TX Packet Write CPL
+        * message or, if we're doing a Large Send Offload, an LSO CPL message
+        * with an embedded TX Packet Write CPL message.
+        */
+       flits = sgl_len(skb_shinfo(skb)->nr_frags + 1);
+       if (skb_shinfo(skb)->gso_size)
+               flits += (sizeof(struct fw_eth_tx_pkt_vm_wr) +
+                         sizeof(struct cpl_tx_pkt_lso_core) +
+                         sizeof(struct cpl_tx_pkt_core)) / sizeof(__be64);
+       else
+               flits += (sizeof(struct fw_eth_tx_pkt_vm_wr) +
+                         sizeof(struct cpl_tx_pkt_core)) / sizeof(__be64);
+       return flits;
+}
+
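A flit is an 8-byte unit, so header sizes convert to flits by rounding up to multiples of sizeof(__be64). A self-contained sketch of that conversion, with made-up header sizes standing in for the fw_eth_tx_pkt_vm_wr and CPL structures:

#include <stdio.h>
#include <stdint.h>

#define DIV_ROUND_UP(n, d)      (((n) + (d) - 1) / (d))

/* flits needed for a header of 'bytes' bytes (a flit is 8 bytes) */
static unsigned int hdr_flits(unsigned int bytes)
{
        return DIV_ROUND_UP(bytes, sizeof(uint64_t));
}

int main(void)
{
        /* hypothetical sizes: 32-byte WR + 24-byte LSO CPL + 16-byte pkt CPL */
        unsigned int lso_hdr = 32 + 24 + 16;
        unsigned int plain_hdr = 32 + 16;

        printf("LSO header flits   = %u\n", hdr_flits(lso_hdr));   /* 9 */
        printf("plain header flits = %u\n", hdr_flits(plain_hdr)); /* 6 */
        return 0;
}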
+/**
+ *     cxgb4_vf_eth_xmit - add a packet to an Ethernet TX queue
+ *     @skb: the packet
+ *     @dev: the egress net device
+ *
+ *     Add a packet to an SGE Ethernet TX queue.  Runs with softirqs disabled.
+ */
+static netdev_tx_t cxgb4_vf_eth_xmit(struct sk_buff *skb,
+                                    struct net_device *dev)
+{
+       dma_addr_t addr[MAX_SKB_FRAGS + 1];
+       const struct skb_shared_info *ssi;
+       struct fw_eth_tx_pkt_vm_wr *wr;
+       int qidx, credits, max_pkt_len;
+       struct cpl_tx_pkt_core *cpl;
+       const struct port_info *pi;
+       unsigned int flits, ndesc;
+       struct sge_eth_txq *txq;
+       struct adapter *adapter;
+       u64 cntrl, *end;
+       u32 wr_mid;
+       const size_t fw_hdr_copy_len = sizeof(wr->ethmacdst) +
+                                      sizeof(wr->ethmacsrc) +
+                                      sizeof(wr->ethtype) +
+                                      sizeof(wr->vlantci);
+
+       /* The chip minimum packet length is 10 octets, but the firmware
+        * command that we are using requires that we copy the Ethernet header
+        * (including the VLAN tag) into the Work Request header, so we reject
+        * anything smaller than that ...
+        */
+       if (unlikely(skb->len < fw_hdr_copy_len))
+               goto out_free;
+
+       /* Discard the packet if the length is greater than the MTU */
+       max_pkt_len = ETH_HLEN + dev->mtu;
+       if (skb_vlan_tag_present(skb))
+               max_pkt_len += VLAN_HLEN;
+       if (!skb_shinfo(skb)->gso_size && (unlikely(skb->len > max_pkt_len)))
+               goto out_free;
+
+       /* Figure out which TX Queue we're going to use. */
+       pi = netdev_priv(dev);
+       adapter = pi->adapter;
+       qidx = skb_get_queue_mapping(skb);
+       WARN_ON(qidx >= pi->nqsets);
+       txq = &adapter->sge.ethtxq[pi->first_qset + qidx];
+
+       /* Take this opportunity to reclaim any TX Descriptors whose DMA
+        * transfers have completed.
+        */
+       cxgb4_reclaim_completed_tx(adapter, &txq->q, true);
+
+       /* Calculate the number of flits and TX Descriptors we're going to
+        * need along with how many TX Descriptors will be left over after
+        * we inject our Work Request.
+        */
+       flits = t4vf_calc_tx_flits(skb);
+       ndesc = flits_to_desc(flits);
+       credits = txq_avail(&txq->q) - ndesc;
+
+       if (unlikely(credits < 0)) {
+               /* Not enough room for this packet's Work Request.  Stop the
+                * TX Queue and return a "busy" condition.  The queue will get
+                * started later on when the firmware informs us that space
+                * has opened up.
+                */
+               eth_txq_stop(txq);
+               dev_err(adapter->pdev_dev,
+                       "%s: TX ring %u full while queue awake!\n",
+                       dev->name, qidx);
+               return NETDEV_TX_BUSY;
+       }
+
+       if (!t4vf_is_eth_imm(skb) &&
+           unlikely(cxgb4_map_skb(adapter->pdev_dev, skb, addr) < 0)) {
+               /* We need to map the skb into PCI DMA space (because it can't
+                * be in-lined directly into the Work Request) and the mapping
+                * operation failed.  Record the error and drop the packet.
+                */
+               txq->mapping_err++;
+               goto out_free;
+       }
+
+       wr_mid = FW_WR_LEN16_V(DIV_ROUND_UP(flits, 2));
+       if (unlikely(credits < ETHTXQ_STOP_THRES)) {
+               /* After we're done injecting the Work Request for this
+                * packet, we'll be below our "stop threshold" so stop the TX
+                * Queue now and schedule a request for an SGE Egress Queue
+                * Update message.  The queue will get started later on when
+                * the firmware processes this Work Request and sends us an
+                * Egress Queue Status Update message indicating that space
+                * has opened up.
+                */
+               eth_txq_stop(txq);
+               wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F;
+       }
+
+       /* Start filling in our Work Request.  Note that we do _not_ handle
+        * the WR Header wrapping around the TX Descriptor Ring.  If our
+        * maximum header size ever exceeds one TX Descriptor, we'll need to
+        * do something else here.
+        */
+       WARN_ON(DIV_ROUND_UP(T4VF_ETHTXQ_MAX_HDR, TXD_PER_EQ_UNIT) > 1);
+       wr = (void *)&txq->q.desc[txq->q.pidx];
+       wr->equiq_to_len16 = cpu_to_be32(wr_mid);
+       wr->r3[0] = cpu_to_be32(0);
+       wr->r3[1] = cpu_to_be32(0);
+       skb_copy_from_linear_data(skb, (void *)wr->ethmacdst, fw_hdr_copy_len);
+       end = (u64 *)wr + flits;
+
+       /* If this is a Large Send Offload packet we'll put in an LSO CPL
+        * message with an encapsulated TX Packet CPL message.  Otherwise we
+        * just use a TX Packet CPL message.
+        */
+       ssi = skb_shinfo(skb);
+       if (ssi->gso_size) {
+               struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1);
+               bool v6 = (ssi->gso_type & SKB_GSO_TCPV6) != 0;
+               int l3hdr_len = skb_network_header_len(skb);
+               int eth_xtra_len = skb_network_offset(skb) - ETH_HLEN;
+
+               wr->op_immdlen =
+                       cpu_to_be32(FW_WR_OP_V(FW_ETH_TX_PKT_VM_WR) |
+                                   FW_WR_IMMDLEN_V(sizeof(*lso) +
+                                                   sizeof(*cpl)));
+                /* Fill in the LSO CPL message. */
+               lso->lso_ctrl =
+                       cpu_to_be32(LSO_OPCODE_V(CPL_TX_PKT_LSO) |
+                                   LSO_FIRST_SLICE_F |
+                                   LSO_LAST_SLICE_F |
+                                   LSO_IPV6_V(v6) |
+                                   LSO_ETHHDR_LEN_V(eth_xtra_len / 4) |
+                                   LSO_IPHDR_LEN_V(l3hdr_len / 4) |
+                                   LSO_TCPHDR_LEN_V(tcp_hdr(skb)->doff));
+               lso->ipid_ofst = cpu_to_be16(0);
+               lso->mss = cpu_to_be16(ssi->gso_size);
+               lso->seqno_offset = cpu_to_be32(0);
+               if (is_t4(adapter->params.chip))
+                       lso->len = cpu_to_be32(skb->len);
+               else
+                       lso->len = cpu_to_be32(LSO_T5_XFER_SIZE_V(skb->len));
+
+               /* Set up TX Packet CPL pointer, control word and perform
+                * accounting.
+                */
+               cpl = (void *)(lso + 1);
+
+               if (CHELSIO_CHIP_VERSION(adapter->params.chip) <= CHELSIO_T5)
+                       cntrl = TXPKT_ETHHDR_LEN_V(eth_xtra_len);
+               else
+                       cntrl = T6_TXPKT_ETHHDR_LEN_V(eth_xtra_len);
+
+               cntrl |= TXPKT_CSUM_TYPE_V(v6 ?
+                                          TX_CSUM_TCPIP6 : TX_CSUM_TCPIP) |
+                        TXPKT_IPHDR_LEN_V(l3hdr_len);
+               txq->tso++;
+               txq->tx_cso += ssi->gso_segs;
+       } else {
+               int len;
+
+               len = (t4vf_is_eth_imm(skb)
+                      ? skb->len + sizeof(*cpl)
+                      : sizeof(*cpl));
+               wr->op_immdlen =
+                       cpu_to_be32(FW_WR_OP_V(FW_ETH_TX_PKT_VM_WR) |
+                                   FW_WR_IMMDLEN_V(len));
+
+               /* Set up TX Packet CPL pointer, control word and perform
+                * accounting.
+                */
+               cpl = (void *)(wr + 1);
+               if (skb->ip_summed == CHECKSUM_PARTIAL) {
+                       cntrl = hwcsum(adapter->params.chip, skb) |
+                               TXPKT_IPCSUM_DIS_F;
+                       txq->tx_cso++;
+               } else {
+                       cntrl = TXPKT_L4CSUM_DIS_F | TXPKT_IPCSUM_DIS_F;
+               }
+       }
+
+       /* If there's a VLAN tag present, add that to the list of things to
+        * do in this Work Request.
+        */
+       if (skb_vlan_tag_present(skb)) {
+               txq->vlan_ins++;
+               cntrl |= TXPKT_VLAN_VLD_F | TXPKT_VLAN_V(skb_vlan_tag_get(skb));
+       }
+
+        /* Fill in the TX Packet CPL message header. */
+       cpl->ctrl0 = cpu_to_be32(TXPKT_OPCODE_V(CPL_TX_PKT_XT) |
+                                TXPKT_INTF_V(pi->port_id) |
+                                TXPKT_PF_V(0));
+       cpl->pack = cpu_to_be16(0);
+       cpl->len = cpu_to_be16(skb->len);
+       cpl->ctrl1 = cpu_to_be64(cntrl);
+
+       /* Fill in the body of the TX Packet CPL message with either in-lined
+        * data or a Scatter/Gather List.
+        */
+       if (t4vf_is_eth_imm(skb)) {
+               /* In-line the packet's data and free the skb since we don't
+                * need it any longer.
+                */
+               cxgb4_inline_tx_skb(skb, &txq->q, cpl + 1);
+               dev_consume_skb_any(skb);
+       } else {
+               /* Write the skb's Scatter/Gather list into the TX Packet CPL
+                * message and retain a pointer to the skb so we can free it
+                * later when its DMA completes.  (We store the skb pointer
+                * in the Software Descriptor corresponding to the last TX
+                * Descriptor used by the Work Request.)
+                *
+                * The retained skb will be freed when the corresponding TX
+                * Descriptors are reclaimed after their DMAs complete.
+                * However, this could take quite a while since, in general,
+                * the hardware is set up to be lazy about sending DMA
+                * completion notifications to us and we mostly perform TX
+                * reclaims in the transmit routine.
+                *
+                * This is good for performance but means that we rely on new
+                * TX packets arriving to run the destructors of completed
+                * packets, which open up space in their sockets' send queues.
+                * Sometimes we do not get such new packets causing TX to
+                * Sometimes we do not get such new packets, causing TX to
+                * situation.  We have a clean up timer that periodically
+                * reclaims completed packets but it doesn't run often enough
+                * (nor do we want it to) to prevent lengthy stalls.  A
+                * solution to this problem is to run the destructor early,
+                * after the packet is queued but before it's DMAd.  A con is
+                * that we lie to socket memory accounting, but the amount of
+                * extra memory is reasonable (limited by the number of TX
+                * descriptors), the packets do actually get freed quickly by
+                * new packets almost always, and for protocols like TCP that
+                * wait for ACKs to really free up the data, the extra memory
+                * is even less.  On the positive side we run the destructors
+                * on the sending CPU rather than on a potentially different
+                * completing CPU, usually a good thing.
+                *
+                * Run the destructor before telling the DMA engine about the
+                * packet to make sure it doesn't complete and get freed
+                * prematurely.
+                */
+               struct ulptx_sgl *sgl = (struct ulptx_sgl *)(cpl + 1);
+               struct sge_txq *tq = &txq->q;
+               int last_desc;
+
+               /* If the Work Request header was an exact multiple of our TX
+                * Descriptor length, then it's possible that the starting SGL
+                * pointer lines up exactly with the end of our TX Descriptor
+                * ring.  If that's the case, wrap around to the beginning
+                * here ...
+                */
+               if (unlikely((void *)sgl == (void *)tq->stat)) {
+                       sgl = (void *)tq->desc;
+                       end = (void *)((void *)tq->desc +
+                                      ((void *)end - (void *)tq->stat));
+               }
+
+               cxgb4_write_sgl(skb, tq, sgl, end, 0, addr);
+               skb_orphan(skb);
+
+               last_desc = tq->pidx + ndesc - 1;
+               if (last_desc >= tq->size)
+                       last_desc -= tq->size;
+               tq->sdesc[last_desc].skb = skb;
+               tq->sdesc[last_desc].sgl = sgl;
+       }
+
+       /* Advance our internal TX Queue state, tell the hardware about
+        * the new TX descriptors and return success.
+        */
+       txq_advance(&txq->q, ndesc);
+
+       cxgb4_ring_tx_db(adapter, &txq->q, ndesc);
+       return NETDEV_TX_OK;
+
+out_free:
+       /* An error of some sort happened.  Free the TX skb and tell the
+        * OS that we've "dealt" with the packet ...
+        */
+       dev_kfree_skb_any(skb);
+       return NETDEV_TX_OK;
+}
+
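Two pieces of ring arithmetic from the function above can be exercised standalone: the credit check that stops the queue when a Work Request will not fit, and the wrap-around of the last descriptor index where the skb pointer is retained. A userspace sketch with a hypothetical ring:

#include <stdio.h>

struct ring {
        unsigned int size;      /* descriptors in the ring */
        unsigned int in_use;    /* descriptors currently outstanding */
        unsigned int pidx;      /* producer index */
};

/* descriptors still free, mirroring txq_avail() */
static int ring_avail(const struct ring *r)
{
        return r->size - r->in_use;
}

int main(void)
{
        struct ring r = { .size = 1024, .in_use = 1020, .pidx = 1022 };
        unsigned int ndesc = 3; /* hypothetical WR footprint in descriptors */
        int credits = ring_avail(&r) - ndesc;
        unsigned int last_desc;

        if (credits < 0) {
                printf("ring full: would stop the queue\n");
                return 0;
        }

        /* index of the last descriptor the WR occupies, with wrap */
        last_desc = r.pidx + ndesc - 1;
        if (last_desc >= r.size)
                last_desc -= r.size;
        printf("credits=%d last_desc=%u\n", credits, last_desc); /* 1, 0 */
        return 0;
}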
+netdev_tx_t t4_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+       struct port_info *pi = netdev_priv(dev);
+
+       if (unlikely(pi->eth_flags & PRIV_FLAG_PORT_TX_VM))
+               return cxgb4_vf_eth_xmit(skb, dev);
+
+       return cxgb4_eth_xmit(skb, dev);
+}
+
 /**
  *     reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
  *     @q: the SGE control Tx queue
@@ -3044,7 +3412,9 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
        c.iqsize = htons(iq->size);
        c.iqaddr = cpu_to_be64(iq->phys_addr);
        if (cong >= 0)
-               c.iqns_to_fl0congen = htonl(FW_IQ_CMD_IQFLINTCONGEN_F);
+               c.iqns_to_fl0congen = htonl(FW_IQ_CMD_IQFLINTCONGEN_F |
+                               FW_IQ_CMD_IQTYPE_V(cong ? FW_IQ_IQTYPE_NIC
+                                                       :  FW_IQ_IQTYPE_OFLD));
 
        if (fl) {
                enum chip_type chip = CHELSIO_CHIP_VERSION(adap->params.chip);
index 3720c3e11ebb883466d04b4a2169878f0b135399..2d9943f90a75542171f1bd91f6986d1e0cb02a87 100644 (file)
@@ -2882,6 +2882,57 @@ int t4_get_vpd_params(struct adapter *adapter, struct vpd_params *p)
        return 0;
 }
 
+/**
+ *     t4_get_pfres - retrieve PF resource limits
+ *     @adapter: the adapter
+ *
+ *     Retrieves configured resource limits and capabilities for a physical
+ *     function.  The results are stored in @adapter->pfres.
+ */
+int t4_get_pfres(struct adapter *adapter)
+{
+       struct pf_resources *pfres = &adapter->params.pfres;
+       struct fw_pfvf_cmd cmd, rpl;
+       int v;
+       u32 word;
+
+       /* Execute PFVF Read command to get PF resource limits; bail out early
+        * with error on command failure.
+        */
+       memset(&cmd, 0, sizeof(cmd));
+       cmd.op_to_vfn = cpu_to_be32(FW_CMD_OP_V(FW_PFVF_CMD) |
+                                   FW_CMD_REQUEST_F |
+                                   FW_CMD_READ_F |
+                                   FW_PFVF_CMD_PFN_V(adapter->pf) |
+                                   FW_PFVF_CMD_VFN_V(0));
+       cmd.retval_len16 = cpu_to_be32(FW_LEN16(cmd));
+       v = t4_wr_mbox(adapter, adapter->mbox, &cmd, sizeof(cmd), &rpl);
+       if (v != FW_SUCCESS)
+               return v;
+
+       /* Extract PF resource limits and return success.
+        */
+       word = be32_to_cpu(rpl.niqflint_niq);
+       pfres->niqflint = FW_PFVF_CMD_NIQFLINT_G(word);
+       pfres->niq = FW_PFVF_CMD_NIQ_G(word);
+
+       word = be32_to_cpu(rpl.type_to_neq);
+       pfres->neq = FW_PFVF_CMD_NEQ_G(word);
+       pfres->pmask = FW_PFVF_CMD_PMASK_G(word);
+
+       word = be32_to_cpu(rpl.tc_to_nexactf);
+       pfres->tc = FW_PFVF_CMD_TC_G(word);
+       pfres->nvi = FW_PFVF_CMD_NVI_G(word);
+       pfres->nexactf = FW_PFVF_CMD_NEXACTF_G(word);
+
+       word = be32_to_cpu(rpl.r_caps_to_nethctrl);
+       pfres->r_caps = FW_PFVF_CMD_R_CAPS_G(word);
+       pfres->wx_caps = FW_PFVF_CMD_WX_CAPS_G(word);
+       pfres->nethctrl = FW_PFVF_CMD_NETHCTRL_G(word);
+
+       return 0;
+}
+
 /* serial flash and firmware constants */
 enum {
        SF_ATTEMPTS = 10,             /* max retries for SF operations */
index c7f8d0441278fb19b1283d49c54265075bf89cf2..e3adf435913ed60646416770449d3aa589ddd1f2 100644 (file)
@@ -188,6 +188,7 @@ CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN
        CH_PCI_ID_TABLE_FENTRY(0x50ab), /* Custom T520-CR */
        CH_PCI_ID_TABLE_FENTRY(0x50ac), /* Custom T540-BT */
        CH_PCI_ID_TABLE_FENTRY(0x50ad), /* Custom T520-CR */
+       CH_PCI_ID_TABLE_FENTRY(0x50ae), /* Custom T540-XL-SO */
 
        /* T6 adapters:
         */
index 6b55aa2eb2a5a8be6e93bd82ca6c535086eb307a..eb222d40ddbf32807b17c45cd42f5271d21ba12e 100644 (file)
 #define TP_MIB_DATA_A  0x7e54
 #define TP_INT_CAUSE_A 0x7e74
 
+#define TP_FLM_FREE_PS_CNT_A 0x7e80
+#define TP_FLM_FREE_RX_CNT_A 0x7e84
+
+#define FREEPSTRUCTCOUNT_S    0
+#define FREEPSTRUCTCOUNT_M    0x1fffffU
+#define FREEPSTRUCTCOUNT_G(x) (((x) >> FREEPSTRUCTCOUNT_S) & FREEPSTRUCTCOUNT_M)
+
+#define FREERXPAGECOUNT_S    0
+#define FREERXPAGECOUNT_M    0x1fffffU
+#define FREERXPAGECOUNT_V(x) ((x) << FREERXPAGECOUNT_S)
+#define FREERXPAGECOUNT_G(x) (((x) >> FREERXPAGECOUNT_S) & FREERXPAGECOUNT_M)
+
+#define TP_FLM_FREE_TX_CNT_A 0x7e88
+
+#define FREETXPAGECOUNT_S    0
+#define FREETXPAGECOUNT_M    0x1fffffU
+#define FREETXPAGECOUNT_V(x) ((x) << FREETXPAGECOUNT_S)
+#define FREETXPAGECOUNT_G(x) (((x) >> FREETXPAGECOUNT_S) & FREETXPAGECOUNT_M)
+
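The _S/_M/_V/_G suffixes used throughout these definitions follow one convention: shift, mask, compose a field value, and extract a field value. A toy demonstration with a made-up field name:

#include <stdio.h>
#include <stdint.h>

/* toy 21-bit counter field at bit 0, following the _S/_M/_V/_G pattern */
#define MYCOUNT_S       0
#define MYCOUNT_M       0x1fffffU
#define MYCOUNT_V(x)    ((x) << MYCOUNT_S)
#define MYCOUNT_G(x)    (((x) >> MYCOUNT_S) & MYCOUNT_M)

int main(void)
{
        uint32_t reg = MYCOUNT_V(0x12345u);     /* compose a register image */

        printf("extracted = 0x%x\n", MYCOUNT_G(reg));   /* 0x12345 */
        return 0;
}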
 #define FLMTXFLSTEMPTY_S    30
 #define FLMTXFLSTEMPTY_V(x) ((x) << FLMTXFLSTEMPTY_S)
 #define FLMTXFLSTEMPTY_F    FLMTXFLSTEMPTY_V(1U)
 #define ULP_TX_LA_RDPTR_0_A 0x8ec0
 #define ULP_TX_LA_RDDATA_0_A 0x8ec4
 #define ULP_TX_LA_WRPTR_0_A 0x8ec8
+#define ULP_TX_ASIC_DEBUG_CTRL_A 0x8f70
+
+#define ULP_TX_ASIC_DEBUG_0_A 0x8f74
+#define ULP_TX_ASIC_DEBUG_1_A 0x8f78
+#define ULP_TX_ASIC_DEBUG_2_A 0x8f7c
+#define ULP_TX_ASIC_DEBUG_3_A 0x8f80
+#define ULP_TX_ASIC_DEBUG_4_A 0x8f84
+
+/* registers for module PM_RX */
+#define PM_RX_BASE_ADDR 0x8fc0
 
 #define PMRX_E_PCMD_PAR_ERROR_S    0
 #define PMRX_E_PCMD_PAR_ERROR_V(x) ((x) << PMRX_E_PCMD_PAR_ERROR_S)
index f1967cf6d43c4b614aba152b7e6e53d0e8b5e644..5dc6c4154af8a6e5961290f84e31e9a10d551598 100644 (file)
@@ -1472,6 +1472,12 @@ enum fw_iq_type {
        FW_IQ_TYPE_NO_FL_INT_CAP
 };
 
+enum fw_iq_iqtype {
+       FW_IQ_IQTYPE_OTHER,
+       FW_IQ_IQTYPE_NIC,
+       FW_IQ_IQTYPE_OFLD,
+};
+
 struct fw_iq_cmd {
        __be32 op_to_vfn;
        __be32 alloc_to_len16;
@@ -1586,6 +1592,12 @@ struct fw_iq_cmd {
 #define FW_IQ_CMD_IQFLINTISCSIC_S      26
 #define FW_IQ_CMD_IQFLINTISCSIC_V(x)   ((x) << FW_IQ_CMD_IQFLINTISCSIC_S)
 
+#define FW_IQ_CMD_IQTYPE_S             24
+#define FW_IQ_CMD_IQTYPE_M             0x3
+#define FW_IQ_CMD_IQTYPE_V(x)          ((x) << FW_IQ_CMD_IQTYPE_S)
+#define FW_IQ_CMD_IQTYPE_G(x)          \
+       (((x) >> FW_IQ_CMD_IQTYPE_S) & FW_IQ_CMD_IQTYPE_M)
+
 #define FW_IQ_CMD_FL0CNGCHMAP_S                20
 #define FW_IQ_CMD_FL0CNGCHMAP_V(x)     ((x) << FW_IQ_CMD_FL0CNGCHMAP_S)
 
index 0ed1616423711d226e2433f6c0c5ccd683e589b6..74849be5f004f59552892cf642a9b02efb393ac7 100644 (file)
@@ -412,12 +412,10 @@ int cxgbi_ppm_init(void **ppm_pp, struct net_device *ndev,
                        ppmax * (sizeof(struct cxgbi_ppod_data)) +
                        ppod_bmap_size * sizeof(unsigned long);
 
-       ppm = vmalloc(alloc_sz);
+       ppm = vzalloc(alloc_sz);
        if (!ppm)
                goto release_ppm_pool;
 
-       memset(ppm, 0, alloc_sz);
-
        ppm->ppod_bmap = (unsigned long *)(&ppm->ppod_data[ppmax]);
 
        if ((ppod_bmap_size >> 3) > (ppmax - ppmax_pool)) {
index e9db811df59c01f0c0dc96827039936273159401..901e44b0b795bd69744b91ef31239016289d96bf 100644 (file)
@@ -1071,7 +1071,7 @@ struct vnic_dev *vnic_dev_register(struct vnic_dev *vdev,
        unsigned int num_bars)
 {
        if (!vdev) {
-               vdev = kzalloc(sizeof(struct vnic_dev), GFP_ATOMIC);
+               vdev = kzalloc(sizeof(struct vnic_dev), GFP_KERNEL);
                if (!vdev)
                        return NULL;
        }
index f8aa326d1d585debe34636bfbd1a17868b5c9f7e..a3e7b003ada1c2d9005fba845db36cf9d3046e65 100644 (file)
@@ -35,7 +35,7 @@ static int vnic_rq_alloc_bufs(struct vnic_rq *rq)
        unsigned int blks = VNIC_RQ_BUF_BLKS_NEEDED(count);
 
        for (i = 0; i < blks; i++) {
-               rq->bufs[i] = kzalloc(VNIC_RQ_BUF_BLK_SZ(count), GFP_ATOMIC);
+               rq->bufs[i] = kzalloc(VNIC_RQ_BUF_BLK_SZ(count), GFP_KERNEL);
                if (!rq->bufs[i])
                        return -ENOMEM;
        }
index 090cc65658a3237beeb0fc16e3501e77a2d69c57..eb75891974dffbb63bcd7c5b8b4da5bb91260262 100644 (file)
@@ -35,7 +35,7 @@ static int vnic_wq_alloc_bufs(struct vnic_wq *wq)
        unsigned int blks = VNIC_WQ_BUF_BLKS_NEEDED(count);
 
        for (i = 0; i < blks; i++) {
-               wq->bufs[i] = kzalloc(VNIC_WQ_BUF_BLK_SZ(count), GFP_ATOMIC);
+               wq->bufs[i] = kzalloc(VNIC_WQ_BUF_BLK_SZ(count), GFP_KERNEL);
                if (!wq->bufs[i])
                        return -ENOMEM;
        }
index 6d7404f66f84af7322c6b58def8cbff94958ca12..1c9ad3630c7754b692079c19ffafec7c07922c80 100644 (file)
 #define DRV_NAME               "gmac-gemini"
 #define DRV_VERSION            "1.0"
 
+#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK)
+static int debug = -1;
+module_param(debug, int, 0);
+MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
+
 #define HSIZE_8                        0x00
 #define HSIZE_16               0x01
 #define HSIZE_32               0x02
@@ -146,6 +151,7 @@ struct gemini_ethernet {
        void __iomem *base;
        struct gemini_ethernet_port *port0;
        struct gemini_ethernet_port *port1;
+       bool initialized;
 
        spinlock_t      irq_lock; /* Locks IRQ-related registers */
        unsigned int    freeq_order;
@@ -300,23 +306,26 @@ static void gmac_speed_set(struct net_device *netdev)
                status.bits.speed = GMAC_SPEED_1000;
                if (phydev->interface == PHY_INTERFACE_MODE_RGMII)
                        status.bits.mii_rmii = GMAC_PHY_RGMII_1000;
-               netdev_info(netdev, "connect to RGMII @ 1Gbit\n");
+               netdev_dbg(netdev, "connect %s to RGMII @ 1Gbit\n",
+                          phydev_name(phydev));
                break;
        case 100:
                status.bits.speed = GMAC_SPEED_100;
                if (phydev->interface == PHY_INTERFACE_MODE_RGMII)
                        status.bits.mii_rmii = GMAC_PHY_RGMII_100_10;
-               netdev_info(netdev, "connect to RGMII @ 100 Mbit\n");
+               netdev_dbg(netdev, "connect %s to RGMII @ 100 Mbit\n",
+                          phydev_name(phydev));
                break;
        case 10:
                status.bits.speed = GMAC_SPEED_10;
                if (phydev->interface == PHY_INTERFACE_MODE_RGMII)
                        status.bits.mii_rmii = GMAC_PHY_RGMII_100_10;
-               netdev_info(netdev, "connect to RGMII @ 10 Mbit\n");
+               netdev_dbg(netdev, "connect %s to RGMII @ 10 Mbit\n",
+                          phydev_name(phydev));
                break;
        default:
-               netdev_warn(netdev, "Not supported PHY speed (%d)\n",
-                           phydev->speed);
+               netdev_warn(netdev, "Unsupported PHY speed (%d) on %s\n",
+                           phydev->speed, phydev_name(phydev));
        }
 
        if (phydev->duplex == DUPLEX_FULL) {
@@ -363,12 +372,6 @@ static int gmac_setup_phy(struct net_device *netdev)
                return -ENODEV;
        netdev->phydev = phy;
 
-       netdev_info(netdev, "connected to PHY \"%s\"\n",
-                   phydev_name(phy));
-       phy_attached_print(phy, "phy_id=0x%.8lx, phy_mode=%s\n",
-                          (unsigned long)phy->phy_id,
-                          phy_modes(phy->interface));
-
        phy->supported &= PHY_GBIT_FEATURES;
        phy->supported |= SUPPORTED_Asym_Pause | SUPPORTED_Pause;
        phy->advertising = phy->supported;
@@ -376,19 +379,19 @@ static int gmac_setup_phy(struct net_device *netdev)
        /* set PHY interface type */
        switch (phy->interface) {
        case PHY_INTERFACE_MODE_MII:
-               netdev_info(netdev, "set GMAC0 to GMII mode, GMAC1 disabled\n");
+               netdev_dbg(netdev,
+                          "MII: set GMAC0 to GMII mode, GMAC1 disabled\n");
                status.bits.mii_rmii = GMAC_PHY_MII;
-               netdev_info(netdev, "connect to MII\n");
                break;
        case PHY_INTERFACE_MODE_GMII:
-               netdev_info(netdev, "set GMAC0 to GMII mode, GMAC1 disabled\n");
+               netdev_dbg(netdev,
+                          "GMII: set GMAC0 to GMII mode, GMAC1 disabled\n");
                status.bits.mii_rmii = GMAC_PHY_GMII;
-               netdev_info(netdev, "connect to GMII\n");
                break;
        case PHY_INTERFACE_MODE_RGMII:
-               dev_info(dev, "set GMAC0 and GMAC1 to MII/RGMII mode\n");
+               netdev_dbg(netdev,
+                          "RGMII: set GMAC0 and GMAC1 to MII/RGMII mode\n");
                status.bits.mii_rmii = GMAC_PHY_RGMII_100_10;
-               netdev_info(netdev, "connect to RGMII\n");
                break;
        default:
                netdev_err(netdev, "Unsupported MII interface\n");
@@ -398,29 +401,63 @@ static int gmac_setup_phy(struct net_device *netdev)
        }
        writel(status.bits32, port->gmac_base + GMAC_STATUS);
 
+       if (netif_msg_link(port))
+               phy_attached_info(phy);
+
        return 0;
 }
 
-static int gmac_pick_rx_max_len(int max_l3_len)
-{
-       /* index = CONFIG_MAXLEN_XXX values */
-       static const int max_len[8] = {
-               1536, 1518, 1522, 1542,
-               9212, 10236, 1518, 1518
-       };
-       int i, n = 5;
+/* The maximum frame length is not logically enumerated in the
+ * hardware, so we do a table lookup to find the applicable max
+ * frame length.
+ */
+struct gmac_max_framelen {
+       unsigned int max_l3_len;
+       u8 val;
+};
+
+static const struct gmac_max_framelen gmac_maxlens[] = {
+       {
+               .max_l3_len = 1518,
+               .val = CONFIG0_MAXLEN_1518,
+       },
+       {
+               .max_l3_len = 1522,
+               .val = CONFIG0_MAXLEN_1522,
+       },
+       {
+               .max_l3_len = 1536,
+               .val = CONFIG0_MAXLEN_1536,
+       },
+       {
+               .max_l3_len = 1542,
+               .val = CONFIG0_MAXLEN_1542,
+       },
+       {
+               .max_l3_len = 9212,
+               .val = CONFIG0_MAXLEN_9k,
+       },
+       {
+               .max_l3_len = 10236,
+               .val = CONFIG0_MAXLEN_10k,
+       },
+};
 
-       max_l3_len += ETH_HLEN + VLAN_HLEN;
+static int gmac_pick_rx_max_len(unsigned int max_l3_len)
+{
+       const struct gmac_max_framelen *maxlen;
+       int maxtot;
+       int i;
 
-       if (max_l3_len > max_len[n])
-               return -1;
+       maxtot = max_l3_len + ETH_HLEN + VLAN_HLEN;
 
-       for (i = 0; i < 5; i++) {
-               if (max_len[i] >= max_l3_len && max_len[i] < max_len[n])
-                       n = i;
+       for (i = 0; i < ARRAY_SIZE(gmac_maxlens); i++) {
+               maxlen = &gmac_maxlens[i];
+               if (maxtot <= maxlen->max_l3_len)
+                       return maxlen->val;
        }
 
-       return n;
+       return -1;
 }
 
 static int gmac_init(struct net_device *netdev)
@@ -1276,8 +1313,8 @@ static void gmac_enable_irq(struct net_device *netdev, int enable)
        unsigned long flags;
        u32 val, mask;
 
-       netdev_info(netdev, "%s device %d %s\n", __func__,
-                   netdev->dev_id, enable ? "enable" : "disable");
+       netdev_dbg(netdev, "%s device %d %s\n", __func__,
+                  netdev->dev_id, enable ? "enable" : "disable");
        spin_lock_irqsave(&geth->irq_lock, flags);
 
        mask = GMAC0_IRQ0_2 << (netdev->dev_id * 2);
@@ -1753,7 +1790,10 @@ static int gmac_open(struct net_device *netdev)
        phy_start(netdev->phydev);
 
        err = geth_resize_freeq(port);
-       if (err) {
+       /* It's fine if it's just busy; the other port has set up
+        * the freeq in that case.
+        */
+       if (err && (err != -EBUSY)) {
                netdev_err(netdev, "could not resize freeq\n");
                goto err_stop_phy;
        }
@@ -1782,7 +1822,7 @@ static int gmac_open(struct net_device *netdev)
                     HRTIMER_MODE_REL);
        port->rx_coalesce_timer.function = &gmac_coalesce_delay_expired;
 
-       netdev_info(netdev, "opened\n");
+       netdev_dbg(netdev, "opened\n");
 
        return 0;
 
@@ -2264,6 +2304,14 @@ static void gemini_port_remove(struct gemini_ethernet_port *port)
 
 static void gemini_ethernet_init(struct gemini_ethernet *geth)
 {
+       /* Only do this once both ports are online */
+       if (geth->initialized)
+               return;
+       if (geth->port0 && geth->port1)
+               geth->initialized = true;
+       else
+               return;
+
        writel(0, geth->base + GLOBAL_INTERRUPT_ENABLE_0_REG);
        writel(0, geth->base + GLOBAL_INTERRUPT_ENABLE_1_REG);
        writel(0, geth->base + GLOBAL_INTERRUPT_ENABLE_2_REG);
@@ -2354,6 +2402,7 @@ static int gemini_ethernet_port_probe(struct platform_device *pdev)
        port->id = id;
        port->geth = geth;
        port->dev = dev;
+       port->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
 
        /* DMA memory */
        dmares = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -2410,6 +2459,10 @@ static int gemini_ethernet_port_probe(struct platform_device *pdev)
                geth->port0 = port;
        else
                geth->port1 = port;
+
+       /* This will just be done once both ports are up and reset */
+       gemini_ethernet_init(geth);
+
        platform_set_drvdata(pdev, port);
 
        /* Set up and register the netdev */
@@ -2423,6 +2476,11 @@ static int gemini_ethernet_port_probe(struct platform_device *pdev)
 
        netdev->hw_features = GMAC_OFFLOAD_FEATURES;
        netdev->features |= GMAC_OFFLOAD_FEATURES | NETIF_F_GRO;
+       /* We can handle jumbo frames up to 10236 bytes, so let's accept
+        * payloads of 10236 bytes minus the VLAN and Ethernet headers
+        */
+       netdev->min_mtu = ETH_MIN_MTU;
+       netdev->max_mtu = 10236 - VLAN_ETH_HLEN;
 
        port->freeq_refill = 0;
        netif_napi_add(netdev, &port->napi, gmac_napi_poll,
@@ -2435,7 +2493,7 @@ static int gemini_ethernet_port_probe(struct platform_device *pdev)
                        port->mac_addr[0], port->mac_addr[1],
                        port->mac_addr[2]);
                dev_info(dev, "using a random ethernet address\n");
-               random_ether_addr(netdev->dev_addr);
+               eth_random_addr(netdev->dev_addr);
        }
        gmac_write_mac_address(netdev);
 
@@ -2527,7 +2585,6 @@ static int gemini_ethernet_probe(struct platform_device *pdev)
 
        spin_lock_init(&geth->irq_lock);
        spin_lock_init(&geth->freeq_lock);
-       gemini_ethernet_init(geth);
 
        /* The children will use this */
        platform_set_drvdata(pdev, geth);
@@ -2540,8 +2597,8 @@ static int gemini_ethernet_remove(struct platform_device *pdev)
 {
        struct gemini_ethernet *geth = platform_get_drvdata(pdev);
 
-       gemini_ethernet_init(geth);
        geth_cleanup_freeq(geth);
+       geth->initialized = false;
 
        return 0;
 }
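The initialized flag together with the port0 && port1 test forms a small rendezvous: whichever port finishes probing second performs the one-time global init, and later calls are no-ops. A sketch of that pattern (single-threaded, as probe ordering is here):

#include <stdbool.h>
#include <stdio.h>

struct eth {
        bool initialized;
        void *port0, *port1;
};

static void global_init(struct eth *g)
{
        if (g->initialized)
                return;                 /* already done once */
        if (!(g->port0 && g->port1))
                return;                 /* wait for the other port */
        g->initialized = true;
        printf("one-time hardware init runs now\n");
}

int main(void)
{
        struct eth g = { 0 };
        int a = 1, b = 2;               /* stand-ins for port state */

        g.port0 = &a;
        global_init(&g);                /* too early: nothing happens */
        g.port1 = &b;
        global_init(&g);                /* both present: init runs */
        global_init(&g);                /* idempotent afterwards */
        return 0;
}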
index 382891f81e0932a273ca3de26f95c1a685bea64e..d80fe03d31070dcc56a53854698e42d3c04fd87a 100644 (file)
@@ -37,7 +37,7 @@
 #include "be_hw.h"
 #include "be_roce.h"
 
-#define DRV_VER                        "11.4.0.0"
+#define DRV_VER                        "12.0.0.0"
 #define DRV_NAME               "be2net"
 #define BE_NAME                        "Emulex BladeEngine2"
 #define BE3_NAME               "Emulex BladeEngine3"
@@ -185,34 +185,13 @@ static inline void queue_tail_inc(struct be_queue_info *q)
 
 struct be_eq_obj {
        struct be_queue_info q;
-       char desc[32];
-
-       /* Adaptive interrupt coalescing (AIC) info */
-       bool enable_aic;
-       u32 min_eqd;            /* in usecs */
-       u32 max_eqd;            /* in usecs */
-       u32 eqd;                /* configured val when aic is off */
-       u32 cur_eqd;            /* in usecs */
 
+       struct be_adapter *adapter;
+       struct napi_struct napi;
        u8 idx;                 /* array index */
        u8 msix_idx;
        u16 spurious_intr;
-       struct napi_struct napi;
-       struct be_adapter *adapter;
        cpumask_var_t  affinity_mask;
-
-#ifdef CONFIG_NET_RX_BUSY_POLL
-#define BE_EQ_IDLE             0
-#define BE_EQ_NAPI             1       /* napi owns this EQ */
-#define BE_EQ_POLL             2       /* poll owns this EQ */
-#define BE_EQ_LOCKED           (BE_EQ_NAPI | BE_EQ_POLL)
-#define BE_EQ_NAPI_YIELD       4       /* napi yielded this EQ */
-#define BE_EQ_POLL_YIELD       8       /* poll yielded this EQ */
-#define BE_EQ_YIELD            (BE_EQ_NAPI_YIELD | BE_EQ_POLL_YIELD)
-#define BE_EQ_USER_PEND                (BE_EQ_POLL | BE_EQ_POLL_YIELD)
-       unsigned int state;
-       spinlock_t lock;        /* lock to serialize napi and busy-poll */
-#endif  /* CONFIG_NET_RX_BUSY_POLL */
 } ____cacheline_aligned_in_smp;
 
 struct be_aic_obj {            /* Adaptive interrupt coalescing (AIC) info */
@@ -238,7 +217,6 @@ struct be_tx_stats {
        u64 tx_vxlan_offload_pkts;
        u64 tx_reqs;
        u64 tx_compl;
-       ulong tx_jiffies;
        u32 tx_stops;
        u32 tx_drv_drops;       /* pkts dropped by driver */
        /* the error counters are described in be_ethtool.c */
@@ -261,9 +239,9 @@ struct be_tx_compl_info {
 
 struct be_tx_obj {
        u32 db_offset;
+       struct be_tx_compl_info txcp;
        struct be_queue_info q;
        struct be_queue_info cq;
-       struct be_tx_compl_info txcp;
        /* Remember the skbs that were transmitted */
        struct sk_buff *sent_skb_list[TX_Q_LEN];
        struct be_tx_stats stats;
@@ -458,10 +436,10 @@ struct be_port_resources {
 #define be_is_os2bmc_enabled(adapter) (adapter->flags & BE_FLAGS_OS2BMC)
 
 struct rss_info {
-       u64 rss_flags;
        u8 rsstable[RSS_INDIR_TABLE_LEN];
        u8 rss_queue[RSS_INDIR_TABLE_LEN];
        u8 rss_hkey[RSS_HASH_KEY_LEN];
+       u64 rss_flags;
 };
 
 #define BE_INVALID_DIE_TEMP    0xFF
@@ -544,11 +522,13 @@ enum {
 };
 
 struct be_error_recovery {
-       /* Lancer error recovery variables */
-       u8 recovery_retries;
+       union {
+               u8 recovery_retries;    /* used for Lancer              */
+               u8 recovery_state;      /* used for BEx and Skyhawk     */
+       };
 
        /* BEx/Skyhawk error recovery variables */
-       u8 recovery_state;
+       bool recovery_supported;
        u16 ue_to_reset_time;           /* Time after UE, to soft reset
                                         * the chip - PF0 only
                                         */
@@ -556,7 +536,6 @@ struct be_error_recovery {
                                         * of SLIPORT_SEMAPHORE reg
                                         */
        u16 last_err_code;
-       bool recovery_supported;
        unsigned long probe_time;
        unsigned long last_recovery_time;
 
index 8f755009ff3820e34fa1501970054513cf5198ce..d0b9415d9ae71615e06cf6b1a9e8dc1d49beee80 100644 (file)
@@ -1412,6 +1412,83 @@ drop:
        return NETDEV_TX_OK;
 }
 
+static void be_tx_timeout(struct net_device *netdev)
+{
+       struct be_adapter *adapter = netdev_priv(netdev);
+       struct device *dev = &adapter->pdev->dev;
+       struct be_tx_obj *txo;
+       struct sk_buff *skb;
+       struct tcphdr *tcphdr;
+       struct udphdr *udphdr;
+       u32 *entry;
+       int status;
+       int i, j;
+
+       for_all_tx_queues(adapter, txo, i) {
+               dev_info(dev, "TXQ Dump: %d H: %d T: %d used: %d, qid: 0x%x\n",
+                        i, txo->q.head, txo->q.tail,
+                        atomic_read(&txo->q.used), txo->q.id);
+
+               entry = txo->q.dma_mem.va;
+               for (j = 0; j < TX_Q_LEN * 4; j += 4) {
+                       if (entry[j] != 0 || entry[j + 1] != 0 ||
+                           entry[j + 2] != 0 || entry[j + 3] != 0) {
+                               dev_info(dev, "Entry %d 0x%x 0x%x 0x%x 0x%x\n",
+                                        j, entry[j], entry[j + 1],
+                                        entry[j + 2], entry[j + 3]);
+                       }
+               }
+
+               entry = txo->cq.dma_mem.va;
+               dev_info(dev, "TXCQ Dump: %d  H: %d T: %d used: %d\n",
+                        i, txo->cq.head, txo->cq.tail,
+                        atomic_read(&txo->cq.used));
+               for (j = 0; j < TX_CQ_LEN * 4; j += 4) {
+                       if (entry[j] != 0 || entry[j + 1] != 0 ||
+                           entry[j + 2] != 0 || entry[j + 3] != 0) {
+                               dev_info(dev, "Entry %d 0x%x 0x%x 0x%x 0x%x\n",
+                                        j, entry[j], entry[j + 1],
+                                        entry[j + 2], entry[j + 3]);
+                       }
+               }
+
+               for (j = 0; j < TX_Q_LEN; j++) {
+                       if (txo->sent_skb_list[j]) {
+                               skb = txo->sent_skb_list[j];
+                               if (ip_hdr(skb)->protocol == IPPROTO_TCP) {
+                                       tcphdr = tcp_hdr(skb);
+                                       dev_info(dev, "TCP source port %d\n",
+                                                ntohs(tcphdr->source));
+                                       dev_info(dev, "TCP dest port %d\n",
+                                                ntohs(tcphdr->dest));
+                                       dev_info(dev, "TCP sequence num %u\n",
+                                                ntohl(tcphdr->seq));
+                                       dev_info(dev, "TCP ack_seq %u\n",
+                                                ntohl(tcphdr->ack_seq));
+                               } else if (ip_hdr(skb)->protocol ==
+                                          IPPROTO_UDP) {
+                                       udphdr = udp_hdr(skb);
+                                       dev_info(dev, "UDP source port %d\n",
+                                                ntohs(udphdr->source));
+                                       dev_info(dev, "UDP dest port %d\n",
+                                                ntohs(udphdr->dest));
+                               }
+                               dev_info(dev, "skb[%d] %p len %d proto 0x%x\n",
+                                        j, skb, skb->len, skb->protocol);
+                       }
+               }
+       }
+
+       if (lancer_chip(adapter)) {
+               dev_info(dev, "Initiating reset due to tx timeout\n");
+               dev_info(dev, "Resetting adapter\n");
+               status = lancer_physdev_ctrl(adapter,
+                                            PHYSDEV_CONTROL_FW_RESET_MASK);
+               if (status)
+                       dev_err(dev, "Reset failed .. Reboot server\n");
+       }
+}
+
 static inline bool be_in_all_promisc(struct be_adapter *adapter)
 {
        return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
@@ -3274,7 +3351,7 @@ void be_detect_error(struct be_adapter *adapter)
                        /* Do not log error messages if its a FW reset */
                        if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
                            sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
-                               dev_info(dev, "Firmware update in progress\n");
+                               dev_info(dev, "Reset is in progress\n");
                        } else {
                                dev_err(dev, "Error detected in the card\n");
                                dev_err(dev, "ERR: sliport status 0x%x\n",
@@ -3403,9 +3480,11 @@ static int be_msix_register(struct be_adapter *adapter)
        int status, i, vec;
 
        for_all_evt_queues(adapter, eqo, i) {
-               sprintf(eqo->desc, "%s-q%d", netdev->name, i);
+               char irq_name[IFNAMSIZ + 4];
+
+               snprintf(irq_name, sizeof(irq_name), "%s-q%d", netdev->name, i);
                vec = be_msix_vec_get(adapter, eqo);
-               status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
+               status = request_irq(vec, be_msix, 0, irq_name, eqo);
                if (status)
                        goto err_msix;
 
@@ -5216,6 +5295,7 @@ static const struct net_device_ops be_netdev_ops = {
        .ndo_get_vf_config      = be_get_vf_config,
        .ndo_set_vf_link_state  = be_set_vf_link_state,
        .ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
+       .ndo_tx_timeout         = be_tx_timeout,
 #ifdef CONFIG_NET_POLL_CONTROLLER
        .ndo_poll_controller    = be_netpoll,
 #endif
index ab02057ac7304f088242a2a07481820302d3556b..65a22cd9aef26197f79877862756133b4109b895 100644 (file)
@@ -1171,7 +1171,7 @@ static int dpaa_eth_init_tx_port(struct fman_port *port, struct dpaa_fq *errq,
        buf_prefix_content.priv_data_size = buf_layout->priv_data_size;
        buf_prefix_content.pass_prs_result = true;
        buf_prefix_content.pass_hash_result = true;
-       buf_prefix_content.pass_time_stamp = false;
+       buf_prefix_content.pass_time_stamp = true;
        buf_prefix_content.data_align = DPAA_FD_DATA_ALIGNMENT;
 
        params.specific_params.non_rx_params.err_fqid = errq->fqid;
@@ -1213,7 +1213,7 @@ static int dpaa_eth_init_rx_port(struct fman_port *port, struct dpaa_bp **bps,
        buf_prefix_content.priv_data_size = buf_layout->priv_data_size;
        buf_prefix_content.pass_prs_result = true;
        buf_prefix_content.pass_hash_result = true;
-       buf_prefix_content.pass_time_stamp = false;
+       buf_prefix_content.pass_time_stamp = true;
        buf_prefix_content.data_align = DPAA_FD_DATA_ALIGNMENT;
 
        rx_p = &params.specific_params.rx_params;
@@ -1610,14 +1610,28 @@ static struct sk_buff *dpaa_cleanup_tx_fd(const struct dpaa_priv *priv,
 {
        const enum dma_data_direction dma_dir = DMA_TO_DEVICE;
        struct device *dev = priv->net_dev->dev.parent;
+       struct skb_shared_hwtstamps shhwtstamps;
        dma_addr_t addr = qm_fd_addr(fd);
        const struct qm_sg_entry *sgt;
        struct sk_buff **skbh, *skb;
        int nr_frags, i;
+       u64 ns;
 
        skbh = (struct sk_buff **)phys_to_virt(addr);
        skb = *skbh;
 
+       if (priv->tx_tstamp && skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) {
+               memset(&shhwtstamps, 0, sizeof(shhwtstamps));
+
+               if (!fman_port_get_tstamp(priv->mac_dev->port[TX], (void *)skbh,
+                                         &ns)) {
+                       shhwtstamps.hwtstamp = ns_to_ktime(ns);
+                       skb_tstamp_tx(skb, &shhwtstamps);
+               } else {
+                       dev_warn(dev, "fman_port_get_tstamp failed!\n");
+               }
+       }
+
        if (unlikely(qm_fd_get_format(fd) == qm_fd_sg)) {
                nr_frags = skb_shinfo(skb)->nr_frags;
                dma_unmap_single(dev, addr,
@@ -2087,6 +2101,11 @@ static int dpaa_start_xmit(struct sk_buff *skb, struct net_device *net_dev)
        if (unlikely(err < 0))
                goto skb_to_fd_failed;
 
+       if (priv->tx_tstamp && skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) {
+               fd.cmd |= cpu_to_be32(FM_FD_CMD_UPD);
+               skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+       }
+
        if (likely(dpaa_xmit(priv, percpu_stats, queue_mapping, &fd) == 0))
                return NETDEV_TX_OK;
 
@@ -2228,6 +2247,7 @@ static enum qman_cb_dqrr_result rx_default_dqrr(struct qman_portal *portal,
                                                struct qman_fq *fq,
                                                const struct qm_dqrr_entry *dq)
 {
+       struct skb_shared_hwtstamps *shhwtstamps;
        struct rtnl_link_stats64 *percpu_stats;
        struct dpaa_percpu_priv *percpu_priv;
        const struct qm_fd *fd = &dq->fd;
@@ -2241,6 +2261,7 @@ static enum qman_cb_dqrr_result rx_default_dqrr(struct qman_portal *portal,
        struct sk_buff *skb;
        int *count_ptr;
        void *vaddr;
+       u64 ns;
 
        fd_status = be32_to_cpu(fd->status);
        fd_format = qm_fd_get_format(fd);
@@ -2305,6 +2326,16 @@ static enum qman_cb_dqrr_result rx_default_dqrr(struct qman_portal *portal,
        if (!skb)
                return qman_cb_dqrr_consume;
 
+       if (priv->rx_tstamp) {
+               shhwtstamps = skb_hwtstamps(skb);
+               memset(shhwtstamps, 0, sizeof(*shhwtstamps));
+
+               if (!fman_port_get_tstamp(priv->mac_dev->port[RX], vaddr, &ns))
+                       shhwtstamps->hwtstamp = ns_to_ktime(ns);
+               else
+                       dev_warn(net_dev->dev.parent, "fman_port_get_tstamp failed!\n");
+       }
+
        skb->protocol = eth_type_trans(skb, net_dev);
 
        if (net_dev->features & NETIF_F_RXHASH && priv->keygen_in_use &&
@@ -2524,11 +2555,58 @@ static int dpaa_eth_stop(struct net_device *net_dev)
        return err;
 }
 
+static int dpaa_ts_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+{
+       struct dpaa_priv *priv = netdev_priv(dev);
+       struct hwtstamp_config config;
+
+       if (copy_from_user(&config, rq->ifr_data, sizeof(config)))
+               return -EFAULT;
+
+       switch (config.tx_type) {
+       case HWTSTAMP_TX_OFF:
+               /* RX/TX timestamping can't be disabled separately,
+                * so do nothing here.
+                */
+               priv->tx_tstamp = false;
+               break;
+       case HWTSTAMP_TX_ON:
+               priv->mac_dev->set_tstamp(priv->mac_dev->fman_mac, true);
+               priv->tx_tstamp = true;
+               break;
+       default:
+               return -ERANGE;
+       }
+
+       if (config.rx_filter == HWTSTAMP_FILTER_NONE) {
+               /* RX/TX timestamping can't be disabled separately,
+                * so do nothing here.
+                */
+               priv->rx_tstamp = false;
+       } else {
+               priv->mac_dev->set_tstamp(priv->mac_dev->fman_mac, true);
+               priv->rx_tstamp = true;
+               /* TS is set for all frame types, not only those requested */
+               config.rx_filter = HWTSTAMP_FILTER_ALL;
+       }
+
+       return copy_to_user(rq->ifr_data, &config, sizeof(config)) ?
+                       -EFAULT : 0;
+}
+
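From userspace this handler is reached through the standard SIOCSHWTSTAMP ioctl. A minimal sketch that enables TX and RX hardware timestamping on a hypothetical interface name, with error handling abbreviated:

#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <unistd.h>
#include <linux/net_tstamp.h>
#include <linux/sockios.h>

int main(void)
{
        struct hwtstamp_config cfg = {
                .tx_type   = HWTSTAMP_TX_ON,
                .rx_filter = HWTSTAMP_FILTER_ALL,
        };
        struct ifreq ifr;
        int fd = socket(AF_INET, SOCK_DGRAM, 0);

        memset(&ifr, 0, sizeof(ifr));
        strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);    /* hypothetical name */
        ifr.ifr_data = (char *)&cfg;

        if (ioctl(fd, SIOCSHWTSTAMP, &ifr) < 0)
                perror("SIOCSHWTSTAMP");
        else    /* the driver may widen rx_filter, e.g. to HWTSTAMP_FILTER_ALL */
                printf("rx_filter now %d\n", cfg.rx_filter);
        close(fd);
        return 0;
}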
 static int dpaa_ioctl(struct net_device *net_dev, struct ifreq *rq, int cmd)
 {
-       if (!net_dev->phydev)
-               return -EINVAL;
-       return phy_mii_ioctl(net_dev->phydev, rq, cmd);
+       int ret = -EINVAL;
+
+       if (cmd == SIOCGMIIREG) {
+               if (net_dev->phydev)
+                       return phy_mii_ioctl(net_dev->phydev, rq, cmd);
+       }
+
+       if (cmd == SIOCSHWTSTAMP)
+               return dpaa_ts_ioctl(net_dev, rq, cmd);
+
+       return ret;
 }
 
 static const struct net_device_ops dpaa_ops = {
index bd9422082f83c8cffd9e12116a155ac185a190f1..af320f83c742a0c894c1fe4e784fae74f0abdbe4 100644 (file)
@@ -182,6 +182,9 @@ struct dpaa_priv {
 
        struct dpaa_buffer_layout buf_layout[2];
        u16 rx_headroom;
+
+       bool tx_tstamp; /* Tx timestamping enabled */
+       bool rx_tstamp; /* Rx timestamping enabled */
 };
 
 /* from dpaa_ethtool.c */
index 2f933b6b2f4e79b9e1359d8a9b57ba6af4acbfb9..3184c8f7cdd05a3114f0b6c4216fa0c9d8377795 100644 (file)
@@ -32,6 +32,9 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/string.h>
+#include <linux/of_platform.h>
+#include <linux/net_tstamp.h>
+#include <linux/fsl/ptp_qoriq.h>
 
 #include "dpaa_eth.h"
 #include "mac.h"
@@ -515,6 +518,41 @@ static int dpaa_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
        return ret;
 }
 
+static int dpaa_get_ts_info(struct net_device *net_dev,
+                           struct ethtool_ts_info *info)
+{
+       struct device *dev = net_dev->dev.parent;
+       struct device_node *mac_node = dev->of_node;
+       struct device_node *fman_node = NULL, *ptp_node = NULL;
+       struct platform_device *ptp_dev = NULL;
+       struct qoriq_ptp *ptp = NULL;
+
+       info->phc_index = -1;
+
+       fman_node = of_get_parent(mac_node);
+       if (fman_node)
+               ptp_node = of_parse_phandle(fman_node, "ptimer-handle", 0);
+
+       if (ptp_node)
+               ptp_dev = of_find_device_by_node(ptp_node);
+
+       if (ptp_dev)
+               ptp = platform_get_drvdata(ptp_dev);
+
+       if (ptp)
+               info->phc_index = ptp->phc_index;
+
+       info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE |
+                               SOF_TIMESTAMPING_RX_HARDWARE |
+                               SOF_TIMESTAMPING_RAW_HARDWARE;
+       info->tx_types = (1 << HWTSTAMP_TX_OFF) |
+                        (1 << HWTSTAMP_TX_ON);
+       info->rx_filters = (1 << HWTSTAMP_FILTER_NONE) |
+                          (1 << HWTSTAMP_FILTER_ALL);
+
+       return 0;
+}
+
 const struct ethtool_ops dpaa_ethtool_ops = {
        .get_drvinfo = dpaa_get_drvinfo,
        .get_msglevel = dpaa_get_msglevel,
@@ -530,4 +568,5 @@ const struct ethtool_ops dpaa_ethtool_ops = {
        .set_link_ksettings = dpaa_set_link_ksettings,
        .get_rxnfc = dpaa_get_rxnfc,
        .set_rxnfc = dpaa_set_rxnfc,
+       .get_ts_info = dpaa_get_ts_info,
 };
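
With .get_ts_info wired up, "ethtool -T <iface>" will report the PHC index
and the supported tx_types/rx_filters. The same struct is reachable
programmatically via the ETHTOOL_GET_TS_INFO command; a hedged sketch, with
the interface name "eth0" again an assumption:

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

int main(void)
{
	struct ethtool_ts_info info = { .cmd = ETHTOOL_GET_TS_INFO };
	struct ifreq ifr;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0)
		return 1;

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
	ifr.ifr_data = (void *)&info;

	if (ioctl(fd, SIOCETHTOOL, &ifr) == 0)
		printf("phc_index=%d so_timestamping=0x%x\n",
		       info.phc_index, info.so_timestamping);

	close(fd);
	return 0;
}

A phc_index of -1 here corresponds to the fallback in dpaa_get_ts_info()
when no ptimer-handle phandle resolves to a probed PTP device.
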
index c729665107f59c4431529b971a765059bee837c1..76366c73583124eaae79bf2594a9517f92675087 100644 (file)
@@ -48,6 +48,7 @@
 #include <linux/io.h>
 #include <linux/irq.h>
 #include <linux/clk.h>
+#include <linux/crc32.h>
 #include <linux/platform_device.h>
 #include <linux/mdio.h>
 #include <linux/phy.h>
@@ -2955,7 +2956,7 @@ static void set_multicast_list(struct net_device *ndev)
 {
        struct fec_enet_private *fep = netdev_priv(ndev);
        struct netdev_hw_addr *ha;
-       unsigned int i, bit, data, crc, tmp;
+       unsigned int crc, tmp;
        unsigned char hash;
        unsigned int hash_high = 0, hash_low = 0;
 
@@ -2983,15 +2984,7 @@ static void set_multicast_list(struct net_device *ndev)
        /* Add the addresses in hash register */
        netdev_for_each_mc_addr(ha, ndev) {
                /* calculate crc32 value of mac address */
-               crc = 0xffffffff;
-
-               for (i = 0; i < ndev->addr_len; i++) {
-                       data = ha->addr[i];
-                       for (bit = 0; bit < 8; bit++, data >>= 1) {
-                               crc = (crc >> 1) ^
-                               (((crc ^ data) & 1) ? CRC32_POLY : 0);
-                       }
-               }
+               crc = ether_crc_le(ndev->addr_len, ha->addr);
 
                /* only upper 6 bits (FEC_HASH_BITS) are used
                 * which point to specific bit in the hash registers
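
ether_crc_le() from <linux/crc32.h> computes the same little-endian CRC-32
the removed open-coded loop did (polynomial 0xedb88320, initial value ~0,
no final inversion). A stand-alone equivalent in plain C; the multicast MAC
and the ">> 26" extraction of the upper six bits are illustrative:

#include <stdint.h>
#include <stdio.h>

static uint32_t ether_crc_le_sketch(int len, const uint8_t *addr)
{
	uint32_t crc = 0xffffffff;
	int i, bit;

	for (i = 0; i < len; i++) {
		uint8_t byte = addr[i];

		for (bit = 0; bit < 8; bit++, byte >>= 1)
			crc = (crc >> 1) ^
			      (((crc ^ byte) & 1) ? 0xedb88320 : 0);
	}
	return crc;
}

int main(void)
{
	const uint8_t mac[6] = { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x01 };
	uint32_t crc = ether_crc_le_sketch(6, mac);

	/* the driver keeps only the upper 6 bits (FEC_HASH_BITS) */
	printf("crc=0x%08x hash=%u\n", (unsigned)crc,
	       (unsigned)((crc >> 26) & 0x3f));
	return 0;
}
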
@@ -3136,6 +3129,7 @@ static int fec_enet_init(struct net_device *ndev)
        unsigned dsize = fep->bufdesc_ex ? sizeof(struct bufdesc_ex) :
                        sizeof(struct bufdesc);
        unsigned dsize_log2 = __fls(dsize);
+       int ret;
 
        WARN_ON(dsize != (1 << dsize_log2));
 #if defined(CONFIG_ARM) || defined(CONFIG_ARM64)
@@ -3146,6 +3140,13 @@ static int fec_enet_init(struct net_device *ndev)
        fep->tx_align = 0x3;
 #endif
 
+       /* Set the DMA mask for both the streaming and coherent API */
+       ret = dma_set_mask_and_coherent(&fep->pdev->dev, DMA_BIT_MASK(32));
+       if (ret < 0) {
+               dev_warn(&fep->pdev->dev, "No suitable DMA available\n");
+               return ret;
+       }
+
        fec_enet_alloc_queue(ndev);
 
        bd_size = (fep->total_tx_ring_size + fep->total_rx_ring_size) * dsize;
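
dma_set_mask_and_coherent() sets the streaming and coherent masks in one
call and fails if the platform can't satisfy them, which is why the error
is treated as fatal above. A common variant (a hedged sketch, not taken
from this driver) tries 64-bit first and falls back to 32-bit:

#include <linux/dma-mapping.h>

/* illustrative fragment only; example_set_dma_mask is not a kernel API */
static int example_set_dma_mask(struct device *dev)
{
	int ret;

	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
	if (ret)
		ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
	if (ret)
		dev_warn(dev, "No suitable DMA available\n");
	return ret;
}
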
index 36c2d7d6ee1b0bb0cd14aa0710eee1ebfea6b942..7e892b1cbd3de951a1721108c4976daa1686edae 100644 (file)
@@ -99,7 +99,6 @@ static int fec_ptp_enable_pps(struct fec_enet_private *fep, uint enable)
 {
        unsigned long flags;
        u32 val, tempval;
-       int inc;
        struct timespec64 ts;
        u64 ns;
        val = 0;
@@ -114,7 +113,6 @@ static int fec_ptp_enable_pps(struct fec_enet_private *fep, uint enable)
 
        fep->pps_channel = DEFAULT_PPS_CHANNEL;
        fep->reload_period = PPS_OUPUT_RELOAD_PERIOD;
-       inc = fep->ptp_inc;
 
        spin_lock_irqsave(&fep->tmreg_lock, flags);
 
index 9530405030a70974c92c1ddc2263b00610aab32c..c415ac67cb7bef218d476fc59f7302b83660513b 100644 (file)
@@ -2801,7 +2801,8 @@ static struct fman *read_dts_node(struct platform_device *of_dev)
        of_node_put(muram_node);
        of_node_put(fm_node);
 
-       err = devm_request_irq(&of_dev->dev, irq, fman_irq, 0, "fman", fman);
+       err = devm_request_irq(&of_dev->dev, irq, fman_irq, IRQF_SHARED,
+                              "fman", fman);
        if (err < 0) {
                dev_err(&of_dev->dev, "%s: irq %d allocation failed (error = %d)\n",
                        __func__, irq, err);
index bfa02e0014ae01f4a08a600e0ba27a856c0371f6..935c317fa69642c9707fc583d1f34a12061b390e 100644 (file)
@@ -41,6 +41,7 @@
 /* Frame queue Context Override */
 #define FM_FD_CMD_FCO                   0x80000000
 #define FM_FD_CMD_RPD                   0x40000000  /* Read Prepended Data */
+#define FM_FD_CMD_UPD                  0x20000000  /* Update Prepended Data */
 #define FM_FD_CMD_DTC                   0x10000000  /* Do L4 Checksum */
 
 /* TX-Port: Unsupported Format */
index 57b1e2b47c0a9c68a8bfeb18e166804c3fe0cacd..1ca543ac8f2cd606e5b6e5e727a4f21b96e8550e 100644 (file)
 #define DTSEC_ECNTRL_R100M             0x00000008
 #define DTSEC_ECNTRL_QSGMIIM           0x00000001
 
+#define TCTRL_TTSE                     0x00000040
 #define TCTRL_GTS                      0x00000020
 
 #define RCTRL_PAL_MASK                 0x001f0000
 #define RCTRL_PAL_SHIFT                        16
 #define RCTRL_GHTX                     0x00000400
+#define RCTRL_RTSE                     0x00000040
 #define RCTRL_GRS                      0x00000020
 #define RCTRL_MPROM                    0x00000008
 #define RCTRL_RSF                      0x00000004
@@ -1136,6 +1138,31 @@ int dtsec_set_allmulti(struct fman_mac *dtsec, bool enable)
        return 0;
 }
 
+int dtsec_set_tstamp(struct fman_mac *dtsec, bool enable)
+{
+       struct dtsec_regs __iomem *regs = dtsec->regs;
+       u32 rctrl, tctrl;
+
+       if (!is_init_done(dtsec->dtsec_drv_param))
+               return -EINVAL;
+
+       rctrl = ioread32be(&regs->rctrl);
+       tctrl = ioread32be(&regs->tctrl);
+
+       if (enable) {
+               rctrl |= RCTRL_RTSE;
+               tctrl |= TCTRL_TTSE;
+       } else {
+               rctrl &= ~RCTRL_RTSE;
+               tctrl &= ~TCTRL_TTSE;
+       }
+
+       iowrite32be(rctrl, &regs->rctrl);
+       iowrite32be(tctrl, &regs->tctrl);
+
+       return 0;
+}
+
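
dtsec_set_tstamp() is a plain read-modify-write of two big-endian MMIO
registers. A stand-alone model of the same sequence in plain C, with the
register image and its reset value invented for illustration:

#include <stdint.h>
#include <stdio.h>

#define RCTRL_RTSE 0x00000040

static uint32_t read_be32(const uint8_t *p)
{
	return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
	       ((uint32_t)p[2] << 8) | (uint32_t)p[3];
}

static void write_be32(uint8_t *p, uint32_t v)
{
	p[0] = v >> 24;
	p[1] = v >> 16;
	p[2] = v >> 8;
	p[3] = v;
}

int main(void)
{
	/* pretend register image, RSF (0x4) already set */
	uint8_t rctrl_reg[4] = { 0x00, 0x00, 0x00, 0x04 };
	uint32_t rctrl = read_be32(rctrl_reg);	/* ioread32be() */

	rctrl |= RCTRL_RTSE;			/* enable Rx timestamping */
	write_be32(rctrl_reg, rctrl);		/* iowrite32be() */

	printf("rctrl=0x%08x\n", (unsigned)read_be32(rctrl_reg));
	return 0;
}
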
 int dtsec_del_hash_mac_address(struct fman_mac *dtsec, enet_addr_t *eth_addr)
 {
        struct dtsec_regs __iomem *regs = dtsec->regs;
index 1a689adf5a22744a12d81f8ef1f535f1f9d420f4..5149d96ec2c15e80648700a18d6d56cb522751c6 100644 (file)
@@ -56,5 +56,6 @@ int dtsec_add_hash_mac_address(struct fman_mac *dtsec, enet_addr_t *eth_addr);
 int dtsec_del_hash_mac_address(struct fman_mac *dtsec, enet_addr_t *eth_addr);
 int dtsec_get_version(struct fman_mac *dtsec, u32 *mac_version);
 int dtsec_set_allmulti(struct fman_mac *dtsec, bool enable);
+int dtsec_set_tstamp(struct fman_mac *dtsec, bool enable);
 
 #endif /* __DTSEC_H */
index 446a97b792e3dea467f96c4fed3dfee1cd7840f4..bc6eb30aa20f1736cc49a78531a945740a76a62f 100644 (file)
@@ -964,6 +964,11 @@ int memac_set_allmulti(struct fman_mac *memac, bool enable)
        return 0;
 }
 
+int memac_set_tstamp(struct fman_mac *memac, bool enable)
+{
+       return 0; /* Always enabled. */
+}
+
 int memac_del_hash_mac_address(struct fman_mac *memac, enet_addr_t *eth_addr)
 {
        struct memac_regs __iomem *regs = memac->regs;
index b5a50338ed9ae21dd824129295ec868454c1da1a..b2c671ec0ce7909dc4ae0f79e40487430ed8f9e1 100644 (file)
@@ -58,5 +58,6 @@ int memac_set_exception(struct fman_mac *memac,
 int memac_add_hash_mac_address(struct fman_mac *memac, enet_addr_t *eth_addr);
 int memac_del_hash_mac_address(struct fman_mac *memac, enet_addr_t *eth_addr);
 int memac_set_allmulti(struct fman_mac *memac, bool enable);
+int memac_set_tstamp(struct fman_mac *memac, bool enable);
 
 #endif /* __MEMAC_H */
index ecbf6187e13a1fe3d6dba06015ff9cc49aed6224..ee82ee1384eb3160651ce4764f382c3873edaa91 100644 (file)
@@ -1739,6 +1739,18 @@ int fman_port_get_hash_result_offset(struct fman_port *port, u32 *offset)
 }
 EXPORT_SYMBOL(fman_port_get_hash_result_offset);
 
+int fman_port_get_tstamp(struct fman_port *port, const void *data, u64 *tstamp)
+{
+       if (port->buffer_offsets.time_stamp_offset == ILLEGAL_BASE)
+               return -EINVAL;
+
+       *tstamp = be64_to_cpu(*(__be64 *)(data +
+                       port->buffer_offsets.time_stamp_offset));
+
+       return 0;
+}
+EXPORT_SYMBOL(fman_port_get_tstamp);
+
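
fman_port_get_tstamp() reads an 8-byte big-endian timestamp at a
hardware-reported offset inside the data prepended to each frame. A
stand-alone model in plain C; the 8-byte offset and the timestamp value
are invented:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t get_be64(const void *p)
{
	const uint8_t *b = p;
	uint64_t v = 0;
	int i;

	for (i = 0; i < 8; i++)
		v = (v << 8) | b[i];
	return v;
}

int main(void)
{
	uint8_t prepend[16] = { 0 };
	uint64_t ts = 0x0102030405060708ULL;
	size_t off = 8;	/* hypothetical time_stamp_offset */
	int i;

	/* store big-endian, as the hardware does */
	for (i = 0; i < 8; i++)
		prepend[off + i] = ts >> (56 - 8 * i);

	printf("tstamp=0x%016llx\n",
	       (unsigned long long)get_be64(prepend + off));
	return 0;
}
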
 static int fman_port_probe(struct platform_device *of_dev)
 {
        struct fman_port *port;
index e86ca6a34e4e296051258df2b4d268bf4ade09a6..9dbb69f4012160fb744372351a87c0d3ff595a10 100644 (file)
@@ -153,6 +153,8 @@ u32 fman_port_get_qman_channel_id(struct fman_port *port);
 
 int fman_port_get_hash_result_offset(struct fman_port *port, u32 *offset);
 
+int fman_port_get_tstamp(struct fman_port *port, const void *data, u64 *tstamp);
+
 struct fman_port *fman_port_bind(struct device *dev);
 
 #endif /* __FMAN_PORT_H */
index 284735d4ebe9bbd452fd902c6723a6deb99cbd69..40705938eeccfb4e532d9d2732774934172bac6c 100644 (file)
@@ -44,6 +44,7 @@
 #define TGEC_TX_IPG_LENGTH_MASK        0x000003ff
 
 /* Command and Configuration Register (COMMAND_CONFIG) */
+#define CMD_CFG_EN_TIMESTAMP           0x00100000
 #define CMD_CFG_NO_LEN_CHK             0x00020000
 #define CMD_CFG_PAUSE_IGNORE           0x00000100
 #define CMF_CFG_CRC_FWD                        0x00000040
@@ -588,6 +589,26 @@ int tgec_set_allmulti(struct fman_mac *tgec, bool enable)
        return 0;
 }
 
+int tgec_set_tstamp(struct fman_mac *tgec, bool enable)
+{
+       struct tgec_regs __iomem *regs = tgec->regs;
+       u32 tmp;
+
+       if (!is_init_done(tgec->cfg))
+               return -EINVAL;
+
+       tmp = ioread32be(&regs->command_config);
+
+       if (enable)
+               tmp |= CMD_CFG_EN_TIMESTAMP;
+       else
+               tmp &= ~CMD_CFG_EN_TIMESTAMP;
+
+       iowrite32be(tmp, &regs->command_config);
+
+       return 0;
+}
+
 int tgec_del_hash_mac_address(struct fman_mac *tgec, enet_addr_t *eth_addr)
 {
        struct tgec_regs __iomem *regs = tgec->regs;
index cbbd3b422a98b43a50a0c5a1e33705051f9b02dd..3bfd1062b386dea6ecc2ea3cfa58463a29166cd1 100644 (file)
@@ -52,5 +52,6 @@ int tgec_add_hash_mac_address(struct fman_mac *tgec, enet_addr_t *eth_addr);
 int tgec_del_hash_mac_address(struct fman_mac *tgec, enet_addr_t *eth_addr);
 int tgec_get_version(struct fman_mac *tgec, u32 *mac_version);
 int tgec_set_allmulti(struct fman_mac *tgec, bool enable);
+int tgec_set_tstamp(struct fman_mac *tgec, bool enable);
 
 #endif /* __TGEC_H */
index 7b5b95f52c098535942f625001847a2ea26dbeb3..a847b9c3b31a9a4e5c9c6f9734973c89d596e4c6 100644 (file)
@@ -471,6 +471,7 @@ static void setup_dtsec(struct mac_device *mac_dev)
        mac_dev->set_rx_pause           = dtsec_accept_rx_pause_frames;
        mac_dev->set_exception          = dtsec_set_exception;
        mac_dev->set_allmulti           = dtsec_set_allmulti;
+       mac_dev->set_tstamp             = dtsec_set_tstamp;
        mac_dev->set_multi              = set_multi;
        mac_dev->start                  = start;
        mac_dev->stop                   = stop;
@@ -490,6 +491,7 @@ static void setup_tgec(struct mac_device *mac_dev)
        mac_dev->set_rx_pause           = tgec_accept_rx_pause_frames;
        mac_dev->set_exception          = tgec_set_exception;
        mac_dev->set_allmulti           = tgec_set_allmulti;
+       mac_dev->set_tstamp             = tgec_set_tstamp;
        mac_dev->set_multi              = set_multi;
        mac_dev->start                  = start;
        mac_dev->stop                   = stop;
@@ -509,6 +511,7 @@ static void setup_memac(struct mac_device *mac_dev)
        mac_dev->set_rx_pause           = memac_accept_rx_pause_frames;
        mac_dev->set_exception          = memac_set_exception;
        mac_dev->set_allmulti           = memac_set_allmulti;
+       mac_dev->set_tstamp             = memac_set_tstamp;
        mac_dev->set_multi              = set_multi;
        mac_dev->start                  = start;
        mac_dev->stop                   = stop;
index b520cec120ee0af8fc571f8820079cffad8e4371..824a81a9f35072cdd39a0a8ba5ba041f20da1301 100644 (file)
@@ -68,6 +68,7 @@ struct mac_device {
        int (*set_promisc)(struct fman_mac *mac_dev, bool enable);
        int (*change_addr)(struct fman_mac *mac_dev, enet_addr_t *enet_addr);
        int (*set_allmulti)(struct fman_mac *mac_dev, bool enable);
+       int (*set_tstamp)(struct fman_mac *mac_dev, bool enable);
        int (*set_multi)(struct net_device *net_dev,
                         struct mac_device *mac_dev);
        int (*set_rx_pause)(struct fman_mac *mac_dev, bool en);
index 1fc27c97e3b23205fe3466ddf9f5bb45f72faf09..99fe2c210d0f6a52f5a549c256872ee6e4c4d52c 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/string.h>
 #include <linux/ptrace.h>
 #include <linux/errno.h>
+#include <linux/crc32.h>
 #include <linux/ioport.h>
 #include <linux/interrupt.h>
 #include <linux/delay.h>
@@ -176,21 +177,10 @@ static void set_multicast_start(struct net_device *dev)
 static void set_multicast_one(struct net_device *dev, const u8 *mac)
 {
        struct fs_enet_private *fep = netdev_priv(dev);
-       int temp, hash_index, i, j;
+       int temp, hash_index;
        u32 crc, csrVal;
-       u8 byte, msb;
-
-       crc = 0xffffffff;
-       for (i = 0; i < 6; i++) {
-               byte = mac[i];
-               for (j = 0; j < 8; j++) {
-                       msb = crc >> 31;
-                       crc <<= 1;
-                       if (msb ^ (byte & 0x1))
-                               crc ^= FEC_CRC_POLY;
-                       byte >>= 1;
-               }
-       }
+
+       crc = ether_crc(6, mac);
 
        temp = (crc & 0x3f) >> 1;
        hash_index = ((temp & 0x01) << 4) |
index 8cb98cae0a6f506aa7804a5676e7a43570640017..395a5266ea30ad6186b78afd5f1b76f8a138cb84 100644 (file)
@@ -740,7 +740,6 @@ static void ethflow_to_filer_rules (struct gfar_private *priv, u64 ethflow)
 static int gfar_ethflow_to_filer_table(struct gfar_private *priv, u64 ethflow,
                                       u64 class)
 {
-       unsigned int last_rule_idx = priv->cur_filer_idx;
        unsigned int cmp_rqfpr;
        unsigned int *local_rqfpr;
        unsigned int *local_rqfcr;
@@ -819,7 +818,6 @@ static int gfar_ethflow_to_filer_table(struct gfar_private *priv, u64 ethflow,
        }
 
        priv->cur_filer_idx = l - 1;
-       last_rule_idx = l;
 
        /* hash rules */
        ethflow_to_filer_rules(priv, ethflow);
index 42fca3208c0bac2e7a72f3f82d3350b85b6d9398..22a817da861e3f62684123b26a81ae59944c22f3 100644 (file)
@@ -3096,6 +3096,7 @@ static int ucc_geth_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
        ugeth_vdbg("%s: IN", __func__);
 
+       netdev_sent_queue(dev, skb->len);
        spin_lock_irqsave(&ugeth->lock, flags);
 
        dev->stats.tx_bytes += skb->len;
@@ -3240,6 +3241,8 @@ static int ucc_geth_tx(struct net_device *dev, u8 txQ)
 {
        /* Start from the next BD that should be filled */
        struct ucc_geth_private *ugeth = netdev_priv(dev);
+       unsigned int bytes_sent = 0;
+       int howmany = 0;
        u8 __iomem *bd;         /* BD pointer */
        u32 bd_status;
 
@@ -3257,7 +3260,8 @@ static int ucc_geth_tx(struct net_device *dev, u8 txQ)
                skb = ugeth->tx_skbuff[txQ][ugeth->skb_dirtytx[txQ]];
                if (!skb)
                        break;
-
+               howmany++;
+               bytes_sent += skb->len;
                dev->stats.tx_packets++;
 
                dev_consume_skb_any(skb);
@@ -3279,6 +3283,7 @@ static int ucc_geth_tx(struct net_device *dev, u8 txQ)
                bd_status = in_be32((u32 __iomem *)bd);
        }
        ugeth->confBd[txQ] = bd;
+       netdev_completed_queue(dev, howmany, bytes_sent);
        return 0;
 }
 
@@ -3479,6 +3484,7 @@ static int ucc_geth_open(struct net_device *dev)
 
        phy_start(ugeth->phydev);
        napi_enable(&ugeth->napi);
+       netdev_reset_queue(dev);
        netif_start_queue(dev);
 
        device_set_wakeup_capable(&dev->dev,
@@ -3509,6 +3515,7 @@ static int ucc_geth_close(struct net_device *dev)
        free_irq(ugeth->ug_info->uf_info.irq, ugeth->ndev);
 
        netif_stop_queue(dev);
+       netdev_reset_queue(dev);
 
        return 0;
 }
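
The netdev_sent_queue()/netdev_completed_queue() calls above wire ucc_geth
into byte queue limits (BQL): bytes are accounted when a packet is queued
for transmit, credited back as completions are reaped, and the accounting
is reset around open/close. A hedged in-kernel sketch of the pairing (not
this driver's code; the example_* names are invented):

#include <linux/netdevice.h>

/* xmit path: account the bytes before handing the skb to hardware */
static netdev_tx_t example_start_xmit(struct sk_buff *skb,
				      struct net_device *dev)
{
	netdev_sent_queue(dev, skb->len);
	/* ... post skb to the hardware ring here ... */
	return NETDEV_TX_OK;
}

/* completion path: credit back everything the hardware finished */
static void example_tx_clean(struct net_device *dev)
{
	unsigned int pkts = 0, bytes = 0;

	/* ... walk completed descriptors, summing pkts and bytes ... */
	netdev_completed_queue(dev, pkts, bytes);
}
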
index fb1a7251f45d336978199d208af5e1a40eee1556..25152715396bc04eda5e9d793a608ff7ae546071 100644 (file)
@@ -85,10 +85,12 @@ config HNS3
          drivers(like ODP)to register with HNAE devices and their associated
          operations.
 
+if HNS3
+
 config HNS3_HCLGE
        tristate "Hisilicon HNS3 HCLGE Acceleration Engine & Compatibility Layer Support"
+       default m
        depends on PCI_MSI
-       depends on HNS3
        ---help---
          This selects the HNS3_HCLGE network acceleration engine & its hardware
          compatibility layer. The engine would be used in Hisilicon hip08 family of
@@ -97,16 +99,15 @@ config HNS3_HCLGE
 config HNS3_DCB
        bool "Hisilicon HNS3 Data Center Bridge Support"
        default n
-       depends on HNS3 && HNS3_HCLGE && DCB
+       depends on HNS3_HCLGE && DCB
        ---help---
          Say Y here if you want to use Data Center Bridging (DCB) in the HNS3 driver.
 
          If unsure, say N.
 
 config HNS3_HCLGEVF
-    tristate "Hisilicon HNS3VF Acceleration Engine & Compatibility Layer Support"
-    depends on PCI_MSI
-    depends on HNS3
+       tristate "Hisilicon HNS3VF Acceleration Engine & Compatibility Layer Support"
+       depends on PCI_MSI
        depends on HNS3_HCLGE
     ---help---
          This selects the HNS3 VF drivers network acceleration engine & its hardware
@@ -115,11 +116,13 @@ config HNS3_HCLGEVF
 
 config HNS3_ENET
        tristate "Hisilicon HNS3 Ethernet Device Support"
+       default m
        depends on 64BIT && PCI
-       depends on HNS3
        ---help---
          This selects the Ethernet Driver for Hisilicon Network Subsystem 3 for hip08
          family of SoCs. This module depends upon HNAE3 driver to access the HNAE3
          devices and their associated operations.
 
+endif # HNS3
+
 endif # NET_VENDOR_HISILICON
index 340e28211135a266b5a955ef432f6e8786d4f23b..14374a856d3091a9489e92669c7d93e3e797a115 100644 (file)
@@ -904,7 +904,7 @@ static int hip04_mac_probe(struct platform_device *pdev)
                hip04_config_port(ndev, SPEED_100, DUPLEX_FULL);
 
        hip04_config_fifo(priv);
-       random_ether_addr(ndev->dev_addr);
+       eth_random_addr(ndev->dev_addr);
        hip04_update_mac_address(ndev);
 
        ret = hip04_alloc_ring(ndev, d);
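
This switches to the preferred helper name; eth_random_addr() (like the
random_ether_addr() wrapper it replaces) fills in six random bytes, clears
the multicast bit and sets the locally-administered bit. A userspace model
of the same rule, with the rand() seeding purely illustrative:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

int main(void)
{
	uint8_t addr[6];
	int i;

	srand(time(NULL));
	for (i = 0; i < 6; i++)
		addr[i] = rand() & 0xff;

	addr[0] &= 0xfe;	/* clear multicast bit */
	addr[0] |= 0x02;	/* set locally-administered bit */

	printf("%02x:%02x:%02x:%02x:%02x:%02x\n",
	       addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
	return 0;
}
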
index 25a6c8722ecacc981ff8320276c3b41943e8fa31..c5727003af8c1438f9e4824b16f8774b8a383878 100644 (file)
@@ -1006,12 +1006,11 @@ static int hix5hd2_init_hw_desc_queue(struct hix5hd2_priv *priv)
 
        for (i = 0; i < QUEUE_NUMS; i++) {
                size = priv->pool[i].count * sizeof(struct hix5hd2_desc);
-               virt_addr = dma_alloc_coherent(dev, size, &phys_addr,
-                                              GFP_KERNEL);
+               virt_addr = dma_zalloc_coherent(dev, size, &phys_addr,
+                                               GFP_KERNEL);
                if (virt_addr == NULL)
                        goto error_free_pool;
 
-               memset(virt_addr, 0, size);
                priv->pool[i].size = size;
                priv->pool[i].desc = virt_addr;
                priv->pool[i].phys_addr = phys_addr;
index bd68379d2beab60415cf5d071e598b36fd9871ed..e6aad30e7e69cd27c904542b29abaaec908bb9c7 100644 (file)
@@ -70,8 +70,8 @@ static struct ring_pair_cb *hns_ae_get_ring_pair(struct hnae_queue *q)
        return container_of(q, struct ring_pair_cb, q);
 }
 
-struct hnae_handle *hns_ae_get_handle(struct hnae_ae_dev *dev,
-                                     u32 port_id)
+static struct hnae_handle *hns_ae_get_handle(struct hnae_ae_dev *dev,
+                                            u32 port_id)
 {
        int vfnum_per_port;
        int qnum_per_vf;
@@ -329,7 +329,7 @@ static int hns_ae_start(struct hnae_handle *handle)
        return 0;
 }
 
-void hns_ae_stop(struct hnae_handle *handle)
+static void hns_ae_stop(struct hnae_handle *handle)
 {
        struct hns_mac_cb *mac_cb = hns_get_mac_cb(handle);
 
@@ -357,7 +357,7 @@ static void hns_ae_reset(struct hnae_handle *handle)
        }
 }
 
-void hns_ae_toggle_ring_irq(struct hnae_ring *ring, u32 mask)
+static void hns_ae_toggle_ring_irq(struct hnae_ring *ring, u32 mask)
 {
        u32 flag;
 
@@ -577,8 +577,8 @@ static void hns_ae_get_coalesce_range(struct hnae_handle *handle,
        *rx_usecs_high  = HNS_RCB_RX_USECS_HIGH;
 }
 
-void hns_ae_update_stats(struct hnae_handle *handle,
-                        struct net_device_stats *net_stats)
+static void hns_ae_update_stats(struct hnae_handle *handle,
+                               struct net_device_stats *net_stats)
 {
        int port;
        int idx;
@@ -660,7 +660,7 @@ void hns_ae_update_stats(struct hnae_handle *handle,
        net_stats->multicast = mac_cb->hw_stats.rx_mc_pkts;
 }
 
-void hns_ae_get_stats(struct hnae_handle *handle, u64 *data)
+static void hns_ae_get_stats(struct hnae_handle *handle, u64 *data)
 {
        int idx;
        struct hns_mac_cb *mac_cb;
@@ -692,8 +692,8 @@ void hns_ae_get_stats(struct hnae_handle *handle, u64 *data)
                hns_dsaf_get_stats(vf_cb->dsaf_dev, p, vf_cb->port_index);
 }
 
-void hns_ae_get_strings(struct hnae_handle *handle,
-                       u32 stringset, u8 *data)
+static void hns_ae_get_strings(struct hnae_handle *handle,
+                              u32 stringset, u8 *data)
 {
        int port;
        int idx;
@@ -725,7 +725,7 @@ void hns_ae_get_strings(struct hnae_handle *handle,
                hns_dsaf_get_strings(stringset, p, port, dsaf_dev);
 }
 
-int hns_ae_get_sset_count(struct hnae_handle *handle, int stringset)
+static int hns_ae_get_sset_count(struct hnae_handle *handle, int stringset)
 {
        u32 sset_count = 0;
        struct hns_mac_cb *mac_cb;
@@ -771,7 +771,7 @@ static int hns_ae_config_loopback(struct hnae_handle *handle,
        return ret;
 }
 
-void hns_ae_update_led_status(struct hnae_handle *handle)
+static void hns_ae_update_led_status(struct hnae_handle *handle)
 {
        struct hns_mac_cb *mac_cb;
 
@@ -783,8 +783,8 @@ void hns_ae_update_led_status(struct hnae_handle *handle)
        hns_set_led_opt(mac_cb);
 }
 
-int hns_ae_cpld_set_led_id(struct hnae_handle *handle,
-                          enum hnae_led_state status)
+static int hns_ae_cpld_set_led_id(struct hnae_handle *handle,
+                                 enum hnae_led_state status)
 {
        struct hns_mac_cb *mac_cb;
 
@@ -795,7 +795,7 @@ int hns_ae_cpld_set_led_id(struct hnae_handle *handle,
        return hns_cpld_led_set_id(mac_cb, status);
 }
 
-void hns_ae_get_regs(struct hnae_handle *handle, void *data)
+static void hns_ae_get_regs(struct hnae_handle *handle, void *data)
 {
        u32 *p = data;
        int i;
@@ -820,7 +820,7 @@ void hns_ae_get_regs(struct hnae_handle *handle, void *data)
                hns_dsaf_get_regs(vf_cb->dsaf_dev, vf_cb->port_index, p);
 }
 
-int hns_ae_get_regs_len(struct hnae_handle *handle)
+static int hns_ae_get_regs_len(struct hnae_handle *handle)
 {
        u32 total_num;
        struct hnae_vf_cb *vf_cb = hns_ae_get_vf_cb(handle);
index 74bd260ca02a887869a507f8746dfc928522d4be..5488c6e89f211d355ab33e0cda4033b84dcdccb3 100644 (file)
@@ -339,7 +339,7 @@ static void hns_gmac_init(void *mac_drv)
                           GMAC_TX_WATER_LINE_SHIFT, 8);
 }
 
-void hns_gmac_update_stats(void *mac_drv)
+static void hns_gmac_update_stats(void *mac_drv)
 {
        struct mac_hw_stats *hw_stats = NULL;
        struct mac_driver *drv = (struct mac_driver *)mac_drv;
index 9dcc5765f11fabf48dca14660047ceb19d3c8b49..3545a5d0bc95214ecc5fe320f4b0f8557dce6993 100644 (file)
@@ -458,11 +458,6 @@ int hns_mac_set_mtu(struct hns_mac_cb *mac_cb, u32 new_mtu, u32 buf_size)
 {
        struct mac_driver *drv = hns_mac_get_drv(mac_cb);
        u32 new_frm = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
-       u32 max_frm = AE_IS_VER1(mac_cb->dsaf_dev->dsaf_ver) ?
-                       MAC_MAX_MTU : MAC_MAX_MTU_V2;
-
-       if (mac_cb->mac_type == HNAE_PORT_DEBUG)
-               max_frm = MAC_MAX_MTU_DBG;
 
        if (new_frm > HNS_RCB_RING_MAX_BD_PER_PKT * buf_size)
                return -EINVAL;
@@ -931,8 +926,9 @@ static int hns_mac_get_mode(phy_interface_t phy_if)
        }
 }
 
-u8 __iomem *hns_mac_get_vaddr(struct dsaf_device *dsaf_dev,
-                             struct hns_mac_cb *mac_cb, u32 mac_mode_idx)
+static u8 __iomem *
+hns_mac_get_vaddr(struct dsaf_device *dsaf_dev,
+                 struct hns_mac_cb *mac_cb, u32 mac_mode_idx)
 {
        u8 __iomem *base = dsaf_dev->io_base;
        int mac_id = mac_cb->mac_id;
@@ -950,7 +946,8 @@ u8 __iomem *hns_mac_get_vaddr(struct dsaf_device *dsaf_dev,
  * @mac_cb: mac control block
  * return 0 on success, negative on failure
  */
-int hns_mac_get_cfg(struct dsaf_device *dsaf_dev, struct hns_mac_cb *mac_cb)
+static int
+hns_mac_get_cfg(struct dsaf_device *dsaf_dev, struct hns_mac_cb *mac_cb)
 {
        int ret;
        u32 mac_mode_idx;
index 0ce07f6eb1e6247eb84361f3012d53fbb1a4d443..ca50c2553a9cb16c92e2d1dbfeaac7e03808fc71 100644 (file)
@@ -28,7 +28,7 @@
 #include "hns_dsaf_rcb.h"
 #include "hns_dsaf_misc.h"
 
-const char *g_dsaf_mode_match[DSAF_MODE_MAX] = {
+static const char *g_dsaf_mode_match[DSAF_MODE_MAX] = {
        [DSAF_MODE_DISABLE_2PORT_64VM] = "2port-64vf",
        [DSAF_MODE_DISABLE_6PORT_0VM] = "6port-16rss",
        [DSAF_MODE_DISABLE_6PORT_16VM] = "6port-16vf",
@@ -42,7 +42,7 @@ static const struct acpi_device_id hns_dsaf_acpi_match[] = {
 };
 MODULE_DEVICE_TABLE(acpi, hns_dsaf_acpi_match);
 
-int hns_dsaf_get_cfg(struct dsaf_device *dsaf_dev)
+static int hns_dsaf_get_cfg(struct dsaf_device *dsaf_dev)
 {
        int ret, i;
        u32 desc_num;
@@ -959,7 +959,8 @@ static void hns_dsaf_tcam_mc_invld(struct dsaf_device *dsaf_dev, u32 address)
        spin_unlock_bh(&dsaf_dev->tcam_lock);
 }
 
-void hns_dsaf_tcam_addr_get(struct dsaf_drv_tbl_tcam_key *mac_key, u8 *addr)
+static void
+hns_dsaf_tcam_addr_get(struct dsaf_drv_tbl_tcam_key *mac_key, u8 *addr)
 {
        addr[0] = mac_key->high.bits.mac_0;
        addr[1] = mac_key->high.bits.mac_1;
@@ -1682,7 +1683,6 @@ int hns_dsaf_add_mac_mc_port(struct dsaf_device *dsaf_dev,
        struct dsaf_tbl_tcam_mcast_cfg mac_data;
        struct dsaf_drv_priv *priv = hns_dsaf_dev_priv(dsaf_dev);
        struct dsaf_drv_soft_mac_tbl *soft_mac_entry = priv->soft_mac_tbl;
-       struct dsaf_drv_tbl_tcam_key tmp_mac_key;
        struct dsaf_tbl_tcam_data tcam_data;
        u8 mc_addr[ETH_ALEN];
        int mskid;
@@ -1739,10 +1739,6 @@ int hns_dsaf_add_mac_mc_port(struct dsaf_device *dsaf_dev,
                /* if exist, add in */
                hns_dsaf_tcam_mc_get(dsaf_dev, entry_index, &tcam_data,
                                     &mac_data);
-
-               tmp_mac_key.high.val =
-                       le32_to_cpu(tcam_data.tbl_tcam_data_high);
-               tmp_mac_key.low.val = le32_to_cpu(tcam_data.tbl_tcam_data_low);
        }
 
        /* config hardware entry */
@@ -1852,7 +1848,7 @@ int hns_dsaf_del_mac_mc_port(struct dsaf_device *dsaf_dev,
        struct dsaf_tbl_tcam_data tcam_data;
        int mskid;
        const u8 empty_msk[sizeof(mac_data.tbl_mcast_port_msk)] = {0};
-       struct dsaf_drv_tbl_tcam_key mask_key, tmp_mac_key;
+       struct dsaf_drv_tbl_tcam_key mask_key;
        struct dsaf_tbl_tcam_data *pmask_key = NULL;
        u8 mc_addr[ETH_ALEN];
 
@@ -1915,9 +1911,6 @@ int hns_dsaf_del_mac_mc_port(struct dsaf_device *dsaf_dev,
        /* read entry */
        hns_dsaf_tcam_mc_get(dsaf_dev, entry_index, &tcam_data, &mac_data);
 
-       tmp_mac_key.high.val = le32_to_cpu(tcam_data.tbl_tcam_data_high);
-       tmp_mac_key.low.val = le32_to_cpu(tcam_data.tbl_tcam_data_low);
-
        /*del the port*/
        if (mac_entry->port_num < DSAF_SERVICE_NW_NUM) {
                mskid = mac_entry->port_num;
@@ -2084,8 +2077,9 @@ static void hns_dsaf_pfc_unit_cnt(struct dsaf_device *dsaf_dev, int  mac_id,
  * @dsaf_id: dsa fabric id
  * @xge_ge_work_mode
  */
-void hns_dsaf_port_work_rate_cfg(struct dsaf_device *dsaf_dev, int mac_id,
-                                enum dsaf_port_rate_mode rate_mode)
+static void
+hns_dsaf_port_work_rate_cfg(struct dsaf_device *dsaf_dev, int mac_id,
+                           enum dsaf_port_rate_mode rate_mode)
 {
        u32 port_work_mode;
 
index acf29633ec793435541865231fc8e1f2d3a82efc..16294cd3c95459891c65080cd49c61f839afaa60 100644 (file)
@@ -340,7 +340,8 @@ static void hns_dsaf_xge_srst_by_port_acpi(struct dsaf_device *dsaf_dev,
  * bit18-19 for com/dfx
  * @enable: false - request reset , true - drop reset
  */
-void hns_dsaf_srst_chns(struct dsaf_device *dsaf_dev, u32 msk, bool dereset)
+static void
+hns_dsaf_srst_chns(struct dsaf_device *dsaf_dev, u32 msk, bool dereset)
 {
        u32 reg_addr;
 
@@ -362,7 +363,7 @@ void hns_dsaf_srst_chns(struct dsaf_device *dsaf_dev, u32 msk, bool dereset)
  * bit18-19 for com/dfx
  * @enable: false - request reset, true - drop reset
  */
-void
+static void
 hns_dsaf_srst_chns_acpi(struct dsaf_device *dsaf_dev, u32 msk, bool dereset)
 {
        hns_dsaf_acpi_srst_by_port(dsaf_dev, HNS_OP_RESET_FUNC,
@@ -370,7 +371,7 @@ hns_dsaf_srst_chns_acpi(struct dsaf_device *dsaf_dev, u32 msk, bool dereset)
                                   msk, dereset);
 }
 
-void hns_dsaf_roce_srst(struct dsaf_device *dsaf_dev, bool dereset)
+static void hns_dsaf_roce_srst(struct dsaf_device *dsaf_dev, bool dereset)
 {
        if (!dereset) {
                dsaf_write_sub(dsaf_dev, DSAF_SUB_SC_ROCEE_RESET_REQ_REG, 1);
@@ -384,7 +385,7 @@ void hns_dsaf_roce_srst(struct dsaf_device *dsaf_dev, bool dereset)
        }
 }
 
-void hns_dsaf_roce_srst_acpi(struct dsaf_device *dsaf_dev, bool dereset)
+static void hns_dsaf_roce_srst_acpi(struct dsaf_device *dsaf_dev, bool dereset)
 {
        hns_dsaf_acpi_srst_by_port(dsaf_dev, HNS_OP_RESET_FUNC,
                                   HNS_ROCE_RESET_FUNC, 0, dereset);
@@ -568,7 +569,7 @@ static phy_interface_t hns_mac_get_phy_if_acpi(struct hns_mac_cb *mac_cb)
        return phy_if;
 }
 
-int hns_mac_get_sfp_prsnt(struct hns_mac_cb *mac_cb, int *sfp_prsnt)
+static int hns_mac_get_sfp_prsnt(struct hns_mac_cb *mac_cb, int *sfp_prsnt)
 {
        u32 val = 0;
        int ret;
@@ -586,7 +587,7 @@ int hns_mac_get_sfp_prsnt(struct hns_mac_cb *mac_cb, int *sfp_prsnt)
        return 0;
 }
 
-int hns_mac_get_sfp_prsnt_acpi(struct hns_mac_cb *mac_cb, int *sfp_prsnt)
+static int hns_mac_get_sfp_prsnt_acpi(struct hns_mac_cb *mac_cb, int *sfp_prsnt)
 {
        union acpi_object *obj;
        union acpi_object obj_args, argv4;
index 93e71e27401b4da815e899753dc7be1a83ff3f14..d160d8c9e45ba60b3543643c13748272baada910 100644 (file)
@@ -73,7 +73,7 @@ hns_ppe_common_get_ioaddr(struct ppe_common_cb *ppe_common)
  * comm_index: common index
  * return 0 on success, negative on failure
  */
-int hns_ppe_common_get_cfg(struct dsaf_device *dsaf_dev, int comm_index)
+static int hns_ppe_common_get_cfg(struct dsaf_device *dsaf_dev, int comm_index)
 {
        struct ppe_common_cb *ppe_common;
        int ppe_num;
@@ -104,7 +104,8 @@ int hns_ppe_common_get_cfg(struct dsaf_device *dsaf_dev, int comm_index)
        return 0;
 }
 
-void hns_ppe_common_free_cfg(struct dsaf_device *dsaf_dev, u32 comm_index)
+static void
+hns_ppe_common_free_cfg(struct dsaf_device *dsaf_dev, u32 comm_index)
 {
        dsaf_dev->ppe_common[comm_index] = NULL;
 }
@@ -203,9 +204,9 @@ static int hns_ppe_common_init_hw(struct ppe_common_cb *ppe_common)
        enum dsaf_mode dsaf_mode = dsaf_dev->dsaf_mode;
 
        dsaf_dev->misc_op->ppe_comm_srst(dsaf_dev, 0);
-       mdelay(100);
+       msleep(100);
        dsaf_dev->misc_op->ppe_comm_srst(dsaf_dev, 1);
-       mdelay(100);
+       msleep(100);
 
        if (ppe_common->ppe_mode == PPE_COMMON_MODE_SERVICE) {
                switch (dsaf_mode) {
@@ -337,7 +338,7 @@ static void hns_ppe_uninit_hw(struct hns_ppe_cb *ppe_cb)
        }
 }
 
-void hns_ppe_uninit_ex(struct ppe_common_cb *ppe_common)
+static void hns_ppe_uninit_ex(struct ppe_common_cb *ppe_common)
 {
        u32 i;
 
index e2e28532e4dc2d03cf15330c621f8fb49469e382..9d76e2e54f9df576b65702f720a79ab2a612dbb3 100644 (file)
@@ -705,7 +705,7 @@ void hns_rcb_get_queue_mode(enum dsaf_mode dsaf_mode, u16 *max_vfn,
        }
 }
 
-int hns_rcb_get_ring_num(struct dsaf_device *dsaf_dev)
+static int hns_rcb_get_ring_num(struct dsaf_device *dsaf_dev)
 {
        switch (dsaf_dev->dsaf_mode) {
        case DSAF_MODE_ENABLE_FIX:
@@ -741,7 +741,7 @@ int hns_rcb_get_ring_num(struct dsaf_device *dsaf_dev)
        }
 }
 
-void __iomem *hns_rcb_common_get_vaddr(struct rcb_common_cb *rcb_common)
+static void __iomem *hns_rcb_common_get_vaddr(struct rcb_common_cb *rcb_common)
 {
        struct dsaf_device *dsaf_dev = rcb_common->dsaf_dev;
 
index 51e7e9f5af4992b3a21f3cbfd4d27bed06d092e0..ba4316910dea1726da855c13b78e95bb6bd36a3c 100644 (file)
@@ -215,10 +215,10 @@ static void hns_xgmac_init(void *mac_drv)
        u32 port = drv->mac_id;
 
        dsaf_dev->misc_op->xge_srst(dsaf_dev, port, 0);
-       mdelay(100);
+       msleep(100);
        dsaf_dev->misc_op->xge_srst(dsaf_dev, port, 1);
 
-       mdelay(100);
+       msleep(100);
        hns_xgmac_lf_rf_control_init(drv);
        hns_xgmac_exc_irq_en(drv, 0);
 
@@ -311,7 +311,7 @@ static void hns_xgmac_config_max_frame_length(void *mac_drv, u16 newval)
        dsaf_write_dev(drv, XGMAC_MAC_MAX_PKT_SIZE_REG, newval);
 }
 
-void hns_xgmac_update_stats(void *mac_drv)
+static void hns_xgmac_update_stats(void *mac_drv)
 {
        struct mac_driver *drv = (struct mac_driver *)mac_drv;
        struct mac_hw_stats *hw_stats = &drv->mac_cb->hw_stats;
index ef9ef703d13a0e0efff11404afb41532f571283f..c2ac187ec8fcf911ed077e95099288ebbb4f0074 100644 (file)
@@ -1300,7 +1300,7 @@ static int hns_nic_net_set_mac_address(struct net_device *ndev, void *p)
        return 0;
 }
 
-void hns_nic_update_stats(struct net_device *netdev)
+static void hns_nic_update_stats(struct net_device *netdev)
 {
        struct hns_nic_priv *priv = netdev_priv(netdev);
        struct hnae_handle *h = priv->ae_handle;
@@ -1582,7 +1582,7 @@ static int hns_nic_do_ioctl(struct net_device *netdev, struct ifreq *ifr,
 
 /* use only for netconsole to poll with the device without interrupt */
 #ifdef CONFIG_NET_POLL_CONTROLLER
-void hns_nic_poll_controller(struct net_device *ndev)
+static void hns_nic_poll_controller(struct net_device *ndev)
 {
        struct hns_nic_priv *priv = netdev_priv(ndev);
        unsigned long flags;
@@ -1935,7 +1935,7 @@ static int hns_nic_uc_unsync(struct net_device *netdev,
  *
  * return void
  */
-void hns_set_multicast_list(struct net_device *ndev)
+static void hns_set_multicast_list(struct net_device *ndev)
 {
        struct hns_nic_priv *priv = netdev_priv(ndev);
        struct hnae_handle *h = priv->ae_handle;
@@ -1957,7 +1957,7 @@ void hns_set_multicast_list(struct net_device *ndev)
        }
 }
 
-void hns_nic_set_rx_mode(struct net_device *ndev)
+static void hns_nic_set_rx_mode(struct net_device *ndev)
 {
        struct hns_nic_priv *priv = netdev_priv(ndev);
        struct hnae_handle *h = priv->ae_handle;
@@ -2022,7 +2022,8 @@ static void hns_nic_get_stats64(struct net_device *ndev,
 
 static u16
 hns_nic_select_queue(struct net_device *ndev, struct sk_buff *skb,
-                    void *accel_priv, select_queue_fallback_t fallback)
+                    struct net_device *sb_dev,
+                    select_queue_fallback_t fallback)
 {
        struct ethhdr *eth_hdr = (struct ethhdr *)skb->data;
        struct hns_nic_priv *priv = netdev_priv(ndev);
@@ -2032,7 +2033,7 @@ hns_nic_select_queue(struct net_device *ndev, struct sk_buff *skb,
            is_multicast_ether_addr(eth_hdr->h_dest))
                return 0;
        else
-               return fallback(ndev, skb);
+               return fallback(ndev, skb, NULL);
 }
 
 static const struct net_device_ops hns_nic_netdev_ops = {
index 2e14a3ae1d8be0f9841a5c53f456c4d2e4f4d270..3957205abb8111f83c9685be4334f8de09f53427 100644 (file)
@@ -658,8 +658,8 @@ static void hns_nic_get_drvinfo(struct net_device *net_dev,
  * @dev: net device
  * @param: ethtool parameter
  */
-void hns_get_ringparam(struct net_device *net_dev,
-                      struct ethtool_ringparam *param)
+static void hns_get_ringparam(struct net_device *net_dev,
+                             struct ethtool_ringparam *param)
 {
        struct hns_nic_priv *priv = netdev_priv(net_dev);
        struct hnae_ae_ops *ops;
@@ -808,7 +808,8 @@ static int hns_set_coalesce(struct net_device *net_dev,
  * @dev: net device
  * @ch: channel info.
  */
-void hns_get_channels(struct net_device *net_dev, struct ethtool_channels *ch)
+static void
+hns_get_channels(struct net_device *net_dev, struct ethtool_channels *ch)
 {
        struct hns_nic_priv *priv = netdev_priv(net_dev);
 
@@ -825,8 +826,8 @@ void hns_get_channels(struct net_device *net_dev, struct ethtool_channels *ch)
  * @stats: statistics info.
  * @data: statistics data.
  */
-void hns_get_ethtool_stats(struct net_device *netdev,
-                          struct ethtool_stats *stats, u64 *data)
+static void hns_get_ethtool_stats(struct net_device *netdev,
+                                 struct ethtool_stats *stats, u64 *data)
 {
        u64 *p = data;
        struct hns_nic_priv *priv = netdev_priv(netdev);
@@ -883,7 +884,7 @@ void hns_get_ethtool_stats(struct net_device *netdev,
  * @stats: string set ID.
  * @data: objects data.
  */
-void hns_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
+static void hns_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
 {
        struct hns_nic_priv *priv = netdev_priv(netdev);
        struct hnae_handle *h = priv->ae_handle;
@@ -973,7 +974,7 @@ void hns_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
  *
  * Return string set count.
  */
-int hns_get_sset_count(struct net_device *netdev, int stringset)
+static int hns_get_sset_count(struct net_device *netdev, int stringset)
 {
        struct hns_nic_priv *priv = netdev_priv(netdev);
        struct hnae_handle *h = priv->ae_handle;
@@ -1007,7 +1008,7 @@ int hns_get_sset_count(struct net_device *netdev, int stringset)
  *
  * Return 0 on success, negative on failure.
  */
-int hns_phy_led_set(struct net_device *netdev, int value)
+static int hns_phy_led_set(struct net_device *netdev, int value)
 {
        int retval;
        struct phy_device *phy_dev = netdev->phydev;
@@ -1029,7 +1030,8 @@ int hns_phy_led_set(struct net_device *netdev, int value)
  *
  * Return 0 on success, negative on failure.
  */
-int hns_set_phys_id(struct net_device *netdev, enum ethtool_phys_id_state state)
+static int
+hns_set_phys_id(struct net_device *netdev, enum ethtool_phys_id_state state)
 {
        struct hns_nic_priv *priv = netdev_priv(netdev);
        struct hnae_handle *h = priv->ae_handle;
@@ -1103,8 +1105,8 @@ int hns_set_phys_id(struct net_device *netdev, enum ethtool_phys_id_state state)
  * @cmd: ethtool cmd
  * @date: register data
  */
-void hns_get_regs(struct net_device *net_dev, struct ethtool_regs *cmd,
-                 void *data)
+static void hns_get_regs(struct net_device *net_dev, struct ethtool_regs *cmd,
+                        void *data)
 {
        struct hns_nic_priv *priv = netdev_priv(net_dev);
        struct hnae_ae_ops *ops;
index 9d79dad2c6aae0f9bdbddeac1659ae4f08751d3d..fff5be8078ac388102456f3e505a5a0d46b630bf 100644 (file)
@@ -1,14 +1,7 @@
-/*
- * Copyright (c) 2016-2017 Hisilicon Limited.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright (c) 2016-2017 Hisilicon Limited.
 
 #include <linux/list.h>
-#include <linux/slab.h>
 #include <linux/spinlock.h>
 
 #include "hnae3.h"
@@ -41,13 +34,13 @@ static void hnae3_set_client_init_flag(struct hnae3_client *client,
 {
        switch (client->type) {
        case HNAE3_CLIENT_KNIC:
-               hnae_set_bit(ae_dev->flag, HNAE3_KNIC_CLIENT_INITED_B, inited);
+               hnae3_set_bit(ae_dev->flag, HNAE3_KNIC_CLIENT_INITED_B, inited);
                break;
        case HNAE3_CLIENT_UNIC:
-               hnae_set_bit(ae_dev->flag, HNAE3_UNIC_CLIENT_INITED_B, inited);
+               hnae3_set_bit(ae_dev->flag, HNAE3_UNIC_CLIENT_INITED_B, inited);
                break;
        case HNAE3_CLIENT_ROCE:
-               hnae_set_bit(ae_dev->flag, HNAE3_ROCE_CLIENT_INITED_B, inited);
+               hnae3_set_bit(ae_dev->flag, HNAE3_ROCE_CLIENT_INITED_B, inited);
                break;
        default:
                break;
@@ -61,16 +54,16 @@ static int hnae3_get_client_init_flag(struct hnae3_client *client,
 
        switch (client->type) {
        case HNAE3_CLIENT_KNIC:
-               inited = hnae_get_bit(ae_dev->flag,
+               inited = hnae3_get_bit(ae_dev->flag,
                                       HNAE3_KNIC_CLIENT_INITED_B);
                break;
        case HNAE3_CLIENT_UNIC:
-               inited = hnae_get_bit(ae_dev->flag,
+               inited = hnae3_get_bit(ae_dev->flag,
                                       HNAE3_UNIC_CLIENT_INITED_B);
                break;
        case HNAE3_CLIENT_ROCE:
-               inited = hnae_get_bit(ae_dev->flag,
-                                     HNAE3_ROCE_CLIENT_INITED_B);
+               inited = hnae3_get_bit(ae_dev->flag,
+                                      HNAE3_ROCE_CLIENT_INITED_B);
                break;
        default:
                break;
@@ -86,7 +79,7 @@ static int hnae3_match_n_instantiate(struct hnae3_client *client,
 
        /* check if this client matches the type of ae_dev */
        if (!(hnae3_client_match(client->type, ae_dev->dev_type) &&
-             hnae_get_bit(ae_dev->flag, HNAE3_DEV_INITED_B))) {
+             hnae3_get_bit(ae_dev->flag, HNAE3_DEV_INITED_B))) {
                return 0;
        }
 
@@ -95,7 +88,7 @@ static int hnae3_match_n_instantiate(struct hnae3_client *client,
                ret = ae_dev->ops->init_client_instance(client, ae_dev);
                if (ret) {
                        dev_err(&ae_dev->pdev->dev,
-                               "fail to instantiate client\n");
+                               "fail to instantiate client, ret = %d\n", ret);
                        return ret;
                }
 
@@ -135,7 +128,8 @@ int hnae3_register_client(struct hnae3_client *client)
                ret = hnae3_match_n_instantiate(client, ae_dev, true);
                if (ret)
                        dev_err(&ae_dev->pdev->dev,
-                               "match and instantiation failed for port\n");
+                               "match and instantiation failed for port, ret = %d\n",
+                               ret);
        }
 
 exit:
@@ -185,11 +179,12 @@ void hnae3_register_ae_algo(struct hnae3_ae_algo *ae_algo)
                ae_dev->ops = ae_algo->ops;
                ret = ae_algo->ops->init_ae_dev(ae_dev);
                if (ret) {
-                       dev_err(&ae_dev->pdev->dev, "init ae_dev error.\n");
+                       dev_err(&ae_dev->pdev->dev,
+                               "init ae_dev error, ret = %d\n", ret);
                        continue;
                }
 
-               hnae_set_bit(ae_dev->flag, HNAE3_DEV_INITED_B, 1);
+               hnae3_set_bit(ae_dev->flag, HNAE3_DEV_INITED_B, 1);
 
                /* check the client list for the match with this ae_dev type and
                 * initialize the figure out client instance
@@ -198,7 +193,8 @@ void hnae3_register_ae_algo(struct hnae3_ae_algo *ae_algo)
                        ret = hnae3_match_n_instantiate(client, ae_dev, true);
                        if (ret)
                                dev_err(&ae_dev->pdev->dev,
-                                       "match and instantiation failed\n");
+                                       "match and instantiation failed, ret = %d\n",
+                                       ret);
                }
        }
 
@@ -218,7 +214,7 @@ void hnae3_unregister_ae_algo(struct hnae3_ae_algo *ae_algo)
        mutex_lock(&hnae3_common_lock);
        /* Check if there are matched ae_dev */
        list_for_each_entry(ae_dev, &hnae3_ae_dev_list, node) {
-               if (!hnae_get_bit(ae_dev->flag, HNAE3_DEV_INITED_B))
+               if (!hnae3_get_bit(ae_dev->flag, HNAE3_DEV_INITED_B))
                        continue;
 
                id = pci_match_id(ae_algo->pdev_id_table, ae_dev->pdev);
@@ -232,7 +228,7 @@ void hnae3_unregister_ae_algo(struct hnae3_ae_algo *ae_algo)
                        hnae3_match_n_instantiate(client, ae_dev, false);
 
                ae_algo->ops->uninit_ae_dev(ae_dev);
-               hnae_set_bit(ae_dev->flag, HNAE3_DEV_INITED_B, 0);
+               hnae3_set_bit(ae_dev->flag, HNAE3_DEV_INITED_B, 0);
        }
 
        list_del(&ae_algo->node);
@@ -271,11 +267,12 @@ void hnae3_register_ae_dev(struct hnae3_ae_dev *ae_dev)
                /* ae_dev init should set flag */
                ret = ae_dev->ops->init_ae_dev(ae_dev);
                if (ret) {
-                       dev_err(&ae_dev->pdev->dev, "init ae_dev error\n");
+                       dev_err(&ae_dev->pdev->dev,
+                               "init ae_dev error, ret = %d\n", ret);
                        goto out_err;
                }
 
-               hnae_set_bit(ae_dev->flag, HNAE3_DEV_INITED_B, 1);
+               hnae3_set_bit(ae_dev->flag, HNAE3_DEV_INITED_B, 1);
                break;
        }
 
@@ -286,7 +283,8 @@ void hnae3_register_ae_dev(struct hnae3_ae_dev *ae_dev)
                ret = hnae3_match_n_instantiate(client, ae_dev, true);
                if (ret)
                        dev_err(&ae_dev->pdev->dev,
-                               "match and instantiation failed\n");
+                               "match and instantiation failed, ret = %d\n",
+                               ret);
        }
 
 out_err:
@@ -306,7 +304,7 @@ void hnae3_unregister_ae_dev(struct hnae3_ae_dev *ae_dev)
        mutex_lock(&hnae3_common_lock);
        /* Check if there are matched ae_algo */
        list_for_each_entry(ae_algo, &hnae3_ae_algo_list, node) {
-               if (!hnae_get_bit(ae_dev->flag, HNAE3_DEV_INITED_B))
+               if (!hnae3_get_bit(ae_dev->flag, HNAE3_DEV_INITED_B))
                        continue;
 
                id = pci_match_id(ae_algo->pdev_id_table, ae_dev->pdev);
@@ -317,7 +315,7 @@ void hnae3_unregister_ae_dev(struct hnae3_ae_dev *ae_dev)
                        hnae3_match_n_instantiate(client, ae_dev, false);
 
                ae_algo->ops->uninit_ae_dev(ae_dev);
-               hnae_set_bit(ae_dev->flag, HNAE3_DEV_INITED_B, 0);
+               hnae3_set_bit(ae_dev->flag, HNAE3_DEV_INITED_B, 0);
        }
 
        list_del(&ae_dev->node);
index 8acb1d116a0282c69129c5d7a2b4eb9140f25764..67befff0bfc508e04100a28ff8f374ca4844b73c 100644 (file)
@@ -1,11 +1,5 @@
-/*
- * Copyright (c) 2016-2017 Hisilicon Limited.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright (c) 2016-2017 Hisilicon Limited.
 
 #ifndef __HNAE3_H
 #define __HNAE3_H
                BIT(HNAE3_DEV_SUPPORT_ROCE_B))
 
 #define hnae3_dev_roce_supported(hdev) \
-       hnae_get_bit(hdev->ae_dev->flag, HNAE3_DEV_SUPPORT_ROCE_B)
+       hnae3_get_bit(hdev->ae_dev->flag, HNAE3_DEV_SUPPORT_ROCE_B)
 
 #define hnae3_dev_dcb_supported(hdev) \
-       hnae_get_bit(hdev->ae_dev->flag, HNAE3_DEV_SUPPORT_DCB_B)
+       hnae3_get_bit(hdev->ae_dev->flag, HNAE3_DEV_SUPPORT_DCB_B)
 
 #define ring_ptr_move_fw(ring, p) \
        ((ring)->p = ((ring)->p + 1) % (ring)->desc_num)
@@ -167,7 +161,6 @@ struct hnae3_client_ops {
 #define HNAE3_CLIENT_NAME_LENGTH 16
 struct hnae3_client {
        char name[HNAE3_CLIENT_NAME_LENGTH];
-       u16 version;
        unsigned long state;
        enum hnae3_client_type type;
        const struct hnae3_client_ops *ops;
@@ -436,7 +429,6 @@ struct hnae3_dcb_ops {
 struct hnae3_ae_algo {
        const struct hnae3_ae_ops *ops;
        struct list_head node;
-       char name[HNAE3_CLASS_NAME_SIZE];
        const struct pci_device_id *pdev_id_table;
 };
 
@@ -509,17 +501,17 @@ struct hnae3_handle {
        u32 numa_node_mask;     /* for multi-chip support */
 };
 
-#define hnae_set_field(origin, mask, shift, val) \
+#define hnae3_set_field(origin, mask, shift, val) \
        do { \
                (origin) &= (~(mask)); \
                (origin) |= ((val) << (shift)) & (mask); \
        } while (0)
-#define hnae_get_field(origin, mask, shift) (((origin) & (mask)) >> (shift))
+#define hnae3_get_field(origin, mask, shift) (((origin) & (mask)) >> (shift))
 
-#define hnae_set_bit(origin, shift, val) \
-       hnae_set_field((origin), (0x1 << (shift)), (shift), (val))
-#define hnae_get_bit(origin, shift) \
-       hnae_get_field((origin), (0x1 << (shift)), (shift))
+#define hnae3_set_bit(origin, shift, val) \
+       hnae3_set_field((origin), (0x1 << (shift)), (shift), (val))
+#define hnae3_get_bit(origin, shift) \
+       hnae3_get_field((origin), (0x1 << (shift)), (shift))
 
 void hnae3_register_ae_dev(struct hnae3_ae_dev *ae_dev);
 void hnae3_unregister_ae_dev(struct hnae3_ae_dev *ae_dev);
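
The renamed hnae3_* accessors are plain mask-and-shift helpers. A
self-contained demo follows; the macros are copied from the hunk above,
while the flag layout used in main() is invented for illustration:

#include <stdint.h>
#include <stdio.h>

#define hnae3_set_field(origin, mask, shift, val) \
	do { \
		(origin) &= (~(mask)); \
		(origin) |= ((val) << (shift)) & (mask); \
	} while (0)
#define hnae3_get_field(origin, mask, shift) (((origin) & (mask)) >> (shift))

#define hnae3_set_bit(origin, shift, val) \
	hnae3_set_field((origin), (0x1 << (shift)), (shift), (val))
#define hnae3_get_bit(origin, shift) \
	hnae3_get_field((origin), (0x1 << (shift)), (shift))

int main(void)
{
	uint32_t flags = 0;

	hnae3_set_bit(flags, 3, 1);		/* set bit 3 */
	hnae3_set_field(flags, 0x7 << 4, 4, 5);	/* write 5 into bits 6:4 */

	/* prints flags=0x58 bit3=1 field=5 */
	printf("flags=0x%x bit3=%u field=%u\n", (unsigned)flags,
	       (unsigned)hnae3_get_bit(flags, 3),
	       (unsigned)hnae3_get_field(flags, 0x7 << 4, 4));
	return 0;
}
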
index eb82700da7d0dbc60c1b115695e4fe4ae2667d5a..ea5f8a84070dfd97e9644f9847c11bd2a46668a3 100644 (file)
@@ -1,11 +1,5 @@
-/*
- * Copyright (c) 2016-2017 Hisilicon Limited.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright (c) 2016-2017 Hisilicon Limited.
 
 #include "hnae3.h"
 #include "hns3_enet.h"
index 25a73bb2e642dde42ae59f10369e7d3fefcb53f7..bd031af38a96a59b7cbe6d618af0819f009b51c0 100644 (file)
@@ -1,11 +1,5 @@
-/*
- * Copyright (c) 2016~2017 Hisilicon Limited.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright (c) 2016-2017 Hisilicon Limited.
 
 #include <linux/dma-mapping.h>
 #include <linux/etherdevice.h>
@@ -56,15 +50,16 @@ static const struct pci_device_id hns3_pci_tbl[] = {
        {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_RDMA_MACSEC),
         HNAE3_DEV_SUPPORT_ROCE_DCB_BITS},
        {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_VF), 0},
-       {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_RDMA_DCB_PFC_VF), 0},
+       {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_RDMA_DCB_PFC_VF),
+        HNAE3_DEV_SUPPORT_ROCE_DCB_BITS},
        /* required last entry */
        {0, }
 };
 MODULE_DEVICE_TABLE(pci, hns3_pci_tbl);
 
-static irqreturn_t hns3_irq_handle(int irq, void *dev)
+static irqreturn_t hns3_irq_handle(int irq, void *vector)
 {
-       struct hns3_enet_tqp_vector *tqp_vector = dev;
+       struct hns3_enet_tqp_vector *tqp_vector = vector;
 
        napi_schedule(&tqp_vector->napi);
 
@@ -239,7 +234,28 @@ static int hns3_nic_set_real_num_queue(struct net_device *netdev)
        struct hnae3_handle *h = hns3_get_handle(netdev);
        struct hnae3_knic_private_info *kinfo = &h->kinfo;
        unsigned int queue_size = kinfo->rss_size * kinfo->num_tc;
-       int ret;
+       int i, ret;
+
+       if (kinfo->num_tc <= 1) {
+               netdev_reset_tc(netdev);
+       } else {
+               ret = netdev_set_num_tc(netdev, kinfo->num_tc);
+               if (ret) {
+                       netdev_err(netdev,
+                                  "netdev_set_num_tc fail, ret=%d!\n", ret);
+                       return ret;
+               }
+
+               for (i = 0; i < HNAE3_MAX_TC; i++) {
+                       if (!kinfo->tc_info[i].enable)
+                               continue;
+
+                       netdev_set_tc_queue(netdev,
+                                           kinfo->tc_info[i].tc,
+                                           kinfo->tc_info[i].tqp_count,
+                                           kinfo->tc_info[i].tqp_offset);
+               }
+       }
 
        ret = netif_set_real_num_tx_queues(netdev, queue_size);
        if (ret) {
@@ -312,7 +328,9 @@ out_start_err:
 static int hns3_nic_net_open(struct net_device *netdev)
 {
        struct hns3_nic_priv *priv = netdev_priv(netdev);
-       int ret;
+       struct hnae3_handle *h = hns3_get_handle(netdev);
+       struct hnae3_knic_private_info *kinfo;
+       int i, ret;
 
        netif_carrier_off(netdev);
 
@@ -327,6 +345,12 @@ static int hns3_nic_net_open(struct net_device *netdev)
                return ret;
        }
 
+       kinfo = &h->kinfo;
+       for (i = 0; i < HNAE3_MAX_USER_PRIO; i++) {
+               netdev_set_prio_tc_map(netdev, i,
+                                      kinfo->prio_tc[i]);
+       }
+
        priv->ae_handle->last_reset_time = jiffies;
        return 0;
 }
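
The two hunks above follow the usual mqprio-style bring-up: publish the
number of traffic classes, describe each TC's queue range, then map user
priorities onto TCs at open time. A condensed, hedged sketch of that
sequence (the TC/queue split and example_setup_tc name are invented):

#include <linux/netdevice.h>

/* 2 TCs, 4 queues each: TC0 -> queues 0-3, TC1 -> queues 4-7 */
static int example_setup_tc(struct net_device *dev)
{
	int ret, prio;

	ret = netdev_set_num_tc(dev, 2);
	if (ret)
		return ret;

	netdev_set_tc_queue(dev, 0, 4, 0);
	netdev_set_tc_queue(dev, 1, 4, 4);

	/* priorities 0-3 to TC0, 4-7 to TC1 */
	for (prio = 0; prio < 8; prio++)
		netdev_set_prio_tc_map(dev, prio, prio < 4 ? 0 : 1);

	return netif_set_real_num_tx_queues(dev, 8);
}
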
@@ -493,8 +517,8 @@ static int hns3_set_tso(struct sk_buff *skb, u32 *paylen,
 
        /* find the txbd field values */
        *paylen = skb->len - hdr_len;
-       hnae_set_bit(*type_cs_vlan_tso,
-                    HNS3_TXD_TSO_B, 1);
+       hnae3_set_bit(*type_cs_vlan_tso,
+                     HNS3_TXD_TSO_B, 1);
 
        /* get MSS for TSO */
        *mss = skb_shinfo(skb)->gso_size;
@@ -586,21 +610,21 @@ static void hns3_set_l2l3l4_len(struct sk_buff *skb, u8 ol4_proto,
 
        /* compute L2 header size for normal packet, defined in 2 Bytes */
        l2_len = l3.hdr - skb->data;
-       hnae_set_field(*type_cs_vlan_tso, HNS3_TXD_L2LEN_M,
-                      HNS3_TXD_L2LEN_S, l2_len >> 1);
+       hnae3_set_field(*type_cs_vlan_tso, HNS3_TXD_L2LEN_M,
+                       HNS3_TXD_L2LEN_S, l2_len >> 1);
 
        /* tunnel packet */
        if (skb->encapsulation) {
                /* compute OL2 header size, defined in 2 Bytes */
                ol2_len = l2_len;
-               hnae_set_field(*ol_type_vlan_len_msec,
-                              HNS3_TXD_L2LEN_M,
-                              HNS3_TXD_L2LEN_S, ol2_len >> 1);
+               hnae3_set_field(*ol_type_vlan_len_msec,
+                               HNS3_TXD_L2LEN_M,
+                               HNS3_TXD_L2LEN_S, ol2_len >> 1);
 
                /* compute OL3 header size, defined in 4 Bytes */
                ol3_len = l4.hdr - l3.hdr;
-               hnae_set_field(*ol_type_vlan_len_msec, HNS3_TXD_L3LEN_M,
-                              HNS3_TXD_L3LEN_S, ol3_len >> 2);
+               hnae3_set_field(*ol_type_vlan_len_msec, HNS3_TXD_L3LEN_M,
+                               HNS3_TXD_L3LEN_S, ol3_len >> 2);
 
                /* MAC in UDP, MAC in GRE (0x6558)*/
                if ((ol4_proto == IPPROTO_UDP) || (ol4_proto == IPPROTO_GRE)) {
@@ -609,16 +633,17 @@ static void hns3_set_l2l3l4_len(struct sk_buff *skb, u8 ol4_proto,
 
                        /* compute OL4 header size, defined in 4 Bytes. */
                        ol4_len = l2_hdr - l4.hdr;
-                       hnae_set_field(*ol_type_vlan_len_msec, HNS3_TXD_L4LEN_M,
-                                      HNS3_TXD_L4LEN_S, ol4_len >> 2);
+                       hnae3_set_field(*ol_type_vlan_len_msec,
+                                       HNS3_TXD_L4LEN_M, HNS3_TXD_L4LEN_S,
+                                       ol4_len >> 2);
 
                        /* switch IP header ptr from outer to inner header */
                        l3.hdr = skb_inner_network_header(skb);
 
                        /* compute inner l2 header size, defined in 2 Bytes. */
                        l2_len = l3.hdr - l2_hdr;
-                       hnae_set_field(*type_cs_vlan_tso, HNS3_TXD_L2LEN_M,
-                                      HNS3_TXD_L2LEN_S, l2_len >> 1);
+                       hnae3_set_field(*type_cs_vlan_tso, HNS3_TXD_L2LEN_M,
+                                       HNS3_TXD_L2LEN_S, l2_len >> 1);
                } else {
                        /* skb packet types not supported by hardware,
                         * the txbd len field isn't filled.
@@ -634,22 +659,24 @@ static void hns3_set_l2l3l4_len(struct sk_buff *skb, u8 ol4_proto,
 
        /* compute inner(/normal) L3 header size, defined in 4 Bytes */
        l3_len = l4.hdr - l3.hdr;
-       hnae_set_field(*type_cs_vlan_tso, HNS3_TXD_L3LEN_M,
-                      HNS3_TXD_L3LEN_S, l3_len >> 2);
+       hnae3_set_field(*type_cs_vlan_tso, HNS3_TXD_L3LEN_M,
+                       HNS3_TXD_L3LEN_S, l3_len >> 2);
 
        /* compute inner(/normal) L4 header size, defined in 4 Bytes */
        switch (l4_proto) {
        case IPPROTO_TCP:
-               hnae_set_field(*type_cs_vlan_tso, HNS3_TXD_L4LEN_M,
-                              HNS3_TXD_L4LEN_S, l4.tcp->doff);
+               hnae3_set_field(*type_cs_vlan_tso, HNS3_TXD_L4LEN_M,
+                               HNS3_TXD_L4LEN_S, l4.tcp->doff);
                break;
        case IPPROTO_SCTP:
-               hnae_set_field(*type_cs_vlan_tso, HNS3_TXD_L4LEN_M,
-                              HNS3_TXD_L4LEN_S, (sizeof(struct sctphdr) >> 2));
+               hnae3_set_field(*type_cs_vlan_tso, HNS3_TXD_L4LEN_M,
+                               HNS3_TXD_L4LEN_S,
+                               (sizeof(struct sctphdr) >> 2));
                break;
        case IPPROTO_UDP:
-               hnae_set_field(*type_cs_vlan_tso, HNS3_TXD_L4LEN_M,
-                              HNS3_TXD_L4LEN_S, (sizeof(struct udphdr) >> 2));
+               hnae3_set_field(*type_cs_vlan_tso, HNS3_TXD_L4LEN_M,
+                               HNS3_TXD_L4LEN_S,
+                               (sizeof(struct udphdr) >> 2));
                break;
        default:
                /* skb packet types not supported by hardware,
@@ -703,32 +730,34 @@ static int hns3_set_l3l4_type_csum(struct sk_buff *skb, u8 ol4_proto,
                /* define outer network header type.*/
                if (skb->protocol == htons(ETH_P_IP)) {
                        if (skb_is_gso(skb))
-                               hnae_set_field(*ol_type_vlan_len_msec,
-                                              HNS3_TXD_OL3T_M, HNS3_TXD_OL3T_S,
-                                              HNS3_OL3T_IPV4_CSUM);
+                               hnae3_set_field(*ol_type_vlan_len_msec,
+                                               HNS3_TXD_OL3T_M,
+                                               HNS3_TXD_OL3T_S,
+                                               HNS3_OL3T_IPV4_CSUM);
                        else
-                               hnae_set_field(*ol_type_vlan_len_msec,
-                                              HNS3_TXD_OL3T_M, HNS3_TXD_OL3T_S,
-                                              HNS3_OL3T_IPV4_NO_CSUM);
+                               hnae3_set_field(*ol_type_vlan_len_msec,
+                                               HNS3_TXD_OL3T_M,
+                                               HNS3_TXD_OL3T_S,
+                                               HNS3_OL3T_IPV4_NO_CSUM);
 
                } else if (skb->protocol == htons(ETH_P_IPV6)) {
-                       hnae_set_field(*ol_type_vlan_len_msec, HNS3_TXD_OL3T_M,
-                                      HNS3_TXD_OL3T_S, HNS3_OL3T_IPV6);
+                       hnae3_set_field(*ol_type_vlan_len_msec, HNS3_TXD_OL3T_M,
+                                       HNS3_TXD_OL3T_S, HNS3_OL3T_IPV6);
                }
 
                /* define tunnel type(OL4).*/
                switch (l4_proto) {
                case IPPROTO_UDP:
-                       hnae_set_field(*ol_type_vlan_len_msec,
-                                      HNS3_TXD_TUNTYPE_M,
-                                      HNS3_TXD_TUNTYPE_S,
-                                      HNS3_TUN_MAC_IN_UDP);
+                       hnae3_set_field(*ol_type_vlan_len_msec,
+                                       HNS3_TXD_TUNTYPE_M,
+                                       HNS3_TXD_TUNTYPE_S,
+                                       HNS3_TUN_MAC_IN_UDP);
                        break;
                case IPPROTO_GRE:
-                       hnae_set_field(*ol_type_vlan_len_msec,
-                                      HNS3_TXD_TUNTYPE_M,
-                                      HNS3_TXD_TUNTYPE_S,
-                                      HNS3_TUN_NVGRE);
+                       hnae3_set_field(*ol_type_vlan_len_msec,
+                                       HNS3_TXD_TUNTYPE_M,
+                                       HNS3_TXD_TUNTYPE_S,
+                                       HNS3_TUN_NVGRE);
                        break;
                default:
                        /* drop the skb tunnel packet if hardware doesn't support,
@@ -749,43 +778,43 @@ static int hns3_set_l3l4_type_csum(struct sk_buff *skb, u8 ol4_proto,
        }
 
        if (l3.v4->version == 4) {
-               hnae_set_field(*type_cs_vlan_tso, HNS3_TXD_L3T_M,
-                              HNS3_TXD_L3T_S, HNS3_L3T_IPV4);
+               hnae3_set_field(*type_cs_vlan_tso, HNS3_TXD_L3T_M,
+                               HNS3_TXD_L3T_S, HNS3_L3T_IPV4);
 
                /* the stack computes the IP header already, the only time we
                 * need the hardware to recompute it is in the case of TSO.
                 */
                if (skb_is_gso(skb))
-                       hnae_set_bit(*type_cs_vlan_tso, HNS3_TXD_L3CS_B, 1);
-
-               hnae_set_bit(*type_cs_vlan_tso, HNS3_TXD_L4CS_B, 1);
+                       hnae3_set_bit(*type_cs_vlan_tso, HNS3_TXD_L3CS_B, 1);
        } else if (l3.v6->version == 6) {
-               hnae_set_field(*type_cs_vlan_tso, HNS3_TXD_L3T_M,
-                              HNS3_TXD_L3T_S, HNS3_L3T_IPV6);
-               hnae_set_bit(*type_cs_vlan_tso, HNS3_TXD_L4CS_B, 1);
+               hnae3_set_field(*type_cs_vlan_tso, HNS3_TXD_L3T_M,
+                               HNS3_TXD_L3T_S, HNS3_L3T_IPV6);
        }
 
        switch (l4_proto) {
        case IPPROTO_TCP:
-               hnae_set_field(*type_cs_vlan_tso,
-                              HNS3_TXD_L4T_M,
-                              HNS3_TXD_L4T_S,
-                              HNS3_L4T_TCP);
+               hnae3_set_bit(*type_cs_vlan_tso, HNS3_TXD_L4CS_B, 1);
+               hnae3_set_field(*type_cs_vlan_tso,
+                               HNS3_TXD_L4T_M,
+                               HNS3_TXD_L4T_S,
+                               HNS3_L4T_TCP);
                break;
        case IPPROTO_UDP:
                if (hns3_tunnel_csum_bug(skb))
                        break;
 
-               hnae_set_field(*type_cs_vlan_tso,
-                              HNS3_TXD_L4T_M,
-                              HNS3_TXD_L4T_S,
-                              HNS3_L4T_UDP);
+               hnae3_set_bit(*type_cs_vlan_tso, HNS3_TXD_L4CS_B, 1);
+               hnae3_set_field(*type_cs_vlan_tso,
+                               HNS3_TXD_L4T_M,
+                               HNS3_TXD_L4T_S,
+                               HNS3_L4T_UDP);
                break;
        case IPPROTO_SCTP:
-               hnae_set_field(*type_cs_vlan_tso,
-                              HNS3_TXD_L4T_M,
-                              HNS3_TXD_L4T_S,
-                              HNS3_L4T_SCTP);
+               hnae3_set_bit(*type_cs_vlan_tso, HNS3_TXD_L4CS_B, 1);
+               hnae3_set_field(*type_cs_vlan_tso,
+                               HNS3_TXD_L4T_M,
+                               HNS3_TXD_L4T_S,
+                               HNS3_L4T_SCTP);
                break;
        default:
                /* drop the skb tunnel packet if hardware doesn't support,
@@ -807,11 +836,11 @@ static int hns3_set_l3l4_type_csum(struct sk_buff *skb, u8 ol4_proto,
 static void hns3_set_txbd_baseinfo(u16 *bdtp_fe_sc_vld_ra_ri, int frag_end)
 {
        /* Config bd buffer end */
-       hnae_set_field(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_BDTYPE_M,
-                      HNS3_TXD_BDTYPE_S, 0);
-       hnae_set_bit(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_FE_B, !!frag_end);
-       hnae_set_bit(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_VLD_B, 1);
-       hnae_set_field(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_SC_M, HNS3_TXD_SC_S, 0);
+       hnae3_set_field(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_BDTYPE_M,
+                       HNS3_TXD_BDTYPE_S, 0);
+       hnae3_set_bit(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_FE_B, !!frag_end);
+       hnae3_set_bit(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_VLD_B, 1);
+       hnae3_set_field(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_SC_M, HNS3_TXD_SC_S, 0);
 }
 
 static int hns3_fill_desc_vtags(struct sk_buff *skb,
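
Most of this patch is a mechanical rename of the bit-manipulation helpers from hnae_ to hnae3_, as in the hunk above. A behavioural sketch of what the set_bit/set_field pair does (the real macros live in hnae3.h; the example_ names are stand-ins):

#include <linux/bitops.h>

/* Clear the masked bits, then OR in the new value shifted into place. */
#define example_set_field(origin, mask, shift, val) do {        \
        (origin) &= ~(mask);                                    \
        (origin) |= ((val) << (shift)) & (mask);                \
} while (0)

/* A single-bit field is a one-bit mask at the given position. */
#define example_set_bit(origin, shift, val) \
        example_set_field(origin, BIT(shift), shift, val)
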
@@ -844,10 +873,10 @@ static int hns3_fill_desc_vtags(struct sk_buff *skb,
                 * and use inner_vtag in one tag case.
                 */
                if (skb->protocol == htons(ETH_P_8021Q)) {
-                       hnae_set_bit(*out_vlan_flag, HNS3_TXD_OVLAN_B, 1);
+                       hnae3_set_bit(*out_vlan_flag, HNS3_TXD_OVLAN_B, 1);
                        *out_vtag = vlan_tag;
                } else {
-                       hnae_set_bit(*inner_vlan_flag, HNS3_TXD_VLAN_B, 1);
+                       hnae3_set_bit(*inner_vlan_flag, HNS3_TXD_VLAN_B, 1);
                        *inner_vtag = vlan_tag;
                }
        } else if (skb->protocol == htons(ETH_P_8021Q)) {
@@ -880,7 +909,6 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv,
        u16 out_vtag = 0;
        u32 paylen = 0;
        u16 mss = 0;
-       __be16 protocol;
        u8 ol4_proto;
        u8 il4_proto;
        int ret;
@@ -909,7 +937,6 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv,
 
                if (skb->ip_summed == CHECKSUM_PARTIAL) {
                        skb_reset_mac_len(skb);
-                       protocol = skb->protocol;
 
                        ret = hns3_get_l4_protocol(skb, &ol4_proto, &il4_proto);
                        if (ret)
@@ -1135,7 +1162,7 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev)
 
        wmb(); /* Commit all data before submit */
 
-       hnae_queue_xmit(ring->tqp, buf_num);
+       hnae3_queue_xmit(ring->tqp, buf_num);
 
        return NETDEV_TX_OK;
 
@@ -1304,7 +1331,6 @@ static int hns3_setup_tc(struct net_device *netdev, void *type_data)
        u16 mode = mqprio_qopt->mode;
        u8 hw = mqprio_qopt->qopt.hw;
        bool if_running;
-       unsigned int i;
        int ret;
 
        if (!((hw == TC_MQPRIO_HW_OFFLOAD_TCS &&
@@ -1328,24 +1354,6 @@ static int hns3_setup_tc(struct net_device *netdev, void *type_data)
        if (ret)
                goto out;
 
-       if (tc <= 1) {
-               netdev_reset_tc(netdev);
-       } else {
-               ret = netdev_set_num_tc(netdev, tc);
-               if (ret)
-                       goto out;
-
-               for (i = 0; i < HNAE3_MAX_TC; i++) {
-                       if (!kinfo->tc_info[i].enable)
-                               continue;
-
-                       netdev_set_tc_queue(netdev,
-                                           kinfo->tc_info[i].tc,
-                                           kinfo->tc_info[i].tqp_count,
-                                           kinfo->tc_info[i].tqp_offset);
-               }
-       }
-
        ret = hns3_nic_set_real_num_queue(netdev);
 
 out:
@@ -1703,7 +1711,7 @@ static void hns3_set_default_feature(struct net_device *netdev)
 static int hns3_alloc_buffer(struct hns3_enet_ring *ring,
                             struct hns3_desc_cb *cb)
 {
-       unsigned int order = hnae_page_order(ring);
+       unsigned int order = hnae3_page_order(ring);
        struct page *p;
 
        p = dev_alloc_pages(order);
@@ -1714,7 +1722,7 @@ static int hns3_alloc_buffer(struct hns3_enet_ring *ring,
        cb->page_offset = 0;
        cb->reuse_flag = 0;
        cb->buf  = page_address(p);
-       cb->length = hnae_page_size(ring);
+       cb->length = hnae3_page_size(ring);
        cb->type = DESC_TYPE_PAGE;
 
        return 0;
@@ -1780,33 +1788,27 @@ static void hns3_free_buffers(struct hns3_enet_ring *ring)
 /* free desc along with its attached buffer */
 static void hns3_free_desc(struct hns3_enet_ring *ring)
 {
+       int size = ring->desc_num * sizeof(ring->desc[0]);
+
        hns3_free_buffers(ring);
 
-       dma_unmap_single(ring_to_dev(ring), ring->desc_dma_addr,
-                        ring->desc_num * sizeof(ring->desc[0]),
-                        DMA_BIDIRECTIONAL);
-       ring->desc_dma_addr = 0;
-       kfree(ring->desc);
-       ring->desc = NULL;
+       if (ring->desc) {
+               dma_free_coherent(ring_to_dev(ring), size,
+                                 ring->desc, ring->desc_dma_addr);
+               ring->desc = NULL;
+       }
 }
 
 static int hns3_alloc_desc(struct hns3_enet_ring *ring)
 {
        int size = ring->desc_num * sizeof(ring->desc[0]);
 
-       ring->desc = kzalloc(size, GFP_KERNEL);
+       ring->desc = dma_zalloc_coherent(ring_to_dev(ring), size,
+                                        &ring->desc_dma_addr,
+                                        GFP_KERNEL);
        if (!ring->desc)
                return -ENOMEM;
 
-       ring->desc_dma_addr = dma_map_single(ring_to_dev(ring), ring->desc,
-                                            size, DMA_BIDIRECTIONAL);
-       if (dma_mapping_error(ring_to_dev(ring), ring->desc_dma_addr)) {
-               ring->desc_dma_addr = 0;
-               kfree(ring->desc);
-               ring->desc = NULL;
-               return -ENOMEM;
-       }
-
        return 0;
 }
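
The conversion above replaces a kzalloc() plus dma_map_single() pair with one coherent allocation, which drops the dma_mapping_error() handling and shrinks teardown to a single call. The same pattern in isolation (the example_ names, dev and size are placeholders):

#include <linux/dma-mapping.h>
#include <linux/gfp.h>

static void *example_alloc_ring(struct device *dev, size_t size,
                                dma_addr_t *dma)
{
        /* allocates, zeroes and maps in one step: no mapping error to check */
        return dma_zalloc_coherent(dev, size, dma, GFP_KERNEL);
}

static void example_free_ring(struct device *dev, size_t size,
                              void *desc, dma_addr_t dma)
{
        if (desc)
                dma_free_coherent(dev, size, desc, dma);
}
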
 
@@ -1887,7 +1889,7 @@ static void hns3_nic_reclaim_one_desc(struct hns3_enet_ring *ring, int *bytes,
 
        (*pkts) += (desc_cb->type == DESC_TYPE_SKB);
        (*bytes) += desc_cb->length;
-       /* desc_cb will be cleaned, after hnae_free_buffer_detach*/
+       /* desc_cb will be cleaned after hnae3_free_buffer_detach() */
        hns3_free_buffer_detach(ring, ring->next_to_clean);
 
        ring_ptr_move_fw(ring, next_to_clean);
@@ -1917,7 +1919,7 @@ bool hns3_clean_tx_ring(struct hns3_enet_ring *ring, int budget)
        if (is_ring_empty(ring) || head == ring->next_to_clean)
                return true; /* no data to poll */
 
-       if (!is_valid_clean_head(ring, head)) {
+       if (unlikely(!is_valid_clean_head(ring, head))) {
                netdev_err(netdev, "wrong head (%d, %d-%d)\n", head,
                           ring->next_to_use, ring->next_to_clean);
 
@@ -2016,15 +2018,15 @@ static void hns3_nic_reuse_page(struct sk_buff *skb, int i,
        bool twobufs;
 
        twobufs = ((PAGE_SIZE < 8192) &&
-               hnae_buf_size(ring) == HNS3_BUFFER_SIZE_2048);
+               hnae3_buf_size(ring) == HNS3_BUFFER_SIZE_2048);
 
        desc = &ring->desc[ring->next_to_clean];
        size = le16_to_cpu(desc->rx.size);
 
-       truesize = hnae_buf_size(ring);
+       truesize = hnae3_buf_size(ring);
 
        if (!twobufs)
-               last_offset = hnae_page_size(ring) - hnae_buf_size(ring);
+               last_offset = hnae3_page_size(ring) - hnae3_buf_size(ring);
 
        skb_add_rx_frag(skb, i, desc_cb->priv, desc_cb->page_offset + pull_len,
                        size - pull_len, truesize);
@@ -2076,13 +2078,13 @@ static void hns3_rx_checksum(struct hns3_enet_ring *ring, struct sk_buff *skb,
                return;
 
        /* check if hardware has done checksum */
-       if (!hnae_get_bit(bd_base_info, HNS3_RXD_L3L4P_B))
+       if (!hnae3_get_bit(bd_base_info, HNS3_RXD_L3L4P_B))
                return;
 
-       if (unlikely(hnae_get_bit(l234info, HNS3_RXD_L3E_B) ||
-                    hnae_get_bit(l234info, HNS3_RXD_L4E_B) ||
-                    hnae_get_bit(l234info, HNS3_RXD_OL3E_B) ||
-                    hnae_get_bit(l234info, HNS3_RXD_OL4E_B))) {
+       if (unlikely(hnae3_get_bit(l234info, HNS3_RXD_L3E_B) ||
+                    hnae3_get_bit(l234info, HNS3_RXD_L4E_B) ||
+                    hnae3_get_bit(l234info, HNS3_RXD_OL3E_B) ||
+                    hnae3_get_bit(l234info, HNS3_RXD_OL4E_B))) {
                netdev_err(netdev, "L3/L4 error pkt\n");
                u64_stats_update_begin(&ring->syncp);
                ring->stats.l3l4_csum_err++;
@@ -2091,23 +2093,24 @@ static void hns3_rx_checksum(struct hns3_enet_ring *ring, struct sk_buff *skb,
                return;
        }
 
-       l3_type = hnae_get_field(l234info, HNS3_RXD_L3ID_M,
-                                HNS3_RXD_L3ID_S);
-       l4_type = hnae_get_field(l234info, HNS3_RXD_L4ID_M,
-                                HNS3_RXD_L4ID_S);
+       l3_type = hnae3_get_field(l234info, HNS3_RXD_L3ID_M,
+                                 HNS3_RXD_L3ID_S);
+       l4_type = hnae3_get_field(l234info, HNS3_RXD_L4ID_M,
+                                 HNS3_RXD_L4ID_S);
 
-       ol4_type = hnae_get_field(l234info, HNS3_RXD_OL4ID_M, HNS3_RXD_OL4ID_S);
+       ol4_type = hnae3_get_field(l234info, HNS3_RXD_OL4ID_M,
+                                  HNS3_RXD_OL4ID_S);
        switch (ol4_type) {
        case HNS3_OL4_TYPE_MAC_IN_UDP:
        case HNS3_OL4_TYPE_NVGRE:
                skb->csum_level = 1;
        case HNS3_OL4_TYPE_NO_TUN:
                /* Can checksum ipv4 or ipv6 + UDP/TCP/SCTP packets */
-               if (l3_type == HNS3_L3_TYPE_IPV4 ||
-                   (l3_type == HNS3_L3_TYPE_IPV6 &&
-                    (l4_type == HNS3_L4_TYPE_UDP ||
-                     l4_type == HNS3_L4_TYPE_TCP ||
-                     l4_type == HNS3_L4_TYPE_SCTP)))
+               if ((l3_type == HNS3_L3_TYPE_IPV4 ||
+                    l3_type == HNS3_L3_TYPE_IPV6) &&
+                   (l4_type == HNS3_L4_TYPE_UDP ||
+                    l4_type == HNS3_L4_TYPE_TCP ||
+                    l4_type == HNS3_L4_TYPE_SCTP))
                        skb->ip_summed = CHECKSUM_UNNECESSARY;
                break;
        }
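
The reworked condition above is worth spelling out: the old code marked any IPv4 packet CHECKSUM_UNNECESSARY but gated IPv6 on the L4 type; the fix applies the L4 gate to both IP versions. As a standalone predicate (constants as defined in the driver headers):

#include <linux/types.h>
#include "hns3_enet.h"  /* HNS3_L3_TYPE_*, HNS3_L4_TYPE_* */

static bool example_rx_csum_verified(u32 l3_type, u32 l4_type)
{
        return (l3_type == HNS3_L3_TYPE_IPV4 ||
                l3_type == HNS3_L3_TYPE_IPV6) &&
               (l4_type == HNS3_L4_TYPE_UDP ||
                l4_type == HNS3_L4_TYPE_TCP ||
                l4_type == HNS3_L4_TYPE_SCTP);
}
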
@@ -2135,8 +2138,8 @@ static u16 hns3_parse_vlan_tag(struct hns3_enet_ring *ring,
 #define HNS3_STRP_OUTER_VLAN   0x1
 #define HNS3_STRP_INNER_VLAN   0x2
 
-       switch (hnae_get_field(l234info, HNS3_RXD_STRP_TAGP_M,
-                              HNS3_RXD_STRP_TAGP_S)) {
+       switch (hnae3_get_field(l234info, HNS3_RXD_STRP_TAGP_M,
+                               HNS3_RXD_STRP_TAGP_S)) {
        case HNS3_STRP_OUTER_VLAN:
                vlan_tag = le16_to_cpu(desc->rx.ot_vlan_tag);
                break;
@@ -2174,7 +2177,7 @@ static int hns3_handle_rx_bd(struct hns3_enet_ring *ring,
        bd_base_info = le32_to_cpu(desc->rx.bd_base_info);
 
        /* Check valid BD */
-       if (!hnae_get_bit(bd_base_info, HNS3_RXD_VLD_B))
+       if (unlikely(!hnae3_get_bit(bd_base_info, HNS3_RXD_VLD_B)))
                return -EFAULT;
 
        va = (unsigned char *)desc_cb->buf + desc_cb->page_offset;
@@ -2229,7 +2232,7 @@ static int hns3_handle_rx_bd(struct hns3_enet_ring *ring,
                hns3_nic_reuse_page(skb, 0, ring, pull_len, desc_cb);
                ring_ptr_move_fw(ring, next_to_clean);
 
-               while (!hnae_get_bit(bd_base_info, HNS3_RXD_FE_B)) {
+               while (!hnae3_get_bit(bd_base_info, HNS3_RXD_FE_B)) {
                        desc = &ring->desc[ring->next_to_clean];
                        desc_cb = &ring->desc_cb[ring->next_to_clean];
                        bd_base_info = le32_to_cpu(desc->rx.bd_base_info);
@@ -2257,7 +2260,7 @@ static int hns3_handle_rx_bd(struct hns3_enet_ring *ring,
                                               vlan_tag);
        }
 
-       if (unlikely(!hnae_get_bit(bd_base_info, HNS3_RXD_VLD_B))) {
+       if (unlikely(!hnae3_get_bit(bd_base_info, HNS3_RXD_VLD_B))) {
                netdev_err(netdev, "no valid bd,%016llx,%016llx\n",
                           ((u64 *)desc)[0], ((u64 *)desc)[1]);
                u64_stats_update_begin(&ring->syncp);
@@ -2269,7 +2272,7 @@ static int hns3_handle_rx_bd(struct hns3_enet_ring *ring,
        }
 
        if (unlikely((!desc->rx.pkt_len) ||
-                    hnae_get_bit(l234info, HNS3_RXD_TRUNCAT_B))) {
+                    hnae3_get_bit(l234info, HNS3_RXD_TRUNCAT_B))) {
                netdev_err(netdev, "truncated pkt\n");
                u64_stats_update_begin(&ring->syncp);
                ring->stats.err_pkt_len++;
@@ -2279,7 +2282,7 @@ static int hns3_handle_rx_bd(struct hns3_enet_ring *ring,
                return -EFAULT;
        }
 
-       if (unlikely(hnae_get_bit(l234info, HNS3_RXD_L2E_B))) {
+       if (unlikely(hnae3_get_bit(l234info, HNS3_RXD_L2E_B))) {
                netdev_err(netdev, "L2 error pkt\n");
                u64_stats_update_begin(&ring->syncp);
                ring->stats.l2_err++;
@@ -2532,10 +2535,10 @@ static int hns3_get_vector_ring_chain(struct hns3_enet_tqp_vector *tqp_vector,
        tx_ring = tqp_vector->tx_group.ring;
        if (tx_ring) {
                cur_chain->tqp_index = tx_ring->tqp->tqp_index;
-               hnae_set_bit(cur_chain->flag, HNAE3_RING_TYPE_B,
-                            HNAE3_RING_TYPE_TX);
-               hnae_set_field(cur_chain->int_gl_idx, HNAE3_RING_GL_IDX_M,
-                              HNAE3_RING_GL_IDX_S, HNAE3_RING_GL_TX);
+               hnae3_set_bit(cur_chain->flag, HNAE3_RING_TYPE_B,
+                             HNAE3_RING_TYPE_TX);
+               hnae3_set_field(cur_chain->int_gl_idx, HNAE3_RING_GL_IDX_M,
+                               HNAE3_RING_GL_IDX_S, HNAE3_RING_GL_TX);
 
                cur_chain->next = NULL;
 
@@ -2549,12 +2552,12 @@ static int hns3_get_vector_ring_chain(struct hns3_enet_tqp_vector *tqp_vector,
 
                        cur_chain->next = chain;
                        chain->tqp_index = tx_ring->tqp->tqp_index;
-                       hnae_set_bit(chain->flag, HNAE3_RING_TYPE_B,
-                                    HNAE3_RING_TYPE_TX);
-                       hnae_set_field(chain->int_gl_idx,
-                                      HNAE3_RING_GL_IDX_M,
-                                      HNAE3_RING_GL_IDX_S,
-                                      HNAE3_RING_GL_TX);
+                       hnae3_set_bit(chain->flag, HNAE3_RING_TYPE_B,
+                                     HNAE3_RING_TYPE_TX);
+                       hnae3_set_field(chain->int_gl_idx,
+                                       HNAE3_RING_GL_IDX_M,
+                                       HNAE3_RING_GL_IDX_S,
+                                       HNAE3_RING_GL_TX);
 
                        cur_chain = chain;
                }
@@ -2564,10 +2567,10 @@ static int hns3_get_vector_ring_chain(struct hns3_enet_tqp_vector *tqp_vector,
        if (!tx_ring && rx_ring) {
                cur_chain->next = NULL;
                cur_chain->tqp_index = rx_ring->tqp->tqp_index;
-               hnae_set_bit(cur_chain->flag, HNAE3_RING_TYPE_B,
-                            HNAE3_RING_TYPE_RX);
-               hnae_set_field(cur_chain->int_gl_idx, HNAE3_RING_GL_IDX_M,
-                              HNAE3_RING_GL_IDX_S, HNAE3_RING_GL_RX);
+               hnae3_set_bit(cur_chain->flag, HNAE3_RING_TYPE_B,
+                             HNAE3_RING_TYPE_RX);
+               hnae3_set_field(cur_chain->int_gl_idx, HNAE3_RING_GL_IDX_M,
+                               HNAE3_RING_GL_IDX_S, HNAE3_RING_GL_RX);
 
                rx_ring = rx_ring->next;
        }
@@ -2579,10 +2582,10 @@ static int hns3_get_vector_ring_chain(struct hns3_enet_tqp_vector *tqp_vector,
 
                cur_chain->next = chain;
                chain->tqp_index = rx_ring->tqp->tqp_index;
-               hnae_set_bit(chain->flag, HNAE3_RING_TYPE_B,
-                            HNAE3_RING_TYPE_RX);
-               hnae_set_field(chain->int_gl_idx, HNAE3_RING_GL_IDX_M,
-                              HNAE3_RING_GL_IDX_S, HNAE3_RING_GL_RX);
+               hnae3_set_bit(chain->flag, HNAE3_RING_TYPE_B,
+                             HNAE3_RING_TYPE_RX);
+               hnae3_set_field(chain->int_gl_idx, HNAE3_RING_GL_IDX_M,
+                               HNAE3_RING_GL_IDX_S, HNAE3_RING_GL_RX);
 
                cur_chain = chain;
 
@@ -2745,10 +2748,6 @@ static int hns3_nic_uninit_vector_data(struct hns3_nic_priv *priv)
                if (ret)
                        return ret;
 
-               ret = h->ae_algo->ops->put_vector(h, tqp_vector->vector_irq);
-               if (ret)
-                       return ret;
-
                hns3_free_vector_ring_chain(tqp_vector, &vector_ring_chain);
 
                if (priv->tqp_vector[i].irq_init_flag == HNS3_VECTOR_INITED) {
@@ -2809,7 +2808,7 @@ static int hns3_ring_get_cfg(struct hnae3_queue *q, struct hns3_nic_priv *priv,
                ring->io_base = q->io_base;
        }
 
-       hnae_set_bit(ring->flag, HNAE3_RING_TYPE_B, ring_type);
+       hnae3_set_bit(ring->flag, HNAE3_RING_TYPE_B, ring_type);
 
        ring->tqp = q;
        ring->desc = NULL;
@@ -3081,7 +3080,6 @@ static int hns3_client_init(struct hnae3_handle *handle)
        priv->dev = &pdev->dev;
        priv->netdev = netdev;
        priv->ae_handle = handle;
-       priv->ae_handle->reset_level = HNAE3_NONE_RESET;
        priv->ae_handle->last_reset_time = jiffies;
        priv->tx_timeout_count = 0;
 
@@ -3102,6 +3100,11 @@ static int hns3_client_init(struct hnae3_handle *handle)
        /* Carrier off reporting is important to ethtool even BEFORE open */
        netif_carrier_off(netdev);
 
+       if (handle->flags & HNAE3_SUPPORT_VF)
+               handle->reset_level = HNAE3_VF_RESET;
+       else
+               handle->reset_level = HNAE3_FUNC_RESET;
+
        ret = hns3_get_ring_config(priv);
        if (ret) {
                ret = -ENOMEM;
@@ -3208,7 +3211,6 @@ static int hns3_client_setup_tc(struct hnae3_handle *handle, u8 tc)
        struct net_device *ndev = kinfo->netdev;
        bool if_running;
        int ret;
-       u8 i;
 
        if (tc > HNAE3_MAX_TC)
                return -EINVAL;
@@ -3218,10 +3220,6 @@ static int hns3_client_setup_tc(struct hnae3_handle *handle, u8 tc)
 
        if_running = netif_running(ndev);
 
-       ret = netdev_set_num_tc(ndev, tc);
-       if (ret)
-               return ret;
-
        if (if_running) {
                (void)hns3_nic_net_stop(ndev);
                msleep(100);
@@ -3232,27 +3230,6 @@ static int hns3_client_setup_tc(struct hnae3_handle *handle, u8 tc)
        if (ret)
                goto err_out;
 
-       if (tc <= 1) {
-               netdev_reset_tc(ndev);
-               goto out;
-       }
-
-       for (i = 0; i < HNAE3_MAX_TC; i++) {
-               struct hnae3_tc_info *tc_info = &kinfo->tc_info[i];
-
-               if (tc_info->enable)
-                       netdev_set_tc_queue(ndev,
-                                           tc_info->tc,
-                                           tc_info->tqp_count,
-                                           tc_info->tqp_offset);
-       }
-
-       for (i = 0; i < HNAE3_MAX_USER_PRIO; i++) {
-               netdev_set_prio_tc_map(ndev, i,
-                                      kinfo->prio_tc[i]);
-       }
-
-out:
        ret = hns3_nic_set_real_num_queue(ndev);
 
 err_out:
@@ -3418,7 +3395,7 @@ static int hns3_reset_notify_down_enet(struct hnae3_handle *handle)
        struct net_device *ndev = kinfo->netdev;
 
        if (!netif_running(ndev))
-               return -EIO;
+               return 0;
 
        return hns3_nic_net_stop(ndev);
 }
@@ -3458,10 +3435,6 @@ static int hns3_reset_notify_init_enet(struct hnae3_handle *handle)
        /* Carrier off reporting is important to ethtool even BEFORE open */
        netif_carrier_off(netdev);
 
-       ret = hns3_get_ring_config(priv);
-       if (ret)
-               return ret;
-
        ret = hns3_nic_init_vector_data(priv);
        if (ret)
                return ret;
@@ -3493,10 +3466,6 @@ static int hns3_reset_notify_uninit_enet(struct hnae3_handle *handle)
        if (ret)
                netdev_err(netdev, "uninit ring error\n");
 
-       hns3_put_ring_config(priv);
-
-       priv->ring_data = NULL;
-
        hns3_uninit_mac_addr(netdev);
 
        return ret;
index 3b083d5ae9ce25832fdd01ddabd0be999804577f..e4b4a8f2ceaab58a1a2ed534e058ee937a67e27f 100644 (file)
@@ -1,11 +1,5 @@
-/*
- * Copyright (c) 2016 Hisilicon Limited.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright (c) 2016-2017 Hisilicon Limited.
 
 #ifndef __HNS3_ENET_H
 #define __HNS3_ENET_H
@@ -499,7 +493,6 @@ struct hns3_enet_tqp_vector {
 
        u16 num_tqps;   /* total number of tqps in TQP vector */
 
-       cpumask_t affinity_mask;
        char name[HNAE3_INT_NAME_LEN];
 
        /* when 0 should adjust interrupt coalesce parameter */
@@ -591,7 +584,7 @@ static inline void hns3_write_reg(void __iomem *base, u32 reg, u32 value)
 #define hns3_write_dev(a, reg, value) \
        hns3_write_reg((a)->io_base, (reg), (value))
 
-#define hnae_queue_xmit(tqp, buf_num) writel_relaxed(buf_num, \
+#define hnae3_queue_xmit(tqp, buf_num) writel_relaxed(buf_num, \
                (tqp)->io_base + HNS3_RING_TX_RING_TAIL_REG)
 
 #define ring_to_dev(ring) (&(ring)->tqp->handle->pdev->dev)
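
The renamed hnae3_queue_xmit() above posts the TX doorbell with a relaxed MMIO write; the transmit path pays for ordering once with wmb() before ringing it (see the hns3_nic_net_xmit() hunk earlier). The pattern in isolation (tail_reg is a placeholder for the ring's tail register):

#include <asm/barrier.h>
#include <linux/io.h>

static void example_ring_doorbell(void __iomem *tail_reg, u32 buf_num)
{
        wmb();  /* commit descriptor memory before telling hardware */
        writel_relaxed(buf_num, tail_reg);
}
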
@@ -601,9 +594,9 @@ static inline void hns3_write_reg(void __iomem *base, u32 reg, u32 value)
 
 #define tx_ring_data(priv, idx) ((priv)->ring_data[idx])
 
-#define hnae_buf_size(_ring) ((_ring)->buf_size)
-#define hnae_page_order(_ring) (get_order(hnae_buf_size(_ring)))
-#define hnae_page_size(_ring) (PAGE_SIZE << hnae_page_order(_ring))
+#define hnae3_buf_size(_ring) ((_ring)->buf_size)
+#define hnae3_page_order(_ring) (get_order(hnae3_buf_size(_ring)))
+#define hnae3_page_size(_ring) (PAGE_SIZE << hnae3_page_order(_ring))
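
For a feel of what the renamed macros yield: with 4 KiB pages and the common 2048-byte RX buffer, get_order(2048) is 0, the page size is 4096, and two buffers share one page (the "twobufs" case handled in hns3_nic_reuse_page()). A tiny sketch:

#include <linux/mm.h>

static void example_page_geometry(void)
{
        unsigned int order = get_order(2048);           /* 0 on 4K pages */
        unsigned long page_sz = PAGE_SIZE << order;     /* 4096 */
        unsigned long bufs_per_page = page_sz / 2048;   /* 2 */

        (void)bufs_per_page;
}
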
 
 /* iterator for handling rings in ring group */
 #define hns3_for_each_ring(pos, head) \
index 40c0425b4023bd98a1ca2d926efc02fe5d378155..80ba95d76260ac796bbb0aa64b54d1b68015e53e 100644 (file)
@@ -1,11 +1,5 @@
-/*
- * Copyright (c) 2016~2017 Hisilicon Limited.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright (c) 2016-2017 Hisilicon Limited.
 
 #include <linux/etherdevice.h>
 #include <linux/string.h>
@@ -201,7 +195,9 @@ static u32 hns3_lb_check_rx_ring(struct hns3_nic_priv *priv, u32 budget)
                rx_group = &ring->tqp_vector->rx_group;
                pre_rx_pkt = rx_group->total_packets;
 
+               preempt_disable();
                hns3_clean_rx_ring(ring, budget, hns3_lb_check_skb_data);
+               preempt_enable();
 
                rcv_good_pkt_total += (rx_group->total_packets - pre_rx_pkt);
                rx_group->total_packets = pre_rx_pkt;
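
A note on the preempt_disable() pair added above: hns3_clean_rx_ring() is normally driven from NAPI (softirq) context, while the loopback selftest calls it from process context, so the poll is presumably wrapped to keep it from being preempted mid-run. Schematically (example_poll stands in for the real clean routine):

#include <linux/preempt.h>

static void example_selftest_poll(void (*example_poll)(void))
{
        preempt_disable();      /* approximate the non-preemptible NAPI context */
        example_poll();
        preempt_enable();
}
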
index c36d64710fa69a94d911046a136cb231740f4c41..ac13cb2b168e5a6e67517837dd470e092a0db8f8 100644 (file)
@@ -1,11 +1,5 @@
-/*
- * Copyright (c) 2016~2017 Hisilicon Limited.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright (c) 2016-2017 Hisilicon Limited.
 
 #include <linux/dma-mapping.h>
 #include <linux/slab.h>
@@ -18,8 +12,7 @@
 #include "hclge_main.h"
 
 #define hclge_is_csq(ring) ((ring)->flag & HCLGE_TYPE_CSQ)
-#define hclge_ring_to_dma_dir(ring) (hclge_is_csq(ring) ? \
-       DMA_TO_DEVICE : DMA_FROM_DEVICE)
+
 #define cmq_ring_to_dev(ring)   (&(ring)->dev->pdev->dev)
 
 static int hclge_ring_space(struct hclge_cmq_ring *ring)
@@ -46,31 +39,24 @@ static int hclge_alloc_cmd_desc(struct hclge_cmq_ring *ring)
 {
        int size  = ring->desc_num * sizeof(struct hclge_desc);
 
-       ring->desc = kzalloc(size, GFP_KERNEL);
+       ring->desc = dma_zalloc_coherent(cmq_ring_to_dev(ring),
+                                        size, &ring->desc_dma_addr,
+                                        GFP_KERNEL);
        if (!ring->desc)
                return -ENOMEM;
 
-       ring->desc_dma_addr = dma_map_single(cmq_ring_to_dev(ring), ring->desc,
-                                            size, DMA_BIDIRECTIONAL);
-       if (dma_mapping_error(cmq_ring_to_dev(ring), ring->desc_dma_addr)) {
-               ring->desc_dma_addr = 0;
-               kfree(ring->desc);
-               ring->desc = NULL;
-               return -ENOMEM;
-       }
-
        return 0;
 }
 
 static void hclge_free_cmd_desc(struct hclge_cmq_ring *ring)
 {
-       dma_unmap_single(cmq_ring_to_dev(ring), ring->desc_dma_addr,
-                        ring->desc_num * sizeof(ring->desc[0]),
-                        DMA_BIDIRECTIONAL);
+       int size  = ring->desc_num * sizeof(struct hclge_desc);
 
-       ring->desc_dma_addr = 0;
-       kfree(ring->desc);
-       ring->desc = NULL;
+       if (ring->desc) {
+               dma_free_coherent(cmq_ring_to_dev(ring), size,
+                                 ring->desc, ring->desc_dma_addr);
+               ring->desc = NULL;
+       }
 }
 
 static int hclge_alloc_cmd_queue(struct hclge_dev *hdev, int ring_type)
@@ -80,7 +66,7 @@ static int hclge_alloc_cmd_queue(struct hclge_dev *hdev, int ring_type)
                (ring_type == HCLGE_TYPE_CSQ) ? &hw->cmq.csq : &hw->cmq.crq;
        int ret;
 
-       ring->flag = ring_type;
+       ring->ring_type = ring_type;
        ring->dev = hdev;
 
        ret = hclge_alloc_cmd_desc(ring);
@@ -111,8 +97,6 @@ void hclge_cmd_setup_basic_desc(struct hclge_desc *desc,
 
        if (is_read)
                desc->flag |= cpu_to_le16(HCLGE_CMD_FLAG_WR);
-       else
-               desc->flag &= cpu_to_le16(~HCLGE_CMD_FLAG_WR);
 }
 
 static void hclge_cmd_config_regs(struct hclge_cmq_ring *ring)
@@ -121,26 +105,26 @@ static void hclge_cmd_config_regs(struct hclge_cmq_ring *ring)
        struct hclge_dev *hdev = ring->dev;
        struct hclge_hw *hw = &hdev->hw;
 
-       if (ring->flag == HCLGE_TYPE_CSQ) {
+       if (ring->ring_type == HCLGE_TYPE_CSQ) {
                hclge_write_dev(hw, HCLGE_NIC_CSQ_BASEADDR_L_REG,
-                               (u32)dma);
+                               lower_32_bits(dma));
                hclge_write_dev(hw, HCLGE_NIC_CSQ_BASEADDR_H_REG,
-                               (u32)((dma >> 31) >> 1));
+                               upper_32_bits(dma));
                hclge_write_dev(hw, HCLGE_NIC_CSQ_DEPTH_REG,
                                (ring->desc_num >> HCLGE_NIC_CMQ_DESC_NUM_S) |
                                HCLGE_NIC_CMQ_ENABLE);
-               hclge_write_dev(hw, HCLGE_NIC_CSQ_TAIL_REG, 0);
                hclge_write_dev(hw, HCLGE_NIC_CSQ_HEAD_REG, 0);
+               hclge_write_dev(hw, HCLGE_NIC_CSQ_TAIL_REG, 0);
        } else {
                hclge_write_dev(hw, HCLGE_NIC_CRQ_BASEADDR_L_REG,
-                               (u32)dma);
+                               lower_32_bits(dma));
                hclge_write_dev(hw, HCLGE_NIC_CRQ_BASEADDR_H_REG,
-                               (u32)((dma >> 31) >> 1));
+                               upper_32_bits(dma));
                hclge_write_dev(hw, HCLGE_NIC_CRQ_DEPTH_REG,
                                (ring->desc_num >> HCLGE_NIC_CMQ_DESC_NUM_S) |
                                HCLGE_NIC_CMQ_ENABLE);
-               hclge_write_dev(hw, HCLGE_NIC_CRQ_TAIL_REG, 0);
                hclge_write_dev(hw, HCLGE_NIC_CRQ_HEAD_REG, 0);
+               hclge_write_dev(hw, HCLGE_NIC_CRQ_TAIL_REG, 0);
        }
 }
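
The writes above switch from hand-rolled address splitting, (u32)dma for the low half and (dma >> 31) >> 1 for the high half (the double shift avoids an undefined shift-by-32 on a 32-bit dma_addr_t), to the kernel's lower_32_bits()/upper_32_bits() helpers, which encode the same trick. In isolation:

#include <linux/kernel.h>
#include <linux/types.h>

static void example_split_dma_addr(dma_addr_t dma, u32 *lo, u32 *hi)
{
        *lo = lower_32_bits(dma);       /* (u32)dma */
        *hi = upper_32_bits(dma);       /* ((dma >> 16) >> 16), shift-safe */
}
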
 
@@ -152,33 +136,27 @@ static void hclge_cmd_init_regs(struct hclge_hw *hw)
 
 static int hclge_cmd_csq_clean(struct hclge_hw *hw)
 {
-       struct hclge_dev *hdev = (struct hclge_dev *)hw->back;
+       struct hclge_dev *hdev = container_of(hw, struct hclge_dev, hw);
        struct hclge_cmq_ring *csq = &hw->cmq.csq;
-       u16 ntc = csq->next_to_clean;
-       struct hclge_desc *desc;
-       int clean = 0;
        u32 head;
+       int clean;
 
-       desc = &csq->desc[ntc];
        head = hclge_read_dev(hw, HCLGE_NIC_CSQ_HEAD_REG);
        rmb(); /* Make sure head is ready before touching any data */
 
        if (!is_valid_csq_clean_head(csq, head)) {
-               dev_warn(&hdev->pdev->dev, "wrong head (%d, %d-%d)\n", head,
-                          csq->next_to_use, csq->next_to_clean);
-               return 0;
-       }
-
-       while (head != ntc) {
-               memset(desc, 0, sizeof(*desc));
-               ntc++;
-               if (ntc == csq->desc_num)
-                       ntc = 0;
-               desc = &csq->desc[ntc];
-               clean++;
+               dev_warn(&hdev->pdev->dev, "wrong cmd head (%d, %d-%d)\n", head,
+                        csq->next_to_use, csq->next_to_clean);
+               dev_warn(&hdev->pdev->dev,
+                        "Disabling any further commands to IMP firmware\n");
+               set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state);
+               dev_warn(&hdev->pdev->dev,
+                        "IMP firmware watchdog reset soon expected!\n");
+               return -EIO;
        }
-       csq->next_to_clean = ntc;
 
+       clean = (head - csq->next_to_clean + csq->desc_num) % csq->desc_num;
+       csq->next_to_clean = head;
        return clean;
 }
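
The rewritten cleanup above drops the per-descriptor memset loop in favour of plain ring arithmetic: the cleaned count is the forward distance from next_to_clean to the hardware head, modulo the ring size. For example, a 1024-entry ring with next_to_clean == 1020 and head == 4 gives (4 - 1020 + 1024) % 1024 == 8 descriptors:

static int example_ring_dist(int head, int next_to_clean, int desc_num)
{
        /* forward distance; adding desc_num first makes the wrap case safe */
        return (head - next_to_clean + desc_num) % desc_num;
}
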
 
@@ -216,7 +194,7 @@ static bool hclge_is_special_opcode(u16 opcode)
  **/
 int hclge_cmd_send(struct hclge_hw *hw, struct hclge_desc *desc, int num)
 {
-       struct hclge_dev *hdev = (struct hclge_dev *)hw->back;
+       struct hclge_dev *hdev = container_of(hw, struct hclge_dev, hw);
        struct hclge_desc *desc_to_use;
        bool complete = false;
        u32 timeout = 0;
@@ -227,7 +205,8 @@ int hclge_cmd_send(struct hclge_hw *hw, struct hclge_desc *desc, int num)
 
        spin_lock_bh(&hw->cmq.csq.lock);
 
-       if (num > hclge_ring_space(&hw->cmq.csq)) {
+       if (num > hclge_ring_space(&hw->cmq.csq) ||
+           test_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state)) {
                spin_unlock_bh(&hw->cmq.csq.lock);
                return -EBUSY;
        }
@@ -256,33 +235,34 @@ int hclge_cmd_send(struct hclge_hw *hw, struct hclge_desc *desc, int num)
         */
        if (HCLGE_SEND_SYNC(le16_to_cpu(desc->flag))) {
                do {
-                       if (hclge_cmd_csq_done(hw))
+                       if (hclge_cmd_csq_done(hw)) {
+                               complete = true;
                                break;
+                       }
                        udelay(1);
                        timeout++;
                } while (timeout < hw->cmq.tx_timeout);
        }
 
-       if (hclge_cmd_csq_done(hw)) {
-               complete = true;
+       if (!complete) {
+               retval = -EAGAIN;
+       } else {
                handle = 0;
                while (handle < num) {
                        /* Get the result of hardware write back */
                        desc_to_use = &hw->cmq.csq.desc[ntc];
                        desc[handle] = *desc_to_use;
-                       pr_debug("Get cmd desc:\n");
 
                        if (likely(!hclge_is_special_opcode(opcode)))
                                desc_ret = le16_to_cpu(desc[handle].retval);
                        else
                                desc_ret = le16_to_cpu(desc[0].retval);
 
-                       if ((enum hclge_cmd_return_status)desc_ret ==
-                           HCLGE_CMD_EXEC_SUCCESS)
+                       if (desc_ret == HCLGE_CMD_EXEC_SUCCESS)
                                retval = 0;
                        else
                                retval = -EIO;
-                       hw->cmq.last_status = (enum hclge_cmd_status)desc_ret;
+                       hw->cmq.last_status = desc_ret;
                        ntc++;
                        handle++;
                        if (ntc == hw->cmq.csq.desc_num)
@@ -290,15 +270,13 @@ int hclge_cmd_send(struct hclge_hw *hw, struct hclge_desc *desc, int num)
                }
        }
 
-       if (!complete)
-               retval = -EAGAIN;
-
        /* Clean the command send queue */
        handle = hclge_cmd_csq_clean(hw);
-       if (handle != num) {
+       if (handle < 0)
+               retval = handle;
+       else if (handle != num)
                dev_warn(&hdev->pdev->dev,
                         "cleaned %d, need to clean %d\n", handle, num);
-       }
 
        spin_unlock_bh(&hw->cmq.csq.lock);
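
The restructured send path above also tidies the synchronous wait: completion is latched inside the polling loop rather than re-sampling the hardware afterwards. The loop shape reduced to its essentials (example_done stands in for hclge_cmd_csq_done()):

#include <linux/delay.h>
#include <linux/types.h>

static bool example_wait_done(bool (*example_done)(void), u32 tx_timeout)
{
        u32 timeout = 0;

        do {
                if (example_done())
                        return true;    /* latched; no post-loop re-check */
                udelay(1);
                timeout++;
        } while (timeout < tx_timeout);

        return false;                   /* caller maps this to -EAGAIN */
}
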
 
@@ -369,6 +347,7 @@ int hclge_cmd_init(struct hclge_dev *hdev)
        spin_lock_init(&hdev->hw.cmq.crq.lock);
 
        hclge_cmd_init_regs(&hdev->hw);
+       clear_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state);
 
        ret = hclge_cmd_query_firmware_version(&hdev->hw, &version);
        if (ret) {
index d9aaa76c76eb40cf2e7e1991851f8b2abe299e7d..cd0a4f22847021d601573181e9fe77b4af7c55f9 100644 (file)
@@ -1,11 +1,5 @@
-/*
- * Copyright (c) 2016~2017 Hisilicon Limited.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright (c) 2016-2017 Hisilicon Limited.
 
 #ifndef __HCLGE_CMD_H
 #define __HCLGE_CMD_H
@@ -27,17 +21,10 @@ struct hclge_desc {
        __le32 data[6];
 };
 
-struct hclge_desc_cb {
-       dma_addr_t dma;
-       void *va;
-       u32 length;
-};
-
 struct hclge_cmq_ring {
        dma_addr_t desc_dma_addr;
        struct hclge_desc *desc;
-       struct hclge_desc_cb *desc_cb;
-       struct hclge_dev  *dev;
+       struct hclge_dev *dev;
        u32 head;
        u32 tail;
 
@@ -45,7 +32,7 @@ struct hclge_cmq_ring {
        u16 desc_num;
        int next_to_use;
        int next_to_clean;
-       u8 flag;
+       u8 ring_type; /* cmq ring type */
        spinlock_t lock; /* Command queue lock */
 };
 
@@ -71,26 +58,19 @@ struct hclge_misc_vector {
 struct hclge_cmq {
        struct hclge_cmq_ring csq;
        struct hclge_cmq_ring crq;
-       u16 tx_timeout; /* Tx timeout */
+       u16 tx_timeout;
        enum hclge_cmd_status last_status;
 };
 
-#define HCLGE_CMD_FLAG_IN_VALID_SHIFT  0
-#define HCLGE_CMD_FLAG_OUT_VALID_SHIFT 1
-#define HCLGE_CMD_FLAG_NEXT_SHIFT      2
-#define HCLGE_CMD_FLAG_WR_OR_RD_SHIFT  3
-#define HCLGE_CMD_FLAG_NO_INTR_SHIFT   4
-#define HCLGE_CMD_FLAG_ERR_INTR_SHIFT  5
-
-#define HCLGE_CMD_FLAG_IN      BIT(HCLGE_CMD_FLAG_IN_VALID_SHIFT)
-#define HCLGE_CMD_FLAG_OUT     BIT(HCLGE_CMD_FLAG_OUT_VALID_SHIFT)
-#define HCLGE_CMD_FLAG_NEXT    BIT(HCLGE_CMD_FLAG_NEXT_SHIFT)
-#define HCLGE_CMD_FLAG_WR      BIT(HCLGE_CMD_FLAG_WR_OR_RD_SHIFT)
-#define HCLGE_CMD_FLAG_NO_INTR BIT(HCLGE_CMD_FLAG_NO_INTR_SHIFT)
-#define HCLGE_CMD_FLAG_ERR_INTR        BIT(HCLGE_CMD_FLAG_ERR_INTR_SHIFT)
+#define HCLGE_CMD_FLAG_IN      BIT(0)
+#define HCLGE_CMD_FLAG_OUT     BIT(1)
+#define HCLGE_CMD_FLAG_NEXT    BIT(2)
+#define HCLGE_CMD_FLAG_WR      BIT(3)
+#define HCLGE_CMD_FLAG_NO_INTR BIT(4)
+#define HCLGE_CMD_FLAG_ERR_INTR        BIT(5)
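
The flag definitions above drop the single-use _SHIFT constants in favour of BIT(), and later hunks replace hand-built masks with GENMASK(); both are plain <linux/bitops.h> macros:

#include <linux/bitops.h>

#define EXAMPLE_FLAG_WR         BIT(3)          /* 1UL << 3   == 0x08   */
#define EXAMPLE_VEC_NUM_M       GENMASK(7, 0)   /* bits 7..0  == 0xff   */
#define EXAMPLE_ROCEE_OFT_M     GENMASK(15, 0)  /* bits 15..0 == 0xffff */
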
 
 enum hclge_opcode_type {
-       /* Generic command */
+       /* Generic commands */
        HCLGE_OPC_QUERY_FW_VER          = 0x0001,
        HCLGE_OPC_CFG_RST_TRIGGER       = 0x0020,
        HCLGE_OPC_GBL_RST_STATUS        = 0x0021,
@@ -106,18 +86,16 @@ enum hclge_opcode_type {
        HCLGE_OPC_QUERY_REG_NUM         = 0x0040,
        HCLGE_OPC_QUERY_32_BIT_REG      = 0x0041,
        HCLGE_OPC_QUERY_64_BIT_REG      = 0x0042,
-       /* Device management command */
 
-       /* MAC commond */
+       /* MAC command */
        HCLGE_OPC_CONFIG_MAC_MODE       = 0x0301,
        HCLGE_OPC_CONFIG_AN_MODE        = 0x0304,
        HCLGE_OPC_QUERY_AN_RESULT       = 0x0306,
        HCLGE_OPC_QUERY_LINK_STATUS     = 0x0307,
        HCLGE_OPC_CONFIG_MAX_FRM_SIZE   = 0x0308,
        HCLGE_OPC_CONFIG_SPEED_DUP      = 0x0309,
-       /* MACSEC command */
 
-       /* PFC/Pause CMD*/
+       /* PFC/Pause commands */
        HCLGE_OPC_CFG_MAC_PAUSE_EN      = 0x0701,
        HCLGE_OPC_CFG_PFC_PAUSE_EN      = 0x0702,
        HCLGE_OPC_CFG_MAC_PARA          = 0x0703,
@@ -148,7 +126,7 @@ enum hclge_opcode_type {
        HCLGE_OPC_TM_QS_SCH_MODE_CFG    = 0x0814,
        HCLGE_OPC_TM_BP_TO_QSET_MAPPING = 0x0815,
 
-       /* Packet buffer allocate command */
+       /* Packet buffer allocate commands */
        HCLGE_OPC_TX_BUFF_ALLOC         = 0x0901,
        HCLGE_OPC_RX_PRIV_BUFF_ALLOC    = 0x0902,
        HCLGE_OPC_RX_PRIV_WL_ALLOC      = 0x0903,
@@ -156,11 +134,10 @@ enum hclge_opcode_type {
        HCLGE_OPC_RX_COM_WL_ALLOC       = 0x0905,
        HCLGE_OPC_RX_GBL_PKT_CNT        = 0x0906,
 
-       /* PTP command */
        /* TQP management command */
        HCLGE_OPC_SET_TQP_MAP           = 0x0A01,
 
-       /* TQP command */
+       /* TQP commands */
        HCLGE_OPC_CFG_TX_QUEUE          = 0x0B01,
        HCLGE_OPC_QUERY_TX_POINTER      = 0x0B02,
        HCLGE_OPC_QUERY_TX_STATUS       = 0x0B03,
@@ -172,10 +149,10 @@ enum hclge_opcode_type {
        HCLGE_OPC_CFG_COM_TQP_QUEUE     = 0x0B20,
        HCLGE_OPC_RESET_TQP_QUEUE       = 0x0B22,
 
-       /* TSO cmd */
+       /* TSO command */
        HCLGE_OPC_TSO_GENERIC_CONFIG    = 0x0C01,
 
-       /* RSS cmd */
+       /* RSS commands */
        HCLGE_OPC_RSS_GENERIC_CONFIG    = 0x0D01,
        HCLGE_OPC_RSS_INDIR_TABLE       = 0x0D07,
        HCLGE_OPC_RSS_TC_MODE           = 0x0D08,
@@ -184,15 +161,15 @@ enum hclge_opcode_type {
        /* Promiscuous mode command */
        HCLGE_OPC_CFG_PROMISC_MODE      = 0x0E01,
 
-       /* Vlan offload command */
+       /* Vlan offload commands */
        HCLGE_OPC_VLAN_PORT_TX_CFG      = 0x0F01,
        HCLGE_OPC_VLAN_PORT_RX_CFG      = 0x0F02,
 
-       /* Interrupts cmd */
+       /* Interrupts commands */
        HCLGE_OPC_ADD_RING_TO_VECTOR    = 0x1503,
        HCLGE_OPC_DEL_RING_TO_VECTOR    = 0x1504,
 
-       /* MAC command */
+       /* MAC commands */
        HCLGE_OPC_MAC_VLAN_ADD              = 0x1000,
        HCLGE_OPC_MAC_VLAN_REMOVE           = 0x1001,
        HCLGE_OPC_MAC_VLAN_TYPE_ID          = 0x1002,
@@ -201,13 +178,13 @@ enum hclge_opcode_type {
        HCLGE_OPC_MAC_ETHTYPE_REMOVE    = 0x1011,
        HCLGE_OPC_MAC_VLAN_MASK_SET     = 0x1012,
 
-       /* Multicast linear table cmd */
+       /* Multicast linear table commands */
        HCLGE_OPC_MTA_MAC_MODE_CFG          = 0x1020,
        HCLGE_OPC_MTA_MAC_FUNC_CFG          = 0x1021,
        HCLGE_OPC_MTA_TBL_ITEM_CFG          = 0x1022,
        HCLGE_OPC_MTA_TBL_ITEM_QUERY    = 0x1023,
 
-       /* VLAN command */
+       /* VLAN commands */
        HCLGE_OPC_VLAN_FILTER_CTRL          = 0x1100,
        HCLGE_OPC_VLAN_FILTER_PF_CFG    = 0x1101,
        HCLGE_OPC_VLAN_FILTER_VF_CFG    = 0x1102,
@@ -215,7 +192,7 @@ enum hclge_opcode_type {
        /* MDIO command */
        HCLGE_OPC_MDIO_CONFIG           = 0x1900,
 
-       /* QCN command */
+       /* QCN commands */
        HCLGE_OPC_QCN_MOD_CFG           = 0x1A01,
        HCLGE_OPC_QCN_GRP_TMPLT_CFG     = 0x1A02,
        HCLGE_OPC_QCN_SHAPPING_IR_CFG   = 0x1A03,
@@ -225,7 +202,7 @@ enum hclge_opcode_type {
        HCLGE_OPC_QCN_AJUST_INIT        = 0x1A07,
        HCLGE_OPC_QCN_DFX_CNT_STATUS    = 0x1A08,
 
-       /* Mailbox cmd */
+       /* Mailbox command */
        HCLGEVF_OPC_MBX_PF_TO_VF        = 0x2000,
 
        /* Led command */
@@ -381,8 +358,10 @@ struct hclge_pf_res_cmd {
        __le16 buf_size;
        __le16 msixcap_localid_ba_nic;
        __le16 msixcap_localid_ba_rocee;
+#define HCLGE_MSIX_OFT_ROCEE_S         0
+#define HCLGE_MSIX_OFT_ROCEE_M         GENMASK(15, 0)
 #define HCLGE_PF_VEC_NUM_S             0
-#define HCLGE_PF_VEC_NUM_M             (0xff << HCLGE_PF_VEC_NUM_S)
+#define HCLGE_PF_VEC_NUM_M             GENMASK(7, 0)
        __le16 pf_intr_vector_number;
        __le16 pf_own_fun_number;
        __le32 rsv[3];
@@ -471,8 +450,8 @@ struct hclge_rss_tc_mode_cmd {
        u8 rsv[8];
 };
 
-#define HCLGE_LINK_STS_B       0
-#define HCLGE_LINK_STATUS      BIT(HCLGE_LINK_STS_B)
+#define HCLGE_LINK_STATUS_UP_B 0
+#define HCLGE_LINK_STATUS_UP_M BIT(HCLGE_LINK_STATUS_UP_B)
 struct hclge_link_status_cmd {
        u8 status;
        u8 rsv[23];
@@ -571,7 +550,8 @@ struct hclge_config_auto_neg_cmd {
 
 struct hclge_config_max_frm_size_cmd {
        __le16  max_frm_size;
-       u8      rsv[22];
+       u8      min_frm_size;
+       u8      rsv[21];
 };
 
 enum hclge_mac_vlan_tbl_opcode {
@@ -581,13 +561,13 @@ enum hclge_mac_vlan_tbl_opcode {
        HCLGE_MAC_VLAN_LKUP,    /* Lookup an entry through mac_vlan key */
 };
 
-#define HCLGE_MAC_VLAN_BIT0_EN_B       0x0
-#define HCLGE_MAC_VLAN_BIT1_EN_B       0x1
-#define HCLGE_MAC_EPORT_SW_EN_B                0xc
-#define HCLGE_MAC_EPORT_TYPE_B         0xb
-#define HCLGE_MAC_EPORT_VFID_S         0x3
+#define HCLGE_MAC_VLAN_BIT0_EN_B       0
+#define HCLGE_MAC_VLAN_BIT1_EN_B       1
+#define HCLGE_MAC_EPORT_SW_EN_B                12
+#define HCLGE_MAC_EPORT_TYPE_B         11
+#define HCLGE_MAC_EPORT_VFID_S         3
 #define HCLGE_MAC_EPORT_VFID_M         GENMASK(10, 3)
-#define HCLGE_MAC_EPORT_PFID_S         0x0
+#define HCLGE_MAC_EPORT_PFID_S         0
 #define HCLGE_MAC_EPORT_PFID_M         GENMASK(2, 0)
 struct hclge_mac_vlan_tbl_entry_cmd {
        u8      flags;
@@ -603,7 +583,7 @@ struct hclge_mac_vlan_tbl_entry_cmd {
        u8      rsv2[6];
 };
 
-#define HCLGE_VLAN_MASK_EN_B           0x0
+#define HCLGE_VLAN_MASK_EN_B           0
 struct hclge_mac_vlan_mask_entry_cmd {
        u8 rsv0[2];
        u8 vlan_mask;
@@ -634,23 +614,23 @@ struct hclge_mac_mgr_tbl_entry_cmd {
        u8      rsv3[2];
 };
 
-#define HCLGE_CFG_MTA_MAC_SEL_S                0x0
+#define HCLGE_CFG_MTA_MAC_SEL_S                0
 #define HCLGE_CFG_MTA_MAC_SEL_M                GENMASK(1, 0)
-#define HCLGE_CFG_MTA_MAC_EN_B         0x7
+#define HCLGE_CFG_MTA_MAC_EN_B         7
 struct hclge_mta_filter_mode_cmd {
        u8      dmac_sel_en; /* Use lowest 2 bit as sel_mode, bit 7 as enable */
        u8      rsv[23];
 };
 
-#define HCLGE_CFG_FUNC_MTA_ACCEPT_B    0x0
+#define HCLGE_CFG_FUNC_MTA_ACCEPT_B    0
 struct hclge_cfg_func_mta_filter_cmd {
        u8      accept; /* Only used lowest 1 bit */
        u8      function_id;
        u8      rsv[22];
 };
 
-#define HCLGE_CFG_MTA_ITEM_ACCEPT_B    0x0
-#define HCLGE_CFG_MTA_ITEM_IDX_S       0x0
+#define HCLGE_CFG_MTA_ITEM_ACCEPT_B    0
+#define HCLGE_CFG_MTA_ITEM_IDX_S       0
 #define HCLGE_CFG_MTA_ITEM_IDX_M       GENMASK(11, 0)
 struct hclge_cfg_func_mta_item_cmd {
        __le16  item_idx; /* Only used lowest 12 bit */
index 955f0e3d5c954e61b96962fe09170a9175c9bfb1..f08ebb7caaaf5e359fe2ba159fde8e873204979d 100644 (file)
@@ -1,11 +1,5 @@
-/*
- * Copyright (c) 2016-2017 Hisilicon Limited.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright (c) 2016-2017 Hisilicon Limited.
 
 #include "hclge_main.h"
 #include "hclge_tm.h"
index 7d808ee96694f59f152e24c212d44870c007351b..278f21e027367ce65f5a848cbb67eabb2af893e0 100644 (file)
@@ -1,11 +1,5 @@
-/*
- * Copyright (c) 2016~2017 Hisilicon Limited.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright (c) 2016-2017 Hisilicon Limited.
 
 #ifndef __HCLGE_DCB_H__
 #define __HCLGE_DCB_H__
index d318d35e598fd61aa66406adb8048113f0252364..fc813b7f20e8ad69530f4100cfbaf7fc25069b0e 100644 (file)
@@ -1,11 +1,5 @@
-/*
- * Copyright (c) 2016-2017 Hisilicon Limited.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright (c) 2016-2017 Hisilicon Limited.
 
 #include <linux/acpi.h>
 #include <linux/device.h>
@@ -938,18 +932,22 @@ static int hclge_query_pf_resource(struct hclge_dev *hdev)
        hdev->pkt_buf_size = __le16_to_cpu(req->buf_size) << HCLGE_BUF_UNIT_S;
 
        if (hnae3_dev_roce_supported(hdev)) {
+               hdev->roce_base_msix_offset =
+               hnae3_get_field(__le16_to_cpu(req->msixcap_localid_ba_rocee),
+                               HCLGE_MSIX_OFT_ROCEE_M, HCLGE_MSIX_OFT_ROCEE_S);
                hdev->num_roce_msi =
-               hnae_get_field(__le16_to_cpu(req->pf_intr_vector_number),
-                              HCLGE_PF_VEC_NUM_M, HCLGE_PF_VEC_NUM_S);
+               hnae3_get_field(__le16_to_cpu(req->pf_intr_vector_number),
+                               HCLGE_PF_VEC_NUM_M, HCLGE_PF_VEC_NUM_S);
 
                /* PF should have NIC vectors and Roce vectors,
                 * NIC vectors are queued before Roce vectors.
                 */
-               hdev->num_msi = hdev->num_roce_msi  + HCLGE_ROCE_VECTOR_OFFSET;
+               hdev->num_msi = hdev->num_roce_msi  +
+                               hdev->roce_base_msix_offset;
        } else {
                hdev->num_msi =
-               hnae_get_field(__le16_to_cpu(req->pf_intr_vector_number),
-                              HCLGE_PF_VEC_NUM_M, HCLGE_PF_VEC_NUM_S);
+               hnae3_get_field(__le16_to_cpu(req->pf_intr_vector_number),
+                               HCLGE_PF_VEC_NUM_M, HCLGE_PF_VEC_NUM_S);
        }
 
        return 0;
@@ -1038,38 +1036,38 @@ static void hclge_parse_cfg(struct hclge_cfg *cfg, struct hclge_desc *desc)
        req = (struct hclge_cfg_param_cmd *)desc[0].data;
 
        /* get the configuration */
-       cfg->vmdq_vport_num = hnae_get_field(__le32_to_cpu(req->param[0]),
-                                            HCLGE_CFG_VMDQ_M,
-                                            HCLGE_CFG_VMDQ_S);
-       cfg->tc_num = hnae_get_field(__le32_to_cpu(req->param[0]),
-                                    HCLGE_CFG_TC_NUM_M, HCLGE_CFG_TC_NUM_S);
-       cfg->tqp_desc_num = hnae_get_field(__le32_to_cpu(req->param[0]),
-                                          HCLGE_CFG_TQP_DESC_N_M,
-                                          HCLGE_CFG_TQP_DESC_N_S);
-
-       cfg->phy_addr = hnae_get_field(__le32_to_cpu(req->param[1]),
-                                      HCLGE_CFG_PHY_ADDR_M,
-                                      HCLGE_CFG_PHY_ADDR_S);
-       cfg->media_type = hnae_get_field(__le32_to_cpu(req->param[1]),
-                                        HCLGE_CFG_MEDIA_TP_M,
-                                        HCLGE_CFG_MEDIA_TP_S);
-       cfg->rx_buf_len = hnae_get_field(__le32_to_cpu(req->param[1]),
-                                        HCLGE_CFG_RX_BUF_LEN_M,
-                                        HCLGE_CFG_RX_BUF_LEN_S);
+       cfg->vmdq_vport_num = hnae3_get_field(__le32_to_cpu(req->param[0]),
+                                             HCLGE_CFG_VMDQ_M,
+                                             HCLGE_CFG_VMDQ_S);
+       cfg->tc_num = hnae3_get_field(__le32_to_cpu(req->param[0]),
+                                     HCLGE_CFG_TC_NUM_M, HCLGE_CFG_TC_NUM_S);
+       cfg->tqp_desc_num = hnae3_get_field(__le32_to_cpu(req->param[0]),
+                                           HCLGE_CFG_TQP_DESC_N_M,
+                                           HCLGE_CFG_TQP_DESC_N_S);
+
+       cfg->phy_addr = hnae3_get_field(__le32_to_cpu(req->param[1]),
+                                       HCLGE_CFG_PHY_ADDR_M,
+                                       HCLGE_CFG_PHY_ADDR_S);
+       cfg->media_type = hnae3_get_field(__le32_to_cpu(req->param[1]),
+                                         HCLGE_CFG_MEDIA_TP_M,
+                                         HCLGE_CFG_MEDIA_TP_S);
+       cfg->rx_buf_len = hnae3_get_field(__le32_to_cpu(req->param[1]),
+                                         HCLGE_CFG_RX_BUF_LEN_M,
+                                         HCLGE_CFG_RX_BUF_LEN_S);
        /* get mac_address */
        mac_addr_tmp = __le32_to_cpu(req->param[2]);
-       mac_addr_tmp_high = hnae_get_field(__le32_to_cpu(req->param[3]),
-                                          HCLGE_CFG_MAC_ADDR_H_M,
-                                          HCLGE_CFG_MAC_ADDR_H_S);
+       mac_addr_tmp_high = hnae3_get_field(__le32_to_cpu(req->param[3]),
+                                           HCLGE_CFG_MAC_ADDR_H_M,
+                                           HCLGE_CFG_MAC_ADDR_H_S);
 
        mac_addr_tmp |= (mac_addr_tmp_high << 31) << 1;
 
-       cfg->default_speed = hnae_get_field(__le32_to_cpu(req->param[3]),
-                                           HCLGE_CFG_DEFAULT_SPEED_M,
-                                           HCLGE_CFG_DEFAULT_SPEED_S);
-       cfg->rss_size_max = hnae_get_field(__le32_to_cpu(req->param[3]),
-                                          HCLGE_CFG_RSS_SIZE_M,
-                                          HCLGE_CFG_RSS_SIZE_S);
+       cfg->default_speed = hnae3_get_field(__le32_to_cpu(req->param[3]),
+                                            HCLGE_CFG_DEFAULT_SPEED_M,
+                                            HCLGE_CFG_DEFAULT_SPEED_S);
+       cfg->rss_size_max = hnae3_get_field(__le32_to_cpu(req->param[3]),
+                                           HCLGE_CFG_RSS_SIZE_M,
+                                           HCLGE_CFG_RSS_SIZE_S);
 
        for (i = 0; i < ETH_ALEN; i++)
                cfg->mac_addr[i] = (mac_addr_tmp >> (8 * i)) & 0xff;
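
hclge_parse_cfg() splits the station MAC across two little-endian config words: param[2] holds the low 32 bits and a field of param[3] the high 16. A standalone sketch of the reassembly (names and widths here are assumptions), keeping the driver's split shift, which reaches a 32-bit displacement without a single full-width shift:

#include <stdint.h>
#include <stdio.h>

static void mac_from_words(uint32_t low, uint16_t high, uint8_t mac[6])
{
        uint64_t tmp = low;

        /* same effect as tmp |= (uint64_t)high << 32; writing it as
         * (high << 31) << 1 never shifts a value by its whole width
         * in one step
         */
        tmp |= ((uint64_t)high << 31) << 1;

        for (int i = 0; i < 6; i++)     /* lowest byte first, as above */
                mac[i] = (tmp >> (8 * i)) & 0xff;
}

int main(void)
{
        uint8_t mac[6];

        mac_from_words(0x33221100u, 0x5544, mac);
        for (int i = 0; i < 6; i++)
                printf("%02x%c", mac[i], i == 5 ? '\n' : ':');
        return 0;       /* prints 00:11:22:33:44:55 */
}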
@@ -1077,9 +1075,9 @@ static void hclge_parse_cfg(struct hclge_cfg *cfg, struct hclge_desc *desc)
        req = (struct hclge_cfg_param_cmd *)desc[1].data;
        cfg->numa_node_map = __le32_to_cpu(req->param[0]);
 
-       cfg->speed_ability = hnae_get_field(__le32_to_cpu(req->param[1]),
-                                           HCLGE_CFG_SPEED_ABILITY_M,
-                                           HCLGE_CFG_SPEED_ABILITY_S);
+       cfg->speed_ability = hnae3_get_field(__le32_to_cpu(req->param[1]),
+                                            HCLGE_CFG_SPEED_ABILITY_M,
+                                            HCLGE_CFG_SPEED_ABILITY_S);
 }
 
 /* hclge_get_cfg: query the static parameter from flash
@@ -1098,22 +1096,22 @@ static int hclge_get_cfg(struct hclge_dev *hdev, struct hclge_cfg *hcfg)
                req = (struct hclge_cfg_param_cmd *)desc[i].data;
                hclge_cmd_setup_basic_desc(&desc[i], HCLGE_OPC_GET_CFG_PARAM,
                                           true);
-               hnae_set_field(offset, HCLGE_CFG_OFFSET_M,
-                              HCLGE_CFG_OFFSET_S, i * HCLGE_CFG_RD_LEN_BYTES);
+               hnae3_set_field(offset, HCLGE_CFG_OFFSET_M,
+                               HCLGE_CFG_OFFSET_S, i * HCLGE_CFG_RD_LEN_BYTES);
                /* Len is in units of 4 bytes when sent to hardware */
-               hnae_set_field(offset, HCLGE_CFG_RD_LEN_M, HCLGE_CFG_RD_LEN_S,
-                              HCLGE_CFG_RD_LEN_BYTES / HCLGE_CFG_RD_LEN_UNIT);
+               hnae3_set_field(offset, HCLGE_CFG_RD_LEN_M, HCLGE_CFG_RD_LEN_S,
+                               HCLGE_CFG_RD_LEN_BYTES / HCLGE_CFG_RD_LEN_UNIT);
                req->offset = cpu_to_le32(offset);
        }
 
        ret = hclge_cmd_send(&hdev->hw, desc, HCLGE_PF_CFG_DESC_NUM);
        if (ret) {
-               dev_err(&hdev->pdev->dev,
-                       "get config failed %d.\n", ret);
+               dev_err(&hdev->pdev->dev, "get config failed %d.\n", ret);
                return ret;
        }
 
        hclge_parse_cfg(hcfg, desc);
+
        return 0;
 }
 
@@ -1130,13 +1128,10 @@ static int hclge_get_cap(struct hclge_dev *hdev)
 
        /* get pf resource */
        ret = hclge_query_pf_resource(hdev);
-       if (ret) {
-               dev_err(&hdev->pdev->dev,
-                       "query pf resource error %d.\n", ret);
-               return ret;
-       }
+       if (ret)
+               dev_err(&hdev->pdev->dev, "query pf resource error %d.\n", ret);
 
-       return 0;
+       return ret;
 }
 
 static int hclge_configure(struct hclge_dev *hdev)
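
A large share of the hunks in this commit collapse the "if (ret) { dev_err(...); return ret; } return 0;" tail into a single log-and-return of ret, which is 0 on success anyway; in hclge_knic_setup the same rewrite also propagates the real error code instead of flattening it to -EINVAL. A compilable toy version of the before/after shape (every name here is hypothetical):

#include <stdio.h>

struct dev { int fail; };

static int send_cmd(struct dev *d) { return d->fail ? -5 : 0; }
#define log_err(d, ...) fprintf(stderr, __VA_ARGS__)

/* Before: separate failure and success exits. */
static int do_cfg_old(struct dev *d)
{
        int ret = send_cmd(d);

        if (ret) {
                log_err(d, "cmd failed %d\n", ret);
                return ret;
        }
        return 0;
}

/* After: one exit; returning ret directly preserves behaviour
 * while dropping a branch and two lines.
 */
static int do_cfg_new(struct dev *d)
{
        int ret = send_cmd(d);

        if (ret)
                log_err(d, "cmd failed %d\n", ret);
        return ret;
}

int main(void)
{
        struct dev ok = { 0 }, bad = { 1 };

        return do_cfg_old(&ok) == do_cfg_new(&ok) &&
               do_cfg_old(&bad) == do_cfg_new(&bad) ? 0 : 1;
}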
@@ -1189,7 +1184,7 @@ static int hclge_configure(struct hclge_dev *hdev)
 
        /* Currently non-contiguous tc is not supported */
        for (i = 0; i < hdev->tm_info.num_tc; i++)
-               hnae_set_bit(hdev->hw_tc_map, i, 1);
+               hnae3_set_bit(hdev->hw_tc_map, i, 1);
 
        hdev->tx_sch_mode = HCLGE_FLAG_TC_BASE_SCH_MODE;
 
@@ -1208,13 +1203,13 @@ static int hclge_config_tso(struct hclge_dev *hdev, int tso_mss_min,
        req = (struct hclge_cfg_tso_status_cmd *)desc.data;
 
        tso_mss = 0;
-       hnae_set_field(tso_mss, HCLGE_TSO_MSS_MIN_M,
-                      HCLGE_TSO_MSS_MIN_S, tso_mss_min);
+       hnae3_set_field(tso_mss, HCLGE_TSO_MSS_MIN_M,
+                       HCLGE_TSO_MSS_MIN_S, tso_mss_min);
        req->tso_mss_min = cpu_to_le16(tso_mss);
 
        tso_mss = 0;
-       hnae_set_field(tso_mss, HCLGE_TSO_MSS_MIN_M,
-                      HCLGE_TSO_MSS_MIN_S, tso_mss_max);
+       hnae3_set_field(tso_mss, HCLGE_TSO_MSS_MIN_M,
+                       HCLGE_TSO_MSS_MIN_S, tso_mss_max);
        req->tso_mss_max = cpu_to_le16(tso_mss);
 
        return hclge_cmd_send(&hdev->hw, &desc, 1);
@@ -1265,13 +1260,10 @@ static int hclge_map_tqps_to_func(struct hclge_dev *hdev, u16 func_id,
        req->tqp_vid = cpu_to_le16(tqp_vid);
 
        ret = hclge_cmd_send(&hdev->hw, &desc, 1);
-       if (ret) {
-               dev_err(&hdev->pdev->dev, "TQP map failed %d.\n",
-                       ret);
-               return ret;
-       }
+       if (ret)
+               dev_err(&hdev->pdev->dev, "TQP map failed %d.\n", ret);
 
-       return 0;
+       return ret;
 }
 
 static int hclge_assign_tqp(struct hclge_vport *vport,
@@ -1330,12 +1322,10 @@ static int hclge_knic_setup(struct hclge_vport *vport, u16 num_tqps)
                return -ENOMEM;
 
        ret = hclge_assign_tqp(vport, kinfo->tqp, kinfo->num_tqps);
-       if (ret) {
+       if (ret)
                dev_err(&hdev->pdev->dev, "fail to assign TQPs %d.\n", ret);
-               return -EINVAL;
-       }
 
-       return 0;
+       return ret;
 }
 
 static int hclge_map_tqp_to_vport(struct hclge_dev *hdev,
@@ -1487,13 +1477,11 @@ static int  hclge_cmd_alloc_tx_buff(struct hclge_dev *hdev,
        }
 
        ret = hclge_cmd_send(&hdev->hw, &desc, 1);
-       if (ret) {
+       if (ret)
                dev_err(&hdev->pdev->dev, "tx buffer alloc cmd failed %d.\n",
                        ret);
-               return ret;
-       }
 
-       return 0;
+       return ret;
 }
 
 static int hclge_tx_buffer_alloc(struct hclge_dev *hdev,
@@ -1501,13 +1489,10 @@ static int hclge_tx_buffer_alloc(struct hclge_dev *hdev,
 {
        int ret = hclge_cmd_alloc_tx_buff(hdev, buf_alloc);
 
-       if (ret) {
-               dev_err(&hdev->pdev->dev,
-                       "tx buffer alloc failed %d\n", ret);
-               return ret;
-       }
+       if (ret)
+               dev_err(&hdev->pdev->dev, "tx buffer alloc failed %d\n", ret);
 
-       return 0;
+       return ret;
 }
 
 static int hclge_get_tc_num(struct hclge_dev *hdev)
@@ -1825,17 +1810,13 @@ static int hclge_rx_priv_buf_alloc(struct hclge_dev *hdev,
                            (1 << HCLGE_TC0_PRI_BUF_EN_B));
 
        ret = hclge_cmd_send(&hdev->hw, &desc, 1);
-       if (ret) {
+       if (ret)
                dev_err(&hdev->pdev->dev,
                        "rx private buffer alloc cmd failed %d\n", ret);
-               return ret;
-       }
 
-       return 0;
+       return ret;
 }
 
-#define HCLGE_PRIV_ENABLE(a) ((a) > 0 ? 1 : 0)
-
 static int hclge_rx_priv_wl_config(struct hclge_dev *hdev,
                                   struct hclge_pkt_buf_alloc *buf_alloc)
 {
@@ -1863,25 +1844,21 @@ static int hclge_rx_priv_wl_config(struct hclge_dev *hdev,
                        req->tc_wl[j].high =
                                cpu_to_le16(priv->wl.high >> HCLGE_BUF_UNIT_S);
                        req->tc_wl[j].high |=
-                               cpu_to_le16(HCLGE_PRIV_ENABLE(priv->wl.high) <<
-                                           HCLGE_RX_PRIV_EN_B);
+                               cpu_to_le16(BIT(HCLGE_RX_PRIV_EN_B));
                        req->tc_wl[j].low =
                                cpu_to_le16(priv->wl.low >> HCLGE_BUF_UNIT_S);
                        req->tc_wl[j].low |=
-                               cpu_to_le16(HCLGE_PRIV_ENABLE(priv->wl.low) <<
-                                           HCLGE_RX_PRIV_EN_B);
+                                cpu_to_le16(BIT(HCLGE_RX_PRIV_EN_B));
                }
        }
 
        /* Send 2 descriptors at one time */
        ret = hclge_cmd_send(&hdev->hw, desc, 2);
-       if (ret) {
+       if (ret)
                dev_err(&hdev->pdev->dev,
                        "rx private waterline config cmd failed %d\n",
                        ret);
-               return ret;
-       }
-       return 0;
+       return ret;
 }
 
 static int hclge_common_thrd_config(struct hclge_dev *hdev,
@@ -1911,24 +1888,20 @@ static int hclge_common_thrd_config(struct hclge_dev *hdev,
                        req->com_thrd[j].high =
                                cpu_to_le16(tc->high >> HCLGE_BUF_UNIT_S);
                        req->com_thrd[j].high |=
-                               cpu_to_le16(HCLGE_PRIV_ENABLE(tc->high) <<
-                                           HCLGE_RX_PRIV_EN_B);
+                                cpu_to_le16(BIT(HCLGE_RX_PRIV_EN_B));
                        req->com_thrd[j].low =
                                cpu_to_le16(tc->low >> HCLGE_BUF_UNIT_S);
                        req->com_thrd[j].low |=
-                               cpu_to_le16(HCLGE_PRIV_ENABLE(tc->low) <<
-                                           HCLGE_RX_PRIV_EN_B);
+                                cpu_to_le16(BIT(HCLGE_RX_PRIV_EN_B));
                }
        }
 
        /* Send 2 descriptors at one time */
        ret = hclge_cmd_send(&hdev->hw, desc, 2);
-       if (ret) {
+       if (ret)
                dev_err(&hdev->pdev->dev,
                        "common threshold config cmd failed %d\n", ret);
-               return ret;
-       }
-       return 0;
+       return ret;
 }
 
 static int hclge_common_wl_config(struct hclge_dev *hdev,
@@ -1943,23 +1916,17 @@ static int hclge_common_wl_config(struct hclge_dev *hdev,
 
        req = (struct hclge_rx_com_wl *)desc.data;
        req->com_wl.high = cpu_to_le16(buf->self.high >> HCLGE_BUF_UNIT_S);
-       req->com_wl.high |=
-               cpu_to_le16(HCLGE_PRIV_ENABLE(buf->self.high) <<
-                           HCLGE_RX_PRIV_EN_B);
+       req->com_wl.high |= cpu_to_le16(BIT(HCLGE_RX_PRIV_EN_B));
 
        req->com_wl.low = cpu_to_le16(buf->self.low >> HCLGE_BUF_UNIT_S);
-       req->com_wl.low |=
-               cpu_to_le16(HCLGE_PRIV_ENABLE(buf->self.low) <<
-                           HCLGE_RX_PRIV_EN_B);
+       req->com_wl.low |= cpu_to_le16(BIT(HCLGE_RX_PRIV_EN_B));
 
        ret = hclge_cmd_send(&hdev->hw, &desc, 1);
-       if (ret) {
+       if (ret)
                dev_err(&hdev->pdev->dev,
                        "common waterline config cmd failed %d\n", ret);
-               return ret;
-       }
 
-       return 0;
+       return ret;
 }
 
 int hclge_buffer_alloc(struct hclge_dev *hdev)
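
The three buffer hunks above drop HCLGE_PRIV_ENABLE(), which set the enable bit only for non-zero watermarks, and OR in BIT(HCLGE_RX_PRIV_EN_B) unconditionally; the rest of the word layout is unchanged: the byte value is scaled down by HCLGE_BUF_UNIT_S and packed into a 16-bit register word. A sketch of that encoding — the bit position and unit shift below are assumptions, and the cpu_to_le16() step is omitted:

#include <stdint.h>
#include <stdio.h>

#define BIT(n)       (1u << (n))
#define BUF_UNIT_S   7          /* assumed: watermarks in 128-byte units */
#define RX_PRIV_EN_B 15         /* assumed: top bit marks the field valid */

/* Scaled watermark in the low bits, enable flag in the top bit
 * (now set unconditionally, as in the hunks above).
 */
static uint16_t encode_wl(uint32_t bytes)
{
        return (uint16_t)((bytes >> BUF_UNIT_S) | BIT(RX_PRIV_EN_B));
}

int main(void)
{
        printf("0x%04x\n", encode_wl(256)); /* 0x8002: 2 units + enable */
        return 0;
}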
@@ -2074,7 +2041,7 @@ static int hclge_init_msi(struct hclge_dev *hdev)
        hdev->num_msi_left = vectors;
        hdev->base_msi_vector = pdev->irq;
        hdev->roce_base_vector = hdev->base_msi_vector +
-                               HCLGE_ROCE_VECTOR_OFFSET;
+                               hdev->roce_base_msix_offset;
 
        hdev->vector_status = devm_kcalloc(&pdev->dev, hdev->num_msi,
                                           sizeof(u16), GFP_KERNEL);
@@ -2118,48 +2085,48 @@ int hclge_cfg_mac_speed_dup(struct hclge_dev *hdev, int speed, u8 duplex)
 
        hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CONFIG_SPEED_DUP, false);
 
-       hnae_set_bit(req->speed_dup, HCLGE_CFG_DUPLEX_B, !!duplex);
+       hnae3_set_bit(req->speed_dup, HCLGE_CFG_DUPLEX_B, !!duplex);
 
        switch (speed) {
        case HCLGE_MAC_SPEED_10M:
-               hnae_set_field(req->speed_dup, HCLGE_CFG_SPEED_M,
-                              HCLGE_CFG_SPEED_S, 6);
+               hnae3_set_field(req->speed_dup, HCLGE_CFG_SPEED_M,
+                               HCLGE_CFG_SPEED_S, 6);
                break;
        case HCLGE_MAC_SPEED_100M:
-               hnae_set_field(req->speed_dup, HCLGE_CFG_SPEED_M,
-                              HCLGE_CFG_SPEED_S, 7);
+               hnae3_set_field(req->speed_dup, HCLGE_CFG_SPEED_M,
+                               HCLGE_CFG_SPEED_S, 7);
                break;
        case HCLGE_MAC_SPEED_1G:
-               hnae_set_field(req->speed_dup, HCLGE_CFG_SPEED_M,
-                              HCLGE_CFG_SPEED_S, 0);
+               hnae3_set_field(req->speed_dup, HCLGE_CFG_SPEED_M,
+                               HCLGE_CFG_SPEED_S, 0);
                break;
        case HCLGE_MAC_SPEED_10G:
-               hnae_set_field(req->speed_dup, HCLGE_CFG_SPEED_M,
-                              HCLGE_CFG_SPEED_S, 1);
+               hnae3_set_field(req->speed_dup, HCLGE_CFG_SPEED_M,
+                               HCLGE_CFG_SPEED_S, 1);
                break;
        case HCLGE_MAC_SPEED_25G:
-               hnae_set_field(req->speed_dup, HCLGE_CFG_SPEED_M,
-                              HCLGE_CFG_SPEED_S, 2);
+               hnae3_set_field(req->speed_dup, HCLGE_CFG_SPEED_M,
+                               HCLGE_CFG_SPEED_S, 2);
                break;
        case HCLGE_MAC_SPEED_40G:
-               hnae_set_field(req->speed_dup, HCLGE_CFG_SPEED_M,
-                              HCLGE_CFG_SPEED_S, 3);
+               hnae3_set_field(req->speed_dup, HCLGE_CFG_SPEED_M,
+                               HCLGE_CFG_SPEED_S, 3);
                break;
        case HCLGE_MAC_SPEED_50G:
-               hnae_set_field(req->speed_dup, HCLGE_CFG_SPEED_M,
-                              HCLGE_CFG_SPEED_S, 4);
+               hnae3_set_field(req->speed_dup, HCLGE_CFG_SPEED_M,
+                               HCLGE_CFG_SPEED_S, 4);
                break;
        case HCLGE_MAC_SPEED_100G:
-               hnae_set_field(req->speed_dup, HCLGE_CFG_SPEED_M,
-                              HCLGE_CFG_SPEED_S, 5);
+               hnae3_set_field(req->speed_dup, HCLGE_CFG_SPEED_M,
+                               HCLGE_CFG_SPEED_S, 5);
                break;
        default:
                dev_err(&hdev->pdev->dev, "invalid speed (%d)\n", speed);
                return -EINVAL;
        }
 
-       hnae_set_bit(req->mac_change_fec_en, HCLGE_CFG_MAC_SPEED_CHANGE_EN_B,
-                    1);
+       hnae3_set_bit(req->mac_change_fec_en, HCLGE_CFG_MAC_SPEED_CHANGE_EN_B,
+                     1);
 
        ret = hclge_cmd_send(&hdev->hw, &desc, 1);
        if (ret) {
@@ -2201,18 +2168,16 @@ static int hclge_query_mac_an_speed_dup(struct hclge_dev *hdev, int *speed,
                return ret;
        }
 
-       *duplex = hnae_get_bit(req->an_syn_dup_speed, HCLGE_QUERY_DUPLEX_B);
-       speed_tmp = hnae_get_field(req->an_syn_dup_speed, HCLGE_QUERY_SPEED_M,
-                                  HCLGE_QUERY_SPEED_S);
+       *duplex = hnae3_get_bit(req->an_syn_dup_speed, HCLGE_QUERY_DUPLEX_B);
+       speed_tmp = hnae3_get_field(req->an_syn_dup_speed, HCLGE_QUERY_SPEED_M,
+                                   HCLGE_QUERY_SPEED_S);
 
        ret = hclge_parse_speed(speed_tmp, speed);
-       if (ret) {
+       if (ret)
                dev_err(&hdev->pdev->dev,
                        "could not parse speed(=%d), %d\n", speed_tmp, ret);
-               return -EIO;
-       }
 
-       return 0;
+       return ret;
 }
 
 static int hclge_set_autoneg_en(struct hclge_dev *hdev, bool enable)
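
The switch in hclge_cfg_mac_speed_dup above maps link speeds to the firmware codes 10M→6, 100M→7, 1G→0, 10G→1, 25G→2, 40G→3, 50G→4, 100G→5. The same mapping could be expressed table-driven; this is only an alternative sketch using the values visible in the diff, not the driver's code:

#include <stdio.h>

/* Speed-to-register-code pairs taken from the switch above
 * (the HCLGE_MAC_SPEED_* enum values are the speed in Mbps).
 */
static const struct { int mbps; int code; } speed_map[] = {
        { 10, 6 }, { 100, 7 }, { 1000, 0 }, { 10000, 1 },
        { 25000, 2 }, { 40000, 3 }, { 50000, 4 }, { 100000, 5 },
};

/* Return the firmware code for a speed, or -1 for unsupported
 * values (the driver returns -EINVAL in that case).
 */
static int speed_to_code(int mbps)
{
        for (unsigned i = 0; i < sizeof(speed_map) / sizeof(speed_map[0]); i++)
                if (speed_map[i].mbps == mbps)
                        return speed_map[i].code;
        return -1;
}

int main(void)
{
        printf("%d %d\n", speed_to_code(25000), speed_to_code(300)); /* 2 -1 */
        return 0;
}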
@@ -2225,17 +2190,15 @@ static int hclge_set_autoneg_en(struct hclge_dev *hdev, bool enable)
        hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CONFIG_AN_MODE, false);
 
        req = (struct hclge_config_auto_neg_cmd *)desc.data;
-       hnae_set_bit(flag, HCLGE_MAC_CFG_AN_EN_B, !!enable);
+       hnae3_set_bit(flag, HCLGE_MAC_CFG_AN_EN_B, !!enable);
        req->cfg_an_cmd_flag = cpu_to_le32(flag);
 
        ret = hclge_cmd_send(&hdev->hw, &desc, 1);
-       if (ret) {
+       if (ret)
                dev_err(&hdev->pdev->dev, "auto neg set cmd failed %d.\n",
                        ret);
-               return ret;
-       }
 
-       return 0;
+       return ret;
 }
 
 static int hclge_set_autoneg(struct hnae3_handle *handle, bool enable)
@@ -2269,8 +2232,8 @@ static int hclge_set_default_mac_vlan_mask(struct hclge_dev *hdev,
        req = (struct hclge_mac_vlan_mask_entry_cmd *)desc.data;
        hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MAC_VLAN_MASK_SET, false);
 
-       hnae_set_bit(req->vlan_mask, HCLGE_VLAN_MASK_EN_B,
-                    mask_vlan ? 1 : 0);
+       hnae3_set_bit(req->vlan_mask, HCLGE_VLAN_MASK_EN_B,
+                     mask_vlan ? 1 : 0);
        ether_addr_copy(req->mac_mask, mac_mask);
 
        status = hclge_cmd_send(&hdev->hw, &desc, 1);
@@ -2341,13 +2304,11 @@ static int hclge_mac_init(struct hclge_dev *hdev)
                mtu = ETH_DATA_LEN;
 
        ret = hclge_set_mtu(handle, mtu);
-       if (ret) {
+       if (ret)
                dev_err(&hdev->pdev->dev,
                        "set mtu failed ret=%d\n", ret);
-               return ret;
-       }
 
-       return 0;
+       return ret;
 }
 
 static void hclge_mbx_task_schedule(struct hclge_dev *hdev)
@@ -2386,7 +2347,7 @@ static int hclge_get_mac_link_status(struct hclge_dev *hdev)
        }
 
        req = (struct hclge_link_status_cmd *)desc.data;
-       link_status = req->status & HCLGE_LINK_STATUS;
+       link_status = req->status & HCLGE_LINK_STATUS_UP_M;
 
        return !!link_status;
 }
@@ -2505,7 +2466,7 @@ static u32 hclge_check_event_cause(struct hclge_dev *hdev, u32 *clearval)
        u32 cmdq_src_reg;
 
        /* fetch the events from their corresponding regs */
-       rst_src_reg = hclge_read_dev(&hdev->hw, HCLGE_MISC_RESET_STS_REG);
+       rst_src_reg = hclge_read_dev(&hdev->hw, HCLGE_MISC_VECTOR_INT_STS);
        cmdq_src_reg = hclge_read_dev(&hdev->hw, HCLGE_VECTOR0_CMDQ_SRC_REG);
 
        /* Assumption: If by any chance reset and mailbox events are reported
@@ -2517,12 +2478,14 @@ static u32 hclge_check_event_cause(struct hclge_dev *hdev, u32 *clearval)
 
        /* check for vector0 reset event sources */
        if (BIT(HCLGE_VECTOR0_GLOBALRESET_INT_B) & rst_src_reg) {
+               set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state);
                set_bit(HNAE3_GLOBAL_RESET, &hdev->reset_pending);
                *clearval = BIT(HCLGE_VECTOR0_GLOBALRESET_INT_B);
                return HCLGE_VECTOR0_EVENT_RST;
        }
 
        if (BIT(HCLGE_VECTOR0_CORERESET_INT_B) & rst_src_reg) {
+               set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state);
                set_bit(HNAE3_CORE_RESET, &hdev->reset_pending);
                *clearval = BIT(HCLGE_VECTOR0_CORERESET_INT_B);
                return HCLGE_VECTOR0_EVENT_RST;
@@ -2614,6 +2577,12 @@ static irqreturn_t hclge_misc_irq_handle(int irq, void *data)
 
 static void hclge_free_vector(struct hclge_dev *hdev, int vector_id)
 {
+       if (hdev->vector_status[vector_id] == HCLGE_INVALID_VPORT) {
+               dev_warn(&hdev->pdev->dev,
+                        "vector(vector_id %d) has been freed.\n", vector_id);
+               return;
+       }
+
        hdev->vector_status[vector_id] = HCLGE_INVALID_VPORT;
        hdev->num_msi_left += 1;
        hdev->num_msi_used -= 1;
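
hclge_free_vector gains a guard so that freeing an already-freed slot is a logged no-op instead of skewing the num_msi_left/num_msi_used accounting. The pattern in isolation, with simplified types:

#include <stdio.h>

#define INVALID_OWNER 0xffff    /* stands in for HCLGE_INVALID_VPORT */

struct vec_pool {
        unsigned short status[4]; /* owner per vector, or INVALID_OWNER */
        int used, left;
};

/* Idempotent free: a second call on the same id is a logged no-op,
 * so double frees cannot corrupt the used/left counters.
 */
static void free_vector(struct vec_pool *p, int id)
{
        if (p->status[id] == INVALID_OWNER) {
                fprintf(stderr, "vector %d already freed\n", id);
                return;
        }
        p->status[id] = INVALID_OWNER;
        p->used--;
        p->left++;
}

int main(void)
{
        struct vec_pool p = { .status = { 1, 1, 1, 1 }, .used = 4, .left = 0 };

        free_vector(&p, 2);
        free_vector(&p, 2);                 /* warns, changes nothing */
        printf("used=%d left=%d\n", p.used, p.left); /* used=3 left=1 */
        return 0;
}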
@@ -2705,7 +2674,7 @@ static int hclge_reset_wait(struct hclge_dev *hdev)
        }
 
        val = hclge_read_dev(&hdev->hw, reg);
-       while (hnae_get_bit(val, reg_bit) && cnt < HCLGE_RESET_WAIT_CNT) {
+       while (hnae3_get_bit(val, reg_bit) && cnt < HCLGE_RESET_WAIT_CNT) {
                msleep(HCLGE_RESET_WATI_MS);
                val = hclge_read_dev(&hdev->hw, reg);
                cnt++;
@@ -2727,8 +2696,7 @@ int hclge_func_reset_cmd(struct hclge_dev *hdev, int func_id)
        int ret;
 
        hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CFG_RST_TRIGGER, false);
-       hnae_set_bit(req->mac_func_reset, HCLGE_CFG_RESET_MAC_B, 0);
-       hnae_set_bit(req->mac_func_reset, HCLGE_CFG_RESET_FUNC_B, 1);
+       hnae3_set_bit(req->mac_func_reset, HCLGE_CFG_RESET_FUNC_B, 1);
        req->fun_reset_vfid = func_id;
 
        ret = hclge_cmd_send(&hdev->hw, &desc, 1);
@@ -2747,13 +2715,13 @@ static void hclge_do_reset(struct hclge_dev *hdev)
        switch (hdev->reset_type) {
        case HNAE3_GLOBAL_RESET:
                val = hclge_read_dev(&hdev->hw, HCLGE_GLOBAL_RESET_REG);
-               hnae_set_bit(val, HCLGE_GLOBAL_RESET_BIT, 1);
+               hnae3_set_bit(val, HCLGE_GLOBAL_RESET_BIT, 1);
                hclge_write_dev(&hdev->hw, HCLGE_GLOBAL_RESET_REG, val);
                dev_info(&pdev->dev, "Global Reset requested\n");
                break;
        case HNAE3_CORE_RESET:
                val = hclge_read_dev(&hdev->hw, HCLGE_GLOBAL_RESET_REG);
-               hnae_set_bit(val, HCLGE_CORE_RESET_BIT, 1);
+               hnae3_set_bit(val, HCLGE_CORE_RESET_BIT, 1);
                hclge_write_dev(&hdev->hw, HCLGE_GLOBAL_RESET_REG, val);
                dev_info(&pdev->dev, "Core Reset requested\n");
                break;
@@ -2810,8 +2778,6 @@ static void hclge_clear_reset_cause(struct hclge_dev *hdev)
                clearval = BIT(HCLGE_VECTOR0_CORERESET_INT_B);
                break;
        default:
-               dev_warn(&hdev->pdev->dev, "Unsupported reset event to clear:%d",
-                        hdev->reset_type);
                break;
        }
 
@@ -2824,16 +2790,17 @@ static void hclge_clear_reset_cause(struct hclge_dev *hdev)
 
 static void hclge_reset(struct hclge_dev *hdev)
 {
-       /* perform reset of the stack & ae device for a client */
+       struct hnae3_handle *handle;
 
+       /* perform reset of the stack & ae device for a client */
+       handle = &hdev->vport[0].nic;
+       rtnl_lock();
        hclge_notify_client(hdev, HNAE3_DOWN_CLIENT);
 
        if (!hclge_reset_wait(hdev)) {
-               rtnl_lock();
                hclge_notify_client(hdev, HNAE3_UNINIT_CLIENT);
                hclge_reset_ae_dev(hdev->ae_dev);
                hclge_notify_client(hdev, HNAE3_INIT_CLIENT);
-               rtnl_unlock();
 
                hclge_clear_reset_cause(hdev);
        } else {
@@ -2843,6 +2810,8 @@ static void hclge_reset(struct hclge_dev *hdev)
        }
 
        hclge_notify_client(hdev, HNAE3_UP_CLIENT);
+       handle->last_reset_time = jiffies;
+       rtnl_unlock();
 }
 
 static void hclge_reset_event(struct hnae3_handle *handle)
@@ -2855,8 +2824,13 @@ static void hclge_reset_event(struct hnae3_handle *handle)
         * know this if last reset request did not occur very recently (watchdog
         * timer = 5*HZ, let us check after sufficiently large time, say 4*5*HZ)
         * In case of new request we reset the "reset level" to PF reset.
+        * If this is a repeat of the most recent reset request then we want
+        * to throttle it, so a new request is not allowed until 3*HZ has
+        * elapsed since the last one.
         */
-       if (time_after(jiffies, (handle->last_reset_time + 4 * 5 * HZ)))
+       if (time_before(jiffies, (handle->last_reset_time + 3 * HZ)))
+               return;
+       else if (time_after(jiffies, (handle->last_reset_time + 4 * 5 * HZ)))
                handle->reset_level = HNAE3_FUNC_RESET;
 
         dev_info(&hdev->pdev->dev, "received reset event, reset type is %d",
@@ -2868,8 +2842,6 @@ static void hclge_reset_event(struct hnae3_handle *handle)
 
        if (handle->reset_level < HNAE3_GLOBAL_RESET)
                handle->reset_level++;
-
-       handle->last_reset_time = jiffies;
 }
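
The handler now enforces two windows around handle->last_reset_time: a request arriving within 3*HZ of the previous one is dropped, and only after 4*5*HZ does the escalation level fall back to a function reset; the timestamp itself is now taken in hclge_reset() once the reset really runs. A toy version of the window logic, using plain integers where the kernel compares jiffies with time_before()/time_after() to stay wraparound-safe:

#include <stdio.h>

#define HZ 250                  /* assumed tick rate for the example */

/* Mirror of the two checks above: returns 0 to throttle, 1 to proceed;
 * *level_reset tells the caller to drop back to a function-level reset.
 */
static int reset_allowed(unsigned long now, unsigned long last,
                         int *level_reset)
{
        if (now < last + 3 * HZ)        /* time_before(): too soon, drop */
                return 0;
        *level_reset = now > last + 4 * 5 * HZ; /* time_after(): stale   */
        return 1;
}

int main(void)
{
        int lvl = 0;

        printf("%d\n", reset_allowed(100, 0, &lvl));        /* 0: throttled */
        printf("%d lvl=%d\n", reset_allowed(6000, 0, &lvl), lvl); /* 1 lvl=1 */
        return 0;
}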
 
 static void hclge_reset_subtask(struct hclge_dev *hdev)
@@ -3110,23 +3082,21 @@ static int hclge_set_rss_tc_mode(struct hclge_dev *hdev, u16 *tc_valid,
        for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
                u16 mode = 0;
 
-               hnae_set_bit(mode, HCLGE_RSS_TC_VALID_B, (tc_valid[i] & 0x1));
-               hnae_set_field(mode, HCLGE_RSS_TC_SIZE_M,
-                              HCLGE_RSS_TC_SIZE_S, tc_size[i]);
-               hnae_set_field(mode, HCLGE_RSS_TC_OFFSET_M,
-                              HCLGE_RSS_TC_OFFSET_S, tc_offset[i]);
+               hnae3_set_bit(mode, HCLGE_RSS_TC_VALID_B, (tc_valid[i] & 0x1));
+               hnae3_set_field(mode, HCLGE_RSS_TC_SIZE_M,
+                               HCLGE_RSS_TC_SIZE_S, tc_size[i]);
+               hnae3_set_field(mode, HCLGE_RSS_TC_OFFSET_M,
+                               HCLGE_RSS_TC_OFFSET_S, tc_offset[i]);
 
                req->rss_tc_mode[i] = cpu_to_le16(mode);
        }
 
        ret = hclge_cmd_send(&hdev->hw, &desc, 1);
-       if (ret) {
+       if (ret)
                dev_err(&hdev->pdev->dev,
                        "Configure rss tc mode fail, status = %d\n", ret);
-               return ret;
-       }
 
-       return 0;
+       return ret;
 }
 
 static int hclge_set_rss_input_tuple(struct hclge_dev *hdev)
@@ -3149,13 +3119,10 @@ static int hclge_set_rss_input_tuple(struct hclge_dev *hdev)
        req->ipv6_sctp_en = hdev->vport[0].rss_tuple_sets.ipv6_sctp_en;
        req->ipv6_fragment_en = hdev->vport[0].rss_tuple_sets.ipv6_fragment_en;
        ret = hclge_cmd_send(&hdev->hw, &desc, 1);
-       if (ret) {
+       if (ret)
                dev_err(&hdev->pdev->dev,
                        "Configure rss input fail, status = %d\n", ret);
-               return ret;
-       }
-
-       return 0;
+       return ret;
 }
 
 static int hclge_get_rss(struct hnae3_handle *handle, u32 *indir,
@@ -3491,16 +3458,16 @@ int hclge_bind_ring_with_vector(struct hclge_vport *vport,
        i = 0;
        for (node = ring_chain; node; node = node->next) {
                tqp_type_and_id = le16_to_cpu(req->tqp_type_and_id[i]);
-               hnae_set_field(tqp_type_and_id,  HCLGE_INT_TYPE_M,
-                              HCLGE_INT_TYPE_S,
-                              hnae_get_bit(node->flag, HNAE3_RING_TYPE_B));
-               hnae_set_field(tqp_type_and_id, HCLGE_TQP_ID_M,
-                              HCLGE_TQP_ID_S, node->tqp_index);
-               hnae_set_field(tqp_type_and_id, HCLGE_INT_GL_IDX_M,
-                              HCLGE_INT_GL_IDX_S,
-                              hnae_get_field(node->int_gl_idx,
-                                             HNAE3_RING_GL_IDX_M,
-                                             HNAE3_RING_GL_IDX_S));
+               hnae3_set_field(tqp_type_and_id,  HCLGE_INT_TYPE_M,
+                               HCLGE_INT_TYPE_S,
+                               hnae3_get_bit(node->flag, HNAE3_RING_TYPE_B));
+               hnae3_set_field(tqp_type_and_id, HCLGE_TQP_ID_M,
+                               HCLGE_TQP_ID_S, node->tqp_index);
+               hnae3_set_field(tqp_type_and_id, HCLGE_INT_GL_IDX_M,
+                               HCLGE_INT_GL_IDX_S,
+                               hnae3_get_field(node->int_gl_idx,
+                                               HNAE3_RING_GL_IDX_M,
+                                               HNAE3_RING_GL_IDX_S));
                req->tqp_type_and_id[i] = cpu_to_le16(tqp_type_and_id);
                if (++i >= HCLGE_VECTOR_ELEMENTS_PER_CMD) {
                        req->int_cause_num = HCLGE_VECTOR_ELEMENTS_PER_CMD;
@@ -3603,12 +3570,11 @@ int hclge_cmd_set_promisc_mode(struct hclge_dev *hdev,
                HCLGE_PROMISC_TX_EN_B | HCLGE_PROMISC_RX_EN_B;
 
        ret = hclge_cmd_send(&hdev->hw, &desc, 1);
-       if (ret) {
+       if (ret)
                dev_err(&hdev->pdev->dev,
                        "Set promisc mode fail, status is %d.\n", ret);
-               return ret;
-       }
-       return 0;
+
+       return ret;
 }
 
 void hclge_promisc_param_init(struct hclge_promisc_param *param, bool en_uc,
@@ -3648,20 +3614,20 @@ static void hclge_cfg_mac_mode(struct hclge_dev *hdev, bool enable)
        int ret;
 
        hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CONFIG_MAC_MODE, false);
-       hnae_set_bit(loop_en, HCLGE_MAC_TX_EN_B, enable);
-       hnae_set_bit(loop_en, HCLGE_MAC_RX_EN_B, enable);
-       hnae_set_bit(loop_en, HCLGE_MAC_PAD_TX_B, enable);
-       hnae_set_bit(loop_en, HCLGE_MAC_PAD_RX_B, enable);
-       hnae_set_bit(loop_en, HCLGE_MAC_1588_TX_B, 0);
-       hnae_set_bit(loop_en, HCLGE_MAC_1588_RX_B, 0);
-       hnae_set_bit(loop_en, HCLGE_MAC_APP_LP_B, 0);
-       hnae_set_bit(loop_en, HCLGE_MAC_LINE_LP_B, 0);
-       hnae_set_bit(loop_en, HCLGE_MAC_FCS_TX_B, enable);
-       hnae_set_bit(loop_en, HCLGE_MAC_RX_FCS_B, enable);
-       hnae_set_bit(loop_en, HCLGE_MAC_RX_FCS_STRIP_B, enable);
-       hnae_set_bit(loop_en, HCLGE_MAC_TX_OVERSIZE_TRUNCATE_B, enable);
-       hnae_set_bit(loop_en, HCLGE_MAC_RX_OVERSIZE_TRUNCATE_B, enable);
-       hnae_set_bit(loop_en, HCLGE_MAC_TX_UNDER_MIN_ERR_B, enable);
+       hnae3_set_bit(loop_en, HCLGE_MAC_TX_EN_B, enable);
+       hnae3_set_bit(loop_en, HCLGE_MAC_RX_EN_B, enable);
+       hnae3_set_bit(loop_en, HCLGE_MAC_PAD_TX_B, enable);
+       hnae3_set_bit(loop_en, HCLGE_MAC_PAD_RX_B, enable);
+       hnae3_set_bit(loop_en, HCLGE_MAC_1588_TX_B, 0);
+       hnae3_set_bit(loop_en, HCLGE_MAC_1588_RX_B, 0);
+       hnae3_set_bit(loop_en, HCLGE_MAC_APP_LP_B, 0);
+       hnae3_set_bit(loop_en, HCLGE_MAC_LINE_LP_B, 0);
+       hnae3_set_bit(loop_en, HCLGE_MAC_FCS_TX_B, enable);
+       hnae3_set_bit(loop_en, HCLGE_MAC_RX_FCS_B, enable);
+       hnae3_set_bit(loop_en, HCLGE_MAC_RX_FCS_STRIP_B, enable);
+       hnae3_set_bit(loop_en, HCLGE_MAC_TX_OVERSIZE_TRUNCATE_B, enable);
+       hnae3_set_bit(loop_en, HCLGE_MAC_RX_OVERSIZE_TRUNCATE_B, enable);
+       hnae3_set_bit(loop_en, HCLGE_MAC_TX_UNDER_MIN_ERR_B, enable);
        req->txrx_pad_fcs_loop_en = cpu_to_le32(loop_en);
 
        ret = hclge_cmd_send(&hdev->hw, &desc, 1);
@@ -3689,7 +3655,7 @@ static int hclge_set_mac_loopback(struct hclge_dev *hdev, bool en)
 
        /* 2 Then setup the loopback flag */
        loop_en = le32_to_cpu(req->txrx_pad_fcs_loop_en);
-       hnae_set_bit(loop_en, HCLGE_MAC_APP_LP_B, en ? 1 : 0);
+       hnae3_set_bit(loop_en, HCLGE_MAC_APP_LP_B, en ? 1 : 0);
 
        req->txrx_pad_fcs_loop_en = cpu_to_le32(loop_en);
 
@@ -3953,20 +3919,18 @@ static int hclge_set_mta_filter_mode(struct hclge_dev *hdev,
        req = (struct hclge_mta_filter_mode_cmd *)desc.data;
        hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MTA_MAC_MODE_CFG, false);
 
-       hnae_set_bit(req->dmac_sel_en, HCLGE_CFG_MTA_MAC_EN_B,
-                    enable);
-       hnae_set_field(req->dmac_sel_en, HCLGE_CFG_MTA_MAC_SEL_M,
-                      HCLGE_CFG_MTA_MAC_SEL_S, mta_mac_sel);
+       hnae3_set_bit(req->dmac_sel_en, HCLGE_CFG_MTA_MAC_EN_B,
+                     enable);
+       hnae3_set_field(req->dmac_sel_en, HCLGE_CFG_MTA_MAC_SEL_M,
+                       HCLGE_CFG_MTA_MAC_SEL_S, mta_mac_sel);
 
        ret = hclge_cmd_send(&hdev->hw, &desc, 1);
-       if (ret) {
+       if (ret)
                dev_err(&hdev->pdev->dev,
                        "Config mta filter mode failed for cmd_send, ret =%d.\n",
                        ret);
-               return ret;
-       }
 
-       return 0;
+       return ret;
 }
 
 int hclge_cfg_func_mta_filter(struct hclge_dev *hdev,
@@ -3980,19 +3944,17 @@ int hclge_cfg_func_mta_filter(struct hclge_dev *hdev,
        req = (struct hclge_cfg_func_mta_filter_cmd *)desc.data;
        hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MTA_MAC_FUNC_CFG, false);
 
-       hnae_set_bit(req->accept, HCLGE_CFG_FUNC_MTA_ACCEPT_B,
-                    enable);
+       hnae3_set_bit(req->accept, HCLGE_CFG_FUNC_MTA_ACCEPT_B,
+                     enable);
        req->function_id = func_id;
 
        ret = hclge_cmd_send(&hdev->hw, &desc, 1);
-       if (ret) {
+       if (ret)
                dev_err(&hdev->pdev->dev,
                        "Config func_id enable failed for cmd_send, ret =%d.\n",
                        ret);
-               return ret;
-       }
 
-       return 0;
+       return ret;
 }
 
 static int hclge_set_mta_table_item(struct hclge_vport *vport,
@@ -4007,10 +3969,10 @@ static int hclge_set_mta_table_item(struct hclge_vport *vport,
 
        req = (struct hclge_cfg_func_mta_item_cmd *)desc.data;
        hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MTA_TBL_ITEM_CFG, false);
-       hnae_set_bit(req->accept, HCLGE_CFG_MTA_ITEM_ACCEPT_B, enable);
+       hnae3_set_bit(req->accept, HCLGE_CFG_MTA_ITEM_ACCEPT_B, enable);
 
-       hnae_set_field(item_idx, HCLGE_CFG_MTA_ITEM_IDX_M,
-                      HCLGE_CFG_MTA_ITEM_IDX_S, idx);
+       hnae3_set_field(item_idx, HCLGE_CFG_MTA_ITEM_IDX_M,
+                       HCLGE_CFG_MTA_ITEM_IDX_S, idx);
        req->item_idx = cpu_to_le16(item_idx);
 
        ret = hclge_cmd_send(&hdev->hw, &desc, 1);
@@ -4257,17 +4219,10 @@ int hclge_add_uc_addr_common(struct hclge_vport *vport,
        }
 
        memset(&req, 0, sizeof(req));
-       hnae_set_bit(req.flags, HCLGE_MAC_VLAN_BIT0_EN_B, 1);
-       hnae_set_bit(req.entry_type, HCLGE_MAC_VLAN_BIT0_EN_B, 0);
-       hnae_set_bit(req.entry_type, HCLGE_MAC_VLAN_BIT1_EN_B, 0);
-       hnae_set_bit(req.mc_mac_en, HCLGE_MAC_VLAN_BIT0_EN_B, 0);
-
-       hnae_set_bit(egress_port, HCLGE_MAC_EPORT_SW_EN_B, 0);
-       hnae_set_bit(egress_port, HCLGE_MAC_EPORT_TYPE_B, 0);
-       hnae_set_field(egress_port, HCLGE_MAC_EPORT_VFID_M,
-                      HCLGE_MAC_EPORT_VFID_S, vport->vport_id);
-       hnae_set_field(egress_port, HCLGE_MAC_EPORT_PFID_M,
-                      HCLGE_MAC_EPORT_PFID_S, 0);
+       hnae3_set_bit(req.flags, HCLGE_MAC_VLAN_BIT0_EN_B, 1);
+
+       hnae3_set_field(egress_port, HCLGE_MAC_EPORT_VFID_M,
+                       HCLGE_MAC_EPORT_VFID_S, vport->vport_id);
 
        req.egress_port = cpu_to_le16(egress_port);
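
The uc-addr hunk above deletes several hnae3_set_bit(..., 0) and hnae3_set_field(..., 0) calls: the request was just cleared with memset, so writing zeros into zeroed fields is dead code, and only the genuinely non-zero fields (the valid flag and the VF id) remain. In miniature:

#include <string.h>
#include <stdint.h>
#include <stdio.h>

struct entry { uint8_t flags, entry_type, mc_mac_en; uint16_t egress_port; };

int main(void)
{
        struct entry req;

        memset(&req, 0, sizeof(req));   /* every field is already 0 here */
        req.flags |= 1u << 0;           /* only non-zero bits need setting */
        req.egress_port |= 5;           /* e.g. the VF id field */

        /* entry_type and mc_mac_en stay 0 without explicit writes */
        printf("%d %d\n", req.entry_type, req.egress_port); /* 0 5 */
        return 0;
}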
 
@@ -4318,8 +4273,8 @@ int hclge_rm_uc_addr_common(struct hclge_vport *vport,
        }
 
        memset(&req, 0, sizeof(req));
-       hnae_set_bit(req.flags, HCLGE_MAC_VLAN_BIT0_EN_B, 1);
-       hnae_set_bit(req.entry_type, HCLGE_MAC_VLAN_BIT0_EN_B, 0);
+       hnae3_set_bit(req.flags, HCLGE_MAC_VLAN_BIT0_EN_B, 1);
+       hnae3_set_bit(req.entry_type, HCLGE_MAC_VLAN_BIT0_EN_B, 0);
        hclge_prepare_mac_addr(&req, addr);
        ret = hclge_remove_mac_vlan_tbl(vport, &req);
 
@@ -4331,7 +4286,7 @@ static int hclge_add_mc_addr(struct hnae3_handle *handle,
 {
        struct hclge_vport *vport = hclge_get_vport(handle);
 
-       return  hclge_add_mc_addr_common(vport, addr);
+       return hclge_add_mc_addr_common(vport, addr);
 }
 
 int hclge_add_mc_addr_common(struct hclge_vport *vport,
@@ -4351,10 +4306,10 @@ int hclge_add_mc_addr_common(struct hclge_vport *vport,
                return -EINVAL;
        }
        memset(&req, 0, sizeof(req));
-       hnae_set_bit(req.flags, HCLGE_MAC_VLAN_BIT0_EN_B, 1);
-       hnae_set_bit(req.entry_type, HCLGE_MAC_VLAN_BIT0_EN_B, 0);
-       hnae_set_bit(req.entry_type, HCLGE_MAC_VLAN_BIT1_EN_B, 1);
-       hnae_set_bit(req.mc_mac_en, HCLGE_MAC_VLAN_BIT0_EN_B, 0);
+       hnae3_set_bit(req.flags, HCLGE_MAC_VLAN_BIT0_EN_B, 1);
+       hnae3_set_bit(req.entry_type, HCLGE_MAC_VLAN_BIT0_EN_B, 0);
+       hnae3_set_bit(req.entry_type, HCLGE_MAC_VLAN_BIT1_EN_B, 1);
+       hnae3_set_bit(req.mc_mac_en, HCLGE_MAC_VLAN_BIT0_EN_B, 0);
        hclge_prepare_mac_addr(&req, addr);
        status = hclge_lookup_mac_vlan_tbl(vport, &req, desc, true);
        if (!status) {
@@ -4418,10 +4373,10 @@ int hclge_rm_mc_addr_common(struct hclge_vport *vport,
        }
 
        memset(&req, 0, sizeof(req));
-       hnae_set_bit(req.flags, HCLGE_MAC_VLAN_BIT0_EN_B, 1);
-       hnae_set_bit(req.entry_type, HCLGE_MAC_VLAN_BIT0_EN_B, 0);
-       hnae_set_bit(req.entry_type, HCLGE_MAC_VLAN_BIT1_EN_B, 1);
-       hnae_set_bit(req.mc_mac_en, HCLGE_MAC_VLAN_BIT0_EN_B, 0);
+       hnae3_set_bit(req.flags, HCLGE_MAC_VLAN_BIT0_EN_B, 1);
+       hnae3_set_bit(req.entry_type, HCLGE_MAC_VLAN_BIT0_EN_B, 0);
+       hnae3_set_bit(req.entry_type, HCLGE_MAC_VLAN_BIT1_EN_B, 1);
+       hnae3_set_bit(req.mc_mac_en, HCLGE_MAC_VLAN_BIT0_EN_B, 0);
        hclge_prepare_mac_addr(&req, addr);
        status = hclge_lookup_mac_vlan_tbl(vport, &req, desc, true);
        if (!status) {
@@ -4604,13 +4559,11 @@ static int hclge_set_vlan_filter_ctrl(struct hclge_dev *hdev, u8 vlan_type,
        req->vlan_fe = filter_en;
 
        ret = hclge_cmd_send(&hdev->hw, &desc, 1);
-       if (ret) {
+       if (ret)
                dev_err(&hdev->pdev->dev, "set vlan filter fail, ret =%d.\n",
                        ret);
-               return ret;
-       }
 
-       return 0;
+       return ret;
 }
 
 #define HCLGE_FILTER_TYPE_VF           0
@@ -4802,19 +4755,19 @@ static int hclge_set_vlan_tx_offload_cfg(struct hclge_vport *vport)
        req = (struct hclge_vport_vtag_tx_cfg_cmd *)desc.data;
        req->def_vlan_tag1 = cpu_to_le16(vcfg->default_tag1);
        req->def_vlan_tag2 = cpu_to_le16(vcfg->default_tag2);
-       hnae_set_bit(req->vport_vlan_cfg, HCLGE_ACCEPT_TAG1_B,
-                       vcfg->accept_tag1 ? 1 : 0);
-       hnae_set_bit(req->vport_vlan_cfg, HCLGE_ACCEPT_UNTAG1_B,
-                       vcfg->accept_untag1 ? 1 : 0);
-       hnae_set_bit(req->vport_vlan_cfg, HCLGE_ACCEPT_TAG2_B,
-                       vcfg->accept_tag2 ? 1 : 0);
-       hnae_set_bit(req->vport_vlan_cfg, HCLGE_ACCEPT_UNTAG2_B,
-                       vcfg->accept_untag2 ? 1 : 0);
-       hnae_set_bit(req->vport_vlan_cfg, HCLGE_PORT_INS_TAG1_EN_B,
-                    vcfg->insert_tag1_en ? 1 : 0);
-       hnae_set_bit(req->vport_vlan_cfg, HCLGE_PORT_INS_TAG2_EN_B,
-                    vcfg->insert_tag2_en ? 1 : 0);
-       hnae_set_bit(req->vport_vlan_cfg, HCLGE_CFG_NIC_ROCE_SEL_B, 0);
+       hnae3_set_bit(req->vport_vlan_cfg, HCLGE_ACCEPT_TAG1_B,
+                     vcfg->accept_tag1 ? 1 : 0);
+       hnae3_set_bit(req->vport_vlan_cfg, HCLGE_ACCEPT_UNTAG1_B,
+                     vcfg->accept_untag1 ? 1 : 0);
+       hnae3_set_bit(req->vport_vlan_cfg, HCLGE_ACCEPT_TAG2_B,
+                     vcfg->accept_tag2 ? 1 : 0);
+       hnae3_set_bit(req->vport_vlan_cfg, HCLGE_ACCEPT_UNTAG2_B,
+                     vcfg->accept_untag2 ? 1 : 0);
+       hnae3_set_bit(req->vport_vlan_cfg, HCLGE_PORT_INS_TAG1_EN_B,
+                     vcfg->insert_tag1_en ? 1 : 0);
+       hnae3_set_bit(req->vport_vlan_cfg, HCLGE_PORT_INS_TAG2_EN_B,
+                     vcfg->insert_tag2_en ? 1 : 0);
+       hnae3_set_bit(req->vport_vlan_cfg, HCLGE_CFG_NIC_ROCE_SEL_B, 0);
 
        req->vf_offset = vport->vport_id / HCLGE_VF_NUM_PER_CMD;
        req->vf_bitmap[req->vf_offset] =
@@ -4840,14 +4793,14 @@ static int hclge_set_vlan_rx_offload_cfg(struct hclge_vport *vport)
        hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_VLAN_PORT_RX_CFG, false);
 
        req = (struct hclge_vport_vtag_rx_cfg_cmd *)desc.data;
-       hnae_set_bit(req->vport_vlan_cfg, HCLGE_REM_TAG1_EN_B,
-                    vcfg->strip_tag1_en ? 1 : 0);
-       hnae_set_bit(req->vport_vlan_cfg, HCLGE_REM_TAG2_EN_B,
-                    vcfg->strip_tag2_en ? 1 : 0);
-       hnae_set_bit(req->vport_vlan_cfg, HCLGE_SHOW_TAG1_EN_B,
-                    vcfg->vlan1_vlan_prionly ? 1 : 0);
-       hnae_set_bit(req->vport_vlan_cfg, HCLGE_SHOW_TAG2_EN_B,
-                    vcfg->vlan2_vlan_prionly ? 1 : 0);
+       hnae3_set_bit(req->vport_vlan_cfg, HCLGE_REM_TAG1_EN_B,
+                     vcfg->strip_tag1_en ? 1 : 0);
+       hnae3_set_bit(req->vport_vlan_cfg, HCLGE_REM_TAG2_EN_B,
+                     vcfg->strip_tag2_en ? 1 : 0);
+       hnae3_set_bit(req->vport_vlan_cfg, HCLGE_SHOW_TAG1_EN_B,
+                     vcfg->vlan1_vlan_prionly ? 1 : 0);
+       hnae3_set_bit(req->vport_vlan_cfg, HCLGE_SHOW_TAG2_EN_B,
+                     vcfg->vlan2_vlan_prionly ? 1 : 0);
 
        req->vf_offset = vport->vport_id / HCLGE_VF_NUM_PER_CMD;
        req->vf_bitmap[req->vf_offset] =
@@ -4999,16 +4952,15 @@ static int hclge_set_mac_mtu(struct hclge_dev *hdev, int new_mtu)
 
        req = (struct hclge_config_max_frm_size_cmd *)desc.data;
        req->max_frm_size = cpu_to_le16(max_frm_size);
+       req->min_frm_size = HCLGE_MAC_MIN_FRAME;
 
        ret = hclge_cmd_send(&hdev->hw, &desc, 1);
-       if (ret) {
+       if (ret)
                dev_err(&hdev->pdev->dev, "set mtu fail, ret =%d.\n", ret);
-               return ret;
-       }
-
-       hdev->mps = max_frm_size;
+       else
+               hdev->mps = max_frm_size;
 
-       return 0;
+       return ret;
 }
 
 static int hclge_set_mtu(struct hnae3_handle *handle, int new_mtu)
@@ -5043,7 +4995,7 @@ static int hclge_send_reset_tqp_cmd(struct hclge_dev *hdev, u16 queue_id,
 
        req = (struct hclge_reset_tqp_queue_cmd *)desc.data;
        req->tqp_id = cpu_to_le16(queue_id & HCLGE_RING_ID_MASK);
-       hnae_set_bit(req->reset_req, HCLGE_TQP_RESET_B, enable);
+       hnae3_set_bit(req->reset_req, HCLGE_TQP_RESET_B, enable);
 
        ret = hclge_cmd_send(&hdev->hw, &desc, 1);
        if (ret) {
@@ -5073,7 +5025,7 @@ static int hclge_get_reset_status(struct hclge_dev *hdev, u16 queue_id)
                return ret;
        }
 
-       return hnae_get_bit(req->ready_to_reset, HCLGE_TQP_RESET_B);
+       return hnae3_get_bit(req->ready_to_reset, HCLGE_TQP_RESET_B);
 }
 
 static u16 hclge_covert_handle_qid_global(struct hnae3_handle *handle,
@@ -5380,12 +5332,12 @@ static void hclge_get_mdix_mode(struct hnae3_handle *handle,
        phy_write(phydev, HCLGE_PHY_PAGE_REG, HCLGE_PHY_PAGE_MDIX);
 
        retval = phy_read(phydev, HCLGE_PHY_CSC_REG);
-       mdix_ctrl = hnae_get_field(retval, HCLGE_PHY_MDIX_CTRL_M,
-                                  HCLGE_PHY_MDIX_CTRL_S);
+       mdix_ctrl = hnae3_get_field(retval, HCLGE_PHY_MDIX_CTRL_M,
+                                   HCLGE_PHY_MDIX_CTRL_S);
 
        retval = phy_read(phydev, HCLGE_PHY_CSS_REG);
-       mdix = hnae_get_bit(retval, HCLGE_PHY_MDIX_STATUS_B);
-       is_resolved = hnae_get_bit(retval, HCLGE_PHY_SPEED_DUP_RESOLVE_B);
+       mdix = hnae3_get_bit(retval, HCLGE_PHY_MDIX_STATUS_B);
+       is_resolved = hnae3_get_bit(retval, HCLGE_PHY_SPEED_DUP_RESOLVE_B);
 
        phy_write(phydev, HCLGE_PHY_PAGE_REG, HCLGE_PHY_PAGE_COPPER);
 
@@ -5531,7 +5483,6 @@ static int hclge_pci_init(struct hclge_dev *hdev)
 
        pci_set_master(pdev);
        hw = &hdev->hw;
-       hw->back = hdev;
        hw->io_base = pcim_iomap(pdev, 2, 0);
        if (!hw->io_base) {
                dev_err(&pdev->dev, "Can't map configuration register space\n");
@@ -5562,6 +5513,30 @@ static void hclge_pci_uninit(struct hclge_dev *hdev)
        pci_disable_device(pdev);
 }
 
+static void hclge_state_init(struct hclge_dev *hdev)
+{
+       set_bit(HCLGE_STATE_SERVICE_INITED, &hdev->state);
+       set_bit(HCLGE_STATE_DOWN, &hdev->state);
+       clear_bit(HCLGE_STATE_RST_SERVICE_SCHED, &hdev->state);
+       clear_bit(HCLGE_STATE_RST_HANDLING, &hdev->state);
+       clear_bit(HCLGE_STATE_MBX_SERVICE_SCHED, &hdev->state);
+       clear_bit(HCLGE_STATE_MBX_HANDLING, &hdev->state);
+}
+
+static void hclge_state_uninit(struct hclge_dev *hdev)
+{
+       set_bit(HCLGE_STATE_DOWN, &hdev->state);
+
+       if (hdev->service_timer.function)
+               del_timer_sync(&hdev->service_timer);
+       if (hdev->service_task.func)
+               cancel_work_sync(&hdev->service_task);
+       if (hdev->rst_service_task.func)
+               cancel_work_sync(&hdev->rst_service_task);
+       if (hdev->mbx_service_task.func)
+               cancel_work_sync(&hdev->mbx_service_task);
+}
+
 static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
 {
        struct pci_dev *pdev = ae_dev->pdev;
@@ -5577,8 +5552,6 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
        hdev->pdev = pdev;
        hdev->ae_dev = ae_dev;
        hdev->reset_type = HNAE3_NONE_RESET;
-       hdev->reset_request = 0;
-       hdev->reset_pending = 0;
        ae_dev->priv = hdev;
 
        ret = hclge_pci_init(hdev);
@@ -5702,12 +5675,7 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
        /* Enable MISC vector(vector0) */
        hclge_enable_vector(&hdev->misc_vector, true);
 
-       set_bit(HCLGE_STATE_SERVICE_INITED, &hdev->state);
-       set_bit(HCLGE_STATE_DOWN, &hdev->state);
-       clear_bit(HCLGE_STATE_RST_SERVICE_SCHED, &hdev->state);
-       clear_bit(HCLGE_STATE_RST_HANDLING, &hdev->state);
-       clear_bit(HCLGE_STATE_MBX_SERVICE_SCHED, &hdev->state);
-       clear_bit(HCLGE_STATE_MBX_HANDLING, &hdev->state);
+       hclge_state_init(hdev);
 
        pr_info("%s driver initialization finished.\n", HCLGE_DRIVER_NAME);
        return 0;
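
The state bookkeeping moves into hclge_state_init()/hclge_state_uninit(), whose bodies appear in the surrounding hunks; the uninit side cancels the timer and work items only when their function pointers were ever set, which keeps teardown safe after a partial probe. The guarded-teardown shape, reduced to a toy:

#include <stdio.h>

/* Stand-ins for the kernel's timer/work handles: a non-NULL function
 * pointer means the object was initialised and must be torn down.
 */
struct work { void (*func)(void); };

static void cancel_work(struct work *w) { (void)w; printf("cancelled\n"); }

static void teardown(struct work *service, struct work *mbx)
{
        /* guard each cancel on the init marker, as hclge_state_uninit()
         * does with service_timer.function and the *_task.func fields
         */
        if (service->func)
                cancel_work(service);
        if (mbx->func)
                cancel_work(mbx);
}

static void noop(void) { }

int main(void)
{
        struct work inited = { noop }, never = { 0 };

        teardown(&inited, &never);      /* prints "cancelled" once */
        return 0;
}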
@@ -5812,16 +5780,7 @@ static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev)
        struct hclge_dev *hdev = ae_dev->priv;
        struct hclge_mac *mac = &hdev->hw.mac;
 
-       set_bit(HCLGE_STATE_DOWN, &hdev->state);
-
-       if (hdev->service_timer.function)
-               del_timer_sync(&hdev->service_timer);
-       if (hdev->service_task.func)
-               cancel_work_sync(&hdev->service_task);
-       if (hdev->rst_service_task.func)
-               cancel_work_sync(&hdev->rst_service_task);
-       if (hdev->mbx_service_task.func)
-               cancel_work_sync(&hdev->mbx_service_task);
+       hclge_state_uninit(hdev);
 
        if (mac->phydev)
                mdiobus_unregister(mac->mdio_bus);
@@ -5905,6 +5864,7 @@ static int hclge_set_channels(struct hnae3_handle *handle, u32 new_tqps_num)
        u32 *rss_indir;
        int ret, i;
 
+       /* Free old tqps, then reallocate with the new tqp number at nic setup */
        hclge_release_tqp(vport);
 
        ret = hclge_knic_setup(vport, new_tqps_num);
@@ -6149,8 +6109,8 @@ static int hclge_set_led_status(struct hclge_dev *hdev, u8 locate_led_status)
        hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_LED_STATUS_CFG, false);
 
        req = (struct hclge_set_led_state_cmd *)desc.data;
-       hnae_set_field(req->locate_led_config, HCLGE_LED_LOCATE_STATE_M,
-                      HCLGE_LED_LOCATE_STATE_S, locate_led_status);
+       hnae3_set_field(req->locate_led_config, HCLGE_LED_LOCATE_STATE_M,
+                       HCLGE_LED_LOCATE_STATE_S, locate_led_status);
 
        ret = hclge_cmd_send(&hdev->hw, &desc, 1);
        if (ret)
@@ -6280,7 +6240,6 @@ static const struct hnae3_ae_ops hclge_ops = {
 
 static struct hnae3_ae_algo ae_algo = {
        .ops = &hclge_ops,
-       .name = HCLGE_NAME,
        .pdev_id_table = ae_algo_pci_tbl,
 };
 
index 7488534528cdbea4e3ec70f0ca346a5d8453d315..1528fb3fa6be6d4da6afcba7bdbbf5e1a7608171 100644 (file)
@@ -1,11 +1,5 @@
-/*
- * Copyright (c) 2016~2017 Hisilicon Limited.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright (c) 2016-2017 Hisilicon Limited.
 
 #ifndef __HCLGE_MAIN_H
 #define __HCLGE_MAIN_H
@@ -22,8 +16,6 @@
 
 #define HCLGE_INVALID_VPORT 0xffff
 
-#define HCLGE_ROCE_VECTOR_OFFSET       96
-
 #define HCLGE_PF_CFG_BLOCK_SIZE                32
 #define HCLGE_PF_CFG_DESC_NUM \
        (HCLGE_PF_CFG_BLOCK_SIZE / HCLGE_CFG_RD_LEN_BYTES)
@@ -40,7 +32,7 @@
 #define HCLGE_RSS_HASH_ALGO_TOEPLITZ   0
 #define HCLGE_RSS_HASH_ALGO_SIMPLE     1
 #define HCLGE_RSS_HASH_ALGO_SYMMETRIC  2
-#define HCLGE_RSS_HASH_ALGO_MASK       0xf
+#define HCLGE_RSS_HASH_ALGO_MASK       GENMASK(3, 0)
 #define HCLGE_RSS_CFG_TBL_NUM \
        (HCLGE_RSS_IND_TBL_SIZE / HCLGE_RSS_CFG_TBL_SIZE)
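
GENMASK(3, 0) replacing the bare 0xf makes the intended bit span explicit, and matches the GENMASK(6, 5) already used for HCLGE_PHY_MDIX_CTRL_M below. GENMASK(h, l) builds a mask with bits l through h set; a userspace rendition for 32-bit values (linux/bits.h defines the real macro in terms of BITS_PER_LONG):

#include <stdio.h>

/* Mask with bits l..h set, for unsigned int width. */
#define GENMASK(h, l) (((~0u) << (l)) & (~0u >> (31 - (h))))

int main(void)
{
        printf("0x%x 0x%x\n", GENMASK(3, 0), GENMASK(6, 5)); /* 0xf 0x60 */
        return 0;
}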
 
 /* Copper Specific Status Register */
 #define HCLGE_PHY_CSS_REG              17
 
-#define HCLGE_PHY_MDIX_CTRL_S          (5)
+#define HCLGE_PHY_MDIX_CTRL_S          5
 #define HCLGE_PHY_MDIX_CTRL_M          GENMASK(6, 5)
 
-#define HCLGE_PHY_MDIX_STATUS_B        (6)
-#define HCLGE_PHY_SPEED_DUP_RESOLVE_B  (11)
+#define HCLGE_PHY_MDIX_STATUS_B                6
+#define HCLGE_PHY_SPEED_DUP_RESOLVE_B  11
 
 /* Factor used to calculate offset and bitmap of VF num */
 #define HCLGE_VF_NUM_PER_CMD           64
 
 /* Reset related Registers */
 #define HCLGE_MISC_RESET_STS_REG       0x20700
+#define HCLGE_MISC_VECTOR_INT_STS      0x20800
 #define HCLGE_GLOBAL_RESET_REG         0x20A00
-#define HCLGE_GLOBAL_RESET_BIT         0x0
-#define HCLGE_CORE_RESET_BIT           0x1
+#define HCLGE_GLOBAL_RESET_BIT         0
+#define HCLGE_CORE_RESET_BIT           1
 #define HCLGE_FUN_RST_ING              0x20C00
 #define HCLGE_FUN_RST_ING_B            0
 
@@ -128,6 +121,7 @@ enum HCLGE_DEV_STATE {
        HCLGE_STATE_MBX_SERVICE_SCHED,
        HCLGE_STATE_MBX_HANDLING,
        HCLGE_STATE_STATISTICS_UPDATING,
+       HCLGE_STATE_CMD_DISABLE,
        HCLGE_STATE_MAX
 };
 
@@ -138,12 +132,6 @@ enum hclge_evt_cause {
 };
 
 #define HCLGE_MPF_ENBALE 1
-struct hclge_caps {
-       u16 num_tqp;
-       u16 num_buffer_cell;
-       u32 flag;
-       u16 vmdq;
-};
 
 enum HCLGE_MAC_SPEED {
        HCLGE_MAC_SPEED_10M     = 10,           /* 10 Mbps */
@@ -189,8 +177,6 @@ struct hclge_hw {
        struct hclge_mac mac;
        int num_vec;
        struct hclge_cmq cmq;
-       struct hclge_caps caps;
-       void *back;
 };
 
 /* TQP stats */
@@ -202,7 +188,10 @@ struct hlcge_tqp_stats {
 };
 
 struct hclge_tqp {
-       struct device *dev;     /* Device for DMA mapping */
+       /* copy of device pointer from pci_dev,
+        * used when performing DMA mapping
+        */
+       struct device *dev;
        struct hnae3_queue q;
        struct hlcge_tqp_stats tqp_stats;
        u16 index;      /* Global index in a NIC controller */
@@ -492,13 +481,11 @@ struct hclge_dev {
        u16 num_tqps;                   /* Num task queue pairs of this PF */
        u16 num_req_vfs;                /* Num VFs requested for this PF */
 
-       /* Base task tqp physical id of this PF */
-       u16 base_tqp_pid;
+       u16 base_tqp_pid;       /* Base task tqp physical id of this PF */
        u16 alloc_rss_size;             /* Allocated RSS task queue */
        u16 rss_size_max;               /* HW defined max RSS task queue */
 
-       /* Num of guaranteed filters for this PF */
-       u16 fdir_pf_filter_count;
+       u16 fdir_pf_filter_count; /* Num of guaranteed filters for this PF */
        u16 num_alloc_vport;            /* Num vports this driver supports */
        u32 numa_node_mask;
        u16 rx_buf_len;
@@ -520,6 +507,7 @@ struct hclge_dev {
        u16 num_msi;
        u16 num_msi_left;
        u16 num_msi_used;
+       u16 roce_base_msix_offset;
        u32 base_msi_vector;
        u16 *vector_status;
        int *vector_irq;
@@ -560,7 +548,7 @@ struct hclge_dev {
        u32 mps; /* Max packet size */
 
        enum hclge_mta_dmac_sel_type mta_mac_sel_type;
-       bool enable_mta; /* Mutilcast filter enable */
+       bool enable_mta; /* Multicast filter enable */
 
        struct hclge_vlan_type_cfg vlan_type_cfg;
 
index 7541cb9b71ce2a6c5886dbd9bcf64028dfb36101..f34851c91eb39432705a6206959feffa7cc56529 100644 (file)
@@ -104,13 +104,15 @@ static void hclge_free_vector_ring_chain(struct hnae3_ring_chain_node *head)
        }
 }
 
-/* hclge_get_ring_chain_from_mbx: get ring type & tqpid from mailbox message
+/* hclge_get_ring_chain_from_mbx: get ring type & tqp id & int_gl idx
+ * from mailbox message
  * msg[0]: opcode
  * msg[1]: <not relevant to this function>
  * msg[2]: ring_num
  * msg[3]: first ring type (TX|RX)
  * msg[4]: first tqp id
- * msg[5] ~ msg[14]: other ring type and tqp id
+ * msg[5]: first int_gl idx
+ * msg[6] ~ msg[14]: other ring type, tqp id and int_gl idx
  */
 static int hclge_get_ring_chain_from_mbx(
                        struct hclge_mbx_vf_to_pf_cmd *req,
@@ -128,12 +130,12 @@ static int hclge_get_ring_chain_from_mbx(
                HCLGE_MBX_RING_NODE_VARIABLE_NUM))
                return -ENOMEM;
 
-       hnae_set_bit(ring_chain->flag, HNAE3_RING_TYPE_B, req->msg[3]);
+       hnae3_set_bit(ring_chain->flag, HNAE3_RING_TYPE_B, req->msg[3]);
        ring_chain->tqp_index =
                        hclge_get_queue_id(vport->nic.kinfo.tqp[req->msg[4]]);
-       hnae_set_field(ring_chain->int_gl_idx, HCLGE_INT_GL_IDX_M,
-                      HCLGE_INT_GL_IDX_S,
-                      req->msg[5]);
+       hnae3_set_field(ring_chain->int_gl_idx, HNAE3_RING_GL_IDX_M,
+                       HNAE3_RING_GL_IDX_S,
+                       req->msg[5]);
 
        cur_chain = ring_chain;
 
@@ -142,19 +144,19 @@ static int hclge_get_ring_chain_from_mbx(
                if (!new_chain)
                        goto err;
 
-               hnae_set_bit(new_chain->flag, HNAE3_RING_TYPE_B,
-                            req->msg[HCLGE_MBX_RING_NODE_VARIABLE_NUM * i +
-                            HCLGE_MBX_RING_MAP_BASIC_MSG_NUM]);
+               hnae3_set_bit(new_chain->flag, HNAE3_RING_TYPE_B,
+                             req->msg[HCLGE_MBX_RING_NODE_VARIABLE_NUM * i +
+                             HCLGE_MBX_RING_MAP_BASIC_MSG_NUM]);
 
                new_chain->tqp_index =
                hclge_get_queue_id(vport->nic.kinfo.tqp
                        [req->msg[HCLGE_MBX_RING_NODE_VARIABLE_NUM * i +
                        HCLGE_MBX_RING_MAP_BASIC_MSG_NUM + 1]]);
 
-               hnae_set_field(new_chain->int_gl_idx, HCLGE_INT_GL_IDX_M,
-                              HCLGE_INT_GL_IDX_S,
-                              req->msg[HCLGE_MBX_RING_NODE_VARIABLE_NUM * i +
-                              HCLGE_MBX_RING_MAP_BASIC_MSG_NUM + 2]);
+               hnae3_set_field(new_chain->int_gl_idx, HNAE3_RING_GL_IDX_M,
+                               HNAE3_RING_GL_IDX_S,
+                               req->msg[HCLGE_MBX_RING_NODE_VARIABLE_NUM * i +
+                               HCLGE_MBX_RING_MAP_BASIC_MSG_NUM + 2]);
 
                cur_chain->next = new_chain;
                cur_chain = new_chain;
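
Per the corrected comment above, a ring-map message is msg[2] = ring count, a first (type, tqp id, int_gl idx) triplet in msg[3..5], and further triplets packed back to back from msg[6] on; the index arithmetic in the loop works out to three words per node after a three-word basic header, which is what the two HCLGE_MBX_* constants appear to encode (their exact values are an assumption here). A standalone parser of that layout:

#include <stdio.h>

#define NODE_WORDS 3   /* assumed HCLGE_MBX_RING_NODE_VARIABLE_NUM */
#define BASIC_MSG  3   /* assumed HCLGE_MBX_RING_MAP_BASIC_MSG_NUM */

/* Walk the ring-map message: msg[3..5] for ring 0, later triplets
 * starting at msg[NODE_WORDS * i + BASIC_MSG].
 */
static void parse_ring_map(const unsigned char *msg)
{
        int num = msg[2];

        printf("ring0: type=%d tqp=%d gl=%d\n", msg[3], msg[4], msg[5]);
        for (int i = 1; i < num; i++) {
                int base = NODE_WORDS * i + BASIC_MSG;

                printf("ring%d: type=%d tqp=%d gl=%d\n", i,
                       msg[base], msg[base + 1], msg[base + 2]);
        }
}

int main(void)
{
        unsigned char msg[15] = { 0, 0, 2, 1, 4, 0, 0, 5, 1 };

        parse_ring_map(msg); /* two rings: (1,4,0) and (0,5,1) */
        return 0;
}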
@@ -460,7 +462,7 @@ void hclge_mbx_handler(struct hclge_dev *hdev)
                req = (struct hclge_mbx_vf_to_pf_cmd *)desc->data;
 
                flag = le16_to_cpu(crq->desc[crq->next_to_use].flag);
-               if (unlikely(!hnae_get_bit(flag, HCLGE_CMDQ_RX_OUTVLD_B))) {
+               if (unlikely(!hnae3_get_bit(flag, HCLGE_CMDQ_RX_OUTVLD_B))) {
                        dev_warn(&hdev->pdev->dev,
                                 "dropped invalid mailbox message, code = %d\n",
                                 req->msg[0]);
index 9f7932e423b5ec3efdad1b7d0cce4f39a1847e07..2065ee2fd358d73fc6bd3c0c1eb29f5c48f15cf7 100644 (file)
@@ -1,11 +1,5 @@
-/*
- * Copyright (c) 2016~2017 Hisilicon Limited.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright (c) 2016-2017 Hisilicon Limited.
 
 #include <linux/etherdevice.h>
 #include <linux/kernel.h>
@@ -67,16 +61,16 @@ static int hclge_mdio_write(struct mii_bus *bus, int phyid, int regnum,
 
        mdio_cmd = (struct hclge_mdio_cfg_cmd *)desc.data;
 
-       hnae_set_field(mdio_cmd->phyid, HCLGE_MDIO_PHYID_M,
-                      HCLGE_MDIO_PHYID_S, phyid);
-       hnae_set_field(mdio_cmd->phyad, HCLGE_MDIO_PHYREG_M,
-                      HCLGE_MDIO_PHYREG_S, regnum);
+       hnae3_set_field(mdio_cmd->phyid, HCLGE_MDIO_PHYID_M,
+                       HCLGE_MDIO_PHYID_S, phyid);
+       hnae3_set_field(mdio_cmd->phyad, HCLGE_MDIO_PHYREG_M,
+                       HCLGE_MDIO_PHYREG_S, regnum);
 
-       hnae_set_bit(mdio_cmd->ctrl_bit, HCLGE_MDIO_CTRL_START_B, 1);
-       hnae_set_field(mdio_cmd->ctrl_bit, HCLGE_MDIO_CTRL_ST_M,
-                      HCLGE_MDIO_CTRL_ST_S, 1);
-       hnae_set_field(mdio_cmd->ctrl_bit, HCLGE_MDIO_CTRL_OP_M,
-                      HCLGE_MDIO_CTRL_OP_S, HCLGE_MDIO_C22_WRITE);
+       hnae3_set_bit(mdio_cmd->ctrl_bit, HCLGE_MDIO_CTRL_START_B, 1);
+       hnae3_set_field(mdio_cmd->ctrl_bit, HCLGE_MDIO_CTRL_ST_M,
+                       HCLGE_MDIO_CTRL_ST_S, 1);
+       hnae3_set_field(mdio_cmd->ctrl_bit, HCLGE_MDIO_CTRL_OP_M,
+                       HCLGE_MDIO_CTRL_OP_S, HCLGE_MDIO_C22_WRITE);
 
        mdio_cmd->data_wr = cpu_to_le16(data);
 
@@ -105,16 +99,16 @@ static int hclge_mdio_read(struct mii_bus *bus, int phyid, int regnum)
 
        mdio_cmd = (struct hclge_mdio_cfg_cmd *)desc.data;
 
-       hnae_set_field(mdio_cmd->phyid, HCLGE_MDIO_PHYID_M,
-                      HCLGE_MDIO_PHYID_S, phyid);
-       hnae_set_field(mdio_cmd->phyad, HCLGE_MDIO_PHYREG_M,
-                      HCLGE_MDIO_PHYREG_S, regnum);
+       hnae3_set_field(mdio_cmd->phyid, HCLGE_MDIO_PHYID_M,
+                       HCLGE_MDIO_PHYID_S, phyid);
+       hnae3_set_field(mdio_cmd->phyad, HCLGE_MDIO_PHYREG_M,
+                       HCLGE_MDIO_PHYREG_S, regnum);
 
-       hnae_set_bit(mdio_cmd->ctrl_bit, HCLGE_MDIO_CTRL_START_B, 1);
-       hnae_set_field(mdio_cmd->ctrl_bit, HCLGE_MDIO_CTRL_ST_M,
-                      HCLGE_MDIO_CTRL_ST_S, 1);
-       hnae_set_field(mdio_cmd->ctrl_bit, HCLGE_MDIO_CTRL_OP_M,
-                      HCLGE_MDIO_CTRL_OP_S, HCLGE_MDIO_C22_READ);
+       hnae3_set_bit(mdio_cmd->ctrl_bit, HCLGE_MDIO_CTRL_START_B, 1);
+       hnae3_set_field(mdio_cmd->ctrl_bit, HCLGE_MDIO_CTRL_ST_M,
+                       HCLGE_MDIO_CTRL_ST_S, 1);
+       hnae3_set_field(mdio_cmd->ctrl_bit, HCLGE_MDIO_CTRL_OP_M,
+                       HCLGE_MDIO_CTRL_OP_S, HCLGE_MDIO_C22_READ);
 
        /* Read out phy data */
        ret = hclge_cmd_send(&hdev->hw, &desc, 1);
@@ -125,7 +119,7 @@ static int hclge_mdio_read(struct mii_bus *bus, int phyid, int regnum)
                return ret;
        }
 
-       if (hnae_get_bit(le16_to_cpu(mdio_cmd->sta), HCLGE_MDIO_STA_B)) {
+       if (hnae3_get_bit(le16_to_cpu(mdio_cmd->sta), HCLGE_MDIO_STA_B)) {
                dev_err(&hdev->pdev->dev, "mdio read data error\n");
                return -EIO;
        }
index c5e91cfb8f2c24388bfff92aae344ea9b7e808e8..bb3ce35e0d66544432e69951dcaccfd35a0f92ea 100644 (file)
@@ -1,11 +1,5 @@
-/*
- * Copyright (c) 2016-2017 Hisilicon Limited.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright (c) 2016-2017 Hisilicon Limited.
 
 #ifndef __HCLGE_MDIO_H
 #define __HCLGE_MDIO_H
index 262c125f81375a8f91f9bccc899144ad01f2605a..5db70a1451c58c8f683fb67320f39e50640f5d6a 100644 (file)
@@ -1,11 +1,5 @@
-/*
- * Copyright (c) 2016~2017 Hisilicon Limited.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright (c) 2016-2017 Hisilicon Limited.
 
 #include <linux/etherdevice.h>
 
@@ -1184,10 +1178,10 @@ static int hclge_bp_setup_hw(struct hclge_dev *hdev, u8 tc)
                        u16 qs_id = vport->qs_offset + tc;
                        u8 grp, sub_grp;
 
-                       grp = hnae_get_field(qs_id, HCLGE_BP_GRP_ID_M,
-                                            HCLGE_BP_GRP_ID_S);
-                       sub_grp = hnae_get_field(qs_id, HCLGE_BP_SUB_GRP_ID_M,
-                                                HCLGE_BP_SUB_GRP_ID_S);
+                       grp = hnae3_get_field(qs_id, HCLGE_BP_GRP_ID_M,
+                                             HCLGE_BP_GRP_ID_S);
+                       sub_grp = hnae3_get_field(qs_id, HCLGE_BP_SUB_GRP_ID_M,
+                                                 HCLGE_BP_SUB_GRP_ID_S);
                        if (i == grp)
                                qs_bitmap |= (1 << sub_grp);
 
@@ -1223,6 +1217,10 @@ static int hclge_mac_pause_setup_hw(struct hclge_dev *hdev)
                tx_en = true;
                rx_en = true;
                break;
+       case HCLGE_FC_PFC:
+               tx_en = false;
+               rx_en = false;
+               break;
        default:
                tx_en = true;
                rx_en = true;
@@ -1240,8 +1238,9 @@ int hclge_pause_setup_hw(struct hclge_dev *hdev)
        if (ret)
                return ret;
 
-       if (hdev->tm_info.fc_mode != HCLGE_FC_PFC)
-               return hclge_mac_pause_setup_hw(hdev);
+       ret = hclge_mac_pause_setup_hw(hdev);
+       if (ret)
+               return ret;
 
        /* Only DCB-supported dev supports qset back pressure and pfc cmd */
        if (!hnae3_dev_dcb_supported(hdev))
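
With the new HCLGE_FC_PFC case, MAC-level pause is now always programmed, and explicitly disabled in both directions when per-priority flow control is in use, instead of the whole setup being skipped as the second hunk shows. A self-contained sketch of the mode-to-pause mapping; the RX/TX/FULL rows are assumptions inferred from the mode names, since only FULL, PFC and the default case appear above:

    #include <stdbool.h>
    #include <stdio.h>

    /* Mode names mirror the driver's HCLGE_FC_* values. */
    enum fc_mode { FC_NONE, FC_RX_PAUSE, FC_TX_PAUSE, FC_FULL, FC_PFC };

    static void fc_to_mac_pause(enum fc_mode mode, bool *tx_en, bool *rx_en)
    {
            switch (mode) {
            case FC_RX_PAUSE: *tx_en = false; *rx_en = true;  break; /* assumed */
            case FC_TX_PAUSE: *tx_en = true;  *rx_en = false; break; /* assumed */
            case FC_FULL:     *tx_en = true;  *rx_en = true;  break;
            case FC_PFC:      *tx_en = false; *rx_en = false; break; /* new case */
            default:          *tx_en = true;  *rx_en = true;  break;
            }
    }

    int main(void)
    {
            bool tx, rx;

            fc_to_mac_pause(FC_PFC, &tx, &rx);
            printf("PFC: mac pause tx=%d rx=%d\n", tx, rx); /* prints 0 0 */
            return 0;
    }
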
index c2b6e8a6700f067fa38511fdc34b27a1eb1dc75e..dd4c194747c16cbf4a716d811dd4102615231e2f 100644 (file)
@@ -1,11 +1,5 @@
-/*
- * Copyright (c) 2016~2017 Hisilicon Limited.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright (c) 2016-2017 Hisilicon Limited.
 
 #ifndef __HCLGE_TM_H
 #define __HCLGE_TM_H
@@ -123,10 +117,11 @@ struct hclge_port_shapping_cmd {
 };
 
 #define hclge_tm_set_field(dest, string, val) \
-                       hnae_set_field((dest), (HCLGE_TM_SHAP_##string##_MSK), \
-                                      (HCLGE_TM_SHAP_##string##_LSH), val)
+                          hnae3_set_field((dest), \
+                          (HCLGE_TM_SHAP_##string##_MSK), \
+                          (HCLGE_TM_SHAP_##string##_LSH), val)
 #define hclge_tm_get_field(src, string) \
-                       hnae_get_field((src), (HCLGE_TM_SHAP_##string##_MSK), \
+                       hnae3_get_field((src), (HCLGE_TM_SHAP_##string##_MSK), \
                                       (HCLGE_TM_SHAP_##string##_LSH))
 
 int hclge_tm_schd_init(struct hclge_dev *hdev);
index 1bbfe131b596e498c8c12dff3339b5e48bec6acd..fb471fe2c4946692e1c36b31bf92ec325812a372 100644 (file)
@@ -76,32 +76,24 @@ static int hclgevf_alloc_cmd_desc(struct hclgevf_cmq_ring *ring)
 {
        int size = ring->desc_num * sizeof(struct hclgevf_desc);
 
-       ring->desc = kzalloc(size, GFP_KERNEL);
+       ring->desc = dma_zalloc_coherent(cmq_ring_to_dev(ring),
+                                        size, &ring->desc_dma_addr,
+                                        GFP_KERNEL);
        if (!ring->desc)
                return -ENOMEM;
 
-       ring->desc_dma_addr = dma_map_single(cmq_ring_to_dev(ring), ring->desc,
-                                            size, DMA_BIDIRECTIONAL);
-
-       if (dma_mapping_error(cmq_ring_to_dev(ring), ring->desc_dma_addr)) {
-               ring->desc_dma_addr = 0;
-               kfree(ring->desc);
-               ring->desc = NULL;
-               return -ENOMEM;
-       }
-
        return 0;
 }
 
 static void hclgevf_free_cmd_desc(struct hclgevf_cmq_ring *ring)
 {
-       dma_unmap_single(cmq_ring_to_dev(ring), ring->desc_dma_addr,
-                        ring->desc_num * sizeof(ring->desc[0]),
-                        hclgevf_ring_to_dma_dir(ring));
+       int size  = ring->desc_num * sizeof(struct hclgevf_desc);
 
-       ring->desc_dma_addr = 0;
-       kfree(ring->desc);
-       ring->desc = NULL;
+       if (ring->desc) {
+               dma_free_coherent(cmq_ring_to_dev(ring), size,
+                                 ring->desc, ring->desc_dma_addr);
+               ring->desc = NULL;
+       }
 }
 
 static int hclgevf_init_cmd_queue(struct hclgevf_dev *hdev,
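
The hunk above trades a kzalloc() plus dma_map_single() streaming mapping for a single dma_zalloc_coherent() call: the ring lives in CPU/device-coherent memory for its whole lifetime, so the dma_mapping_error() handling and the explicit unmap on free both disappear. A minimal sketch of the resulting allocate/free pairing, with hypothetical ring and helper names:

    #include <linux/dma-mapping.h>
    #include <linux/types.h>

    /* Hypothetical ring; mirrors the fields the hunk above operates on. */
    struct demo_ring {
            void *desc;
            dma_addr_t desc_dma_addr;
            int desc_num;
    };

    static int demo_ring_alloc(struct device *dev, struct demo_ring *ring,
                               size_t desc_size)
    {
            size_t size = ring->desc_num * desc_size;

            /* One call allocates, zeroes and maps; no dma_mapping_error()
             * check and no separate kfree()/unmap on the error path.
             */
            ring->desc = dma_zalloc_coherent(dev, size, &ring->desc_dma_addr,
                                             GFP_KERNEL);
            return ring->desc ? 0 : -ENOMEM;
    }

    static void demo_ring_free(struct device *dev, struct demo_ring *ring,
                               size_t desc_size)
    {
            if (ring->desc) {
                    dma_free_coherent(dev, ring->desc_num * desc_size,
                                      ring->desc, ring->desc_dma_addr);
                    ring->desc = NULL;
            }
    }
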
index 621c6cbacf767c983a4a26efc7f2871c747cfa64..19b32860309ca9a8b5412a760bf66707a6a6b0b4 100644 (file)
@@ -82,6 +82,7 @@ struct hclgevf_cmq {
 enum hclgevf_opcode_type {
        /* Generic command */
        HCLGEVF_OPC_QUERY_FW_VER        = 0x0001,
+       HCLGEVF_OPC_QUERY_VF_RSRC       = 0x0024,
        /* TQP command */
        HCLGEVF_OPC_QUERY_TX_STATUS     = 0x0B03,
        HCLGEVF_OPC_QUERY_RX_STATUS     = 0x0B13,
@@ -134,6 +135,19 @@ struct hclgevf_query_version_cmd {
        __le32 firmware_rsv[5];
 };
 
+#define HCLGEVF_MSIX_OFT_ROCEE_S       0
+#define HCLGEVF_MSIX_OFT_ROCEE_M       (0xffff << HCLGEVF_MSIX_OFT_ROCEE_S)
+#define HCLGEVF_VEC_NUM_S              0
+#define HCLGEVF_VEC_NUM_M              (0xff << HCLGEVF_VEC_NUM_S)
+struct hclgevf_query_res_cmd {
+       __le16 tqp_num;
+       __le16 reserved;
+       __le16 msixcap_localid_ba_nic;
+       __le16 msixcap_localid_ba_rocee;
+       __le16 vf_intr_vector_number;
+       __le16 rsv[7];
+};
+
 #define HCLGEVF_RSS_HASH_KEY_OFFSET    4
 #define HCLGEVF_RSS_HASH_KEY_NUM       16
 struct hclgevf_rss_config_cmd {
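
The new HCLGEVF_MSIX_OFT_ROCEE_* and HCLGEVF_VEC_NUM_* pairs follow the driver's usual convention of a positioned mask (_M) plus a shift (_S). Assuming hnae3_get_field() expands to the conventional ((origin) & (mask)) >> (shift) (its definition lives in hnae3.h, not in this diff), a self-contained sketch of decoding the query response words:

    #include <stdint.h>
    #include <stdio.h>

    #define HCLGEVF_MSIX_OFT_ROCEE_S 0
    #define HCLGEVF_MSIX_OFT_ROCEE_M (0xffff << HCLGEVF_MSIX_OFT_ROCEE_S)
    #define HCLGEVF_VEC_NUM_S        0
    #define HCLGEVF_VEC_NUM_M        (0xff << HCLGEVF_VEC_NUM_S)

    /* Assumed expansion of hnae3_get_field(); the real macro is in hnae3.h. */
    #define get_field(origin, mask, shift) (((origin) & (mask)) >> (shift))

    int main(void)
    {
            uint16_t msixcap_localid_ba_rocee = 64;     /* hypothetical response */
            uint16_t vf_intr_vector_number   = 0x0104;  /* hypothetical response */

            unsigned roce_base = get_field(msixcap_localid_ba_rocee,
                                           HCLGEVF_MSIX_OFT_ROCEE_M,
                                           HCLGEVF_MSIX_OFT_ROCEE_S);
            unsigned num_vec = get_field(vf_intr_vector_number,
                                         HCLGEVF_VEC_NUM_M, HCLGEVF_VEC_NUM_S);

            printf("roce msix offset=%u, vectors=%u\n", roce_base, num_vec);
            return 0;
    }
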
index a17872aab168112e637ba64747d09ee343712e8c..9c0091f2addfcff7bd565963c43003b210c3d571 100644 (file)
@@ -330,6 +330,12 @@ static int hclgevf_set_handle_info(struct hclgevf_dev *hdev)
 
 static void hclgevf_free_vector(struct hclgevf_dev *hdev, int vector_id)
 {
+       if (hdev->vector_status[vector_id] == HCLGEVF_INVALID_VPORT) {
+               dev_warn(&hdev->pdev->dev,
+                        "vector (vector_id %d) has been freed.\n", vector_id);
+               return;
+       }
+
        hdev->vector_status[vector_id] = HCLGEVF_INVALID_VPORT;
        hdev->num_msi_left += 1;
        hdev->num_msi_used -= 1;
@@ -444,12 +450,12 @@ static int hclgevf_set_rss_tc_mode(struct hclgevf_dev *hdev,  u16 rss_size)
 
        hclgevf_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_RSS_TC_MODE, false);
        for (i = 0; i < HCLGEVF_MAX_TC_NUM; i++) {
-               hnae_set_bit(req->rss_tc_mode[i], HCLGEVF_RSS_TC_VALID_B,
-                            (tc_valid[i] & 0x1));
-               hnae_set_field(req->rss_tc_mode[i], HCLGEVF_RSS_TC_SIZE_M,
-                              HCLGEVF_RSS_TC_SIZE_S, tc_size[i]);
-               hnae_set_field(req->rss_tc_mode[i], HCLGEVF_RSS_TC_OFFSET_M,
-                              HCLGEVF_RSS_TC_OFFSET_S, tc_offset[i]);
+               hnae3_set_bit(req->rss_tc_mode[i], HCLGEVF_RSS_TC_VALID_B,
+                             (tc_valid[i] & 0x1));
+               hnae3_set_field(req->rss_tc_mode[i], HCLGEVF_RSS_TC_SIZE_M,
+                               HCLGEVF_RSS_TC_SIZE_S, tc_size[i]);
+               hnae3_set_field(req->rss_tc_mode[i], HCLGEVF_RSS_TC_OFFSET_M,
+                               HCLGEVF_RSS_TC_OFFSET_S, tc_offset[i]);
        }
        status = hclgevf_cmd_send(&hdev->hw, &desc, 1);
        if (status)
@@ -547,24 +553,18 @@ static int hclgevf_get_tc_size(struct hnae3_handle *handle)
 }
 
 static int hclgevf_bind_ring_to_vector(struct hnae3_handle *handle, bool en,
-                                      int vector,
+                                      int vector_id,
                                       struct hnae3_ring_chain_node *ring_chain)
 {
        struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
        struct hnae3_ring_chain_node *node;
        struct hclge_mbx_vf_to_pf_cmd *req;
        struct hclgevf_desc desc;
-       int i = 0, vector_id;
+       int i = 0;
        int status;
        u8 type;
 
        req = (struct hclge_mbx_vf_to_pf_cmd *)desc.data;
-       vector_id = hclgevf_get_vector_index(hdev, vector);
-       if (vector_id < 0) {
-               dev_err(&handle->pdev->dev,
-                       "Get vector index fail. ret =%d\n", vector_id);
-               return vector_id;
-       }
 
        for (node = ring_chain; node; node = node->next) {
                int idx_offset = HCLGE_MBX_RING_MAP_BASIC_MSG_NUM +
@@ -582,11 +582,11 @@ static int hclgevf_bind_ring_to_vector(struct hnae3_handle *handle, bool en,
                }
 
                req->msg[idx_offset] =
-                               hnae_get_bit(node->flag, HNAE3_RING_TYPE_B);
+                               hnae3_get_bit(node->flag, HNAE3_RING_TYPE_B);
                req->msg[idx_offset + 1] = node->tqp_index;
-               req->msg[idx_offset + 2] = hnae_get_field(node->int_gl_idx,
-                                                         HNAE3_RING_GL_IDX_M,
-                                                         HNAE3_RING_GL_IDX_S);
+               req->msg[idx_offset + 2] = hnae3_get_field(node->int_gl_idx,
+                                                          HNAE3_RING_GL_IDX_M,
+                                                          HNAE3_RING_GL_IDX_S);
 
                i++;
                if ((i == (HCLGE_MBX_VF_MSG_DATA_NUM -
@@ -617,7 +617,17 @@ static int hclgevf_bind_ring_to_vector(struct hnae3_handle *handle, bool en,
 static int hclgevf_map_ring_to_vector(struct hnae3_handle *handle, int vector,
                                      struct hnae3_ring_chain_node *ring_chain)
 {
-       return hclgevf_bind_ring_to_vector(handle, true, vector, ring_chain);
+       struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+       int vector_id;
+
+       vector_id = hclgevf_get_vector_index(hdev, vector);
+       if (vector_id < 0) {
+               dev_err(&handle->pdev->dev,
+                       "Get vector index fail. ret = %d\n", vector_id);
+               return vector_id;
+       }
+
+       return hclgevf_bind_ring_to_vector(handle, true, vector_id, ring_chain);
 }
 
 static int hclgevf_unmap_ring_from_vector(
@@ -635,7 +645,7 @@ static int hclgevf_unmap_ring_from_vector(
                return vector_id;
        }
 
-       ret = hclgevf_bind_ring_to_vector(handle, false, vector, ring_chain);
+       ret = hclgevf_bind_ring_to_vector(handle, false, vector_id, ring_chain);
        if (ret)
                dev_err(&handle->pdev->dev,
                        "Unmap ring from vector fail. vector=%d, ret =%d\n",
@@ -648,8 +658,17 @@ static int hclgevf_unmap_ring_from_vector(
 static int hclgevf_put_vector(struct hnae3_handle *handle, int vector)
 {
        struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+       int vector_id;
 
-       hclgevf_free_vector(hdev, vector);
+       vector_id = hclgevf_get_vector_index(hdev, vector);
+       if (vector_id < 0) {
+               dev_err(&handle->pdev->dev,
+                       "hclgevf_put_vector get vector index fail. ret = %d\n",
+                       vector_id);
+               return vector_id;
+       }
+
+       hclgevf_free_vector(hdev, vector_id);
 
        return 0;
 }
@@ -990,8 +1009,8 @@ static int hclgevf_reset_wait(struct hclgevf_dev *hdev)
 
        /* wait to check the hardware reset completion status */
        val = hclgevf_read_dev(&hdev->hw, HCLGEVF_FUN_RST_ING);
-       while (hnae_get_bit(val, HCLGEVF_FUN_RST_ING_B) &&
-                           (cnt < HCLGEVF_RESET_WAIT_CNT)) {
+       while (hnae3_get_bit(val, HCLGEVF_FUN_RST_ING_B) &&
+              (cnt < HCLGEVF_RESET_WAIT_CNT)) {
                msleep(HCLGEVF_RESET_WAIT_MS);
                val = hclgevf_read_dev(&hdev->hw, HCLGEVF_FUN_RST_ING);
                cnt++;
@@ -1351,14 +1370,13 @@ static int hclgevf_init_roce_base_info(struct hclgevf_dev *hdev)
        struct hnae3_handle *roce = &hdev->roce;
        struct hnae3_handle *nic = &hdev->nic;
 
-       roce->rinfo.num_vectors = HCLGEVF_ROCEE_VECTOR_NUM;
+       roce->rinfo.num_vectors = hdev->num_roce_msix;
 
        if (hdev->num_msi_left < roce->rinfo.num_vectors ||
            hdev->num_msi_left == 0)
                return -EINVAL;
 
-       roce->rinfo.base_vector =
-               hdev->vector_status[hdev->num_msi_used];
+       roce->rinfo.base_vector = hdev->roce_base_vector;
 
        roce->rinfo.netdev = nic->kinfo.netdev;
        roce->rinfo.roce_io_base = hdev->hw.io_base;
@@ -1501,10 +1519,15 @@ static int hclgevf_init_msi(struct hclgevf_dev *hdev)
        if (hclgevf_dev_ongoing_reset(hdev))
                return 0;
 
-       hdev->num_msi = HCLGEVF_MAX_VF_VECTOR_NUM;
+       if (hnae3_get_bit(hdev->ae_dev->flag, HNAE3_DEV_SUPPORT_ROCE_B))
+               vectors = pci_alloc_irq_vectors(pdev,
+                                               hdev->roce_base_msix_offset + 1,
+                                               hdev->num_msi,
+                                               PCI_IRQ_MSIX);
+       else
+               vectors = pci_alloc_irq_vectors(pdev, 1, hdev->num_msi,
+                                               PCI_IRQ_MSI | PCI_IRQ_MSIX);
 
-       vectors = pci_alloc_irq_vectors(pdev, 1, hdev->num_msi,
-                                       PCI_IRQ_MSI | PCI_IRQ_MSIX);
        if (vectors < 0) {
                dev_err(&pdev->dev,
                        "failed(%d) to allocate MSI/MSI-X vectors\n",
@@ -1519,6 +1542,7 @@ static int hclgevf_init_msi(struct hclgevf_dev *hdev)
        hdev->num_msi = vectors;
        hdev->num_msi_left = vectors;
        hdev->base_msi_vector = pdev->irq;
+       hdev->roce_base_vector = pdev->irq + hdev->roce_base_msix_offset;
 
        hdev->vector_status = devm_kcalloc(&pdev->dev, hdev->num_msi,
                                           sizeof(u16), GFP_KERNEL);
@@ -1582,9 +1606,10 @@ static void hclgevf_misc_irq_uninit(struct hclgevf_dev *hdev)
        hclgevf_free_vector(hdev, 0);
 }
 
-static int hclgevf_init_instance(struct hclgevf_dev *hdev,
-                                struct hnae3_client *client)
+static int hclgevf_init_client_instance(struct hnae3_client *client,
+                                       struct hnae3_ae_dev *ae_dev)
 {
+       struct hclgevf_dev *hdev = ae_dev->priv;
        int ret;
 
        switch (client->type) {
@@ -1635,9 +1660,11 @@ static int hclgevf_init_instance(struct hclgevf_dev *hdev,
        return 0;
 }
 
-static void hclgevf_uninit_instance(struct hclgevf_dev *hdev,
-                                   struct hnae3_client *client)
+static void hclgevf_uninit_client_instance(struct hnae3_client *client,
+                                          struct hnae3_ae_dev *ae_dev)
 {
+       struct hclgevf_dev *hdev = ae_dev->priv;
+
        /* un-init roce, if it exists */
        if (hdev->roce_client)
                hdev->roce_client->ops->uninit_instance(&hdev->roce, 0);
@@ -1648,22 +1675,6 @@ static void hclgevf_uninit_instance(struct hclgevf_dev *hdev,
                client->ops->uninit_instance(&hdev->nic, 0);
 }
 
-static int hclgevf_register_client(struct hnae3_client *client,
-                                  struct hnae3_ae_dev *ae_dev)
-{
-       struct hclgevf_dev *hdev = ae_dev->priv;
-
-       return hclgevf_init_instance(hdev, client);
-}
-
-static void hclgevf_unregister_client(struct hnae3_client *client,
-                                     struct hnae3_ae_dev *ae_dev)
-{
-       struct hclgevf_dev *hdev = ae_dev->priv;
-
-       hclgevf_uninit_instance(hdev, client);
-}
-
 static int hclgevf_pci_init(struct hclgevf_dev *hdev)
 {
        struct pci_dev *pdev = hdev->pdev;
@@ -1727,6 +1738,45 @@ static void hclgevf_pci_uninit(struct hclgevf_dev *hdev)
        pci_disable_device(pdev);
 }
 
+static int hclgevf_query_vf_resource(struct hclgevf_dev *hdev)
+{
+       struct hclgevf_query_res_cmd *req;
+       struct hclgevf_desc desc;
+       int ret;
+
+       hclgevf_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_QUERY_VF_RSRC, true);
+       ret = hclgevf_cmd_send(&hdev->hw, &desc, 1);
+       if (ret) {
+               dev_err(&hdev->pdev->dev,
+                       "query vf resource failed, ret = %d.\n", ret);
+               return ret;
+       }
+
+       req = (struct hclgevf_query_res_cmd *)desc.data;
+
+       if (hnae3_get_bit(hdev->ae_dev->flag, HNAE3_DEV_SUPPORT_ROCE_B)) {
+               hdev->roce_base_msix_offset =
+               hnae3_get_field(__le16_to_cpu(req->msixcap_localid_ba_rocee),
+                               HCLGEVF_MSIX_OFT_ROCEE_M,
+                               HCLGEVF_MSIX_OFT_ROCEE_S);
+               hdev->num_roce_msix =
+               hnae3_get_field(__le16_to_cpu(req->vf_intr_vector_number),
+                               HCLGEVF_VEC_NUM_M, HCLGEVF_VEC_NUM_S);
+
+               /* A VF has both NIC and RoCE vectors; the NIC vectors are
+                * queued before the RoCE vectors, at a fixed offset of 64.
+                */
+               hdev->num_msi = hdev->num_roce_msix +
+                               hdev->roce_base_msix_offset;
+       } else {
+               hdev->num_msi =
+               hnae3_get_field(__le16_to_cpu(req->vf_intr_vector_number),
+                               HCLGEVF_VEC_NUM_M, HCLGEVF_VEC_NUM_S);
+       }
+
+       return 0;
+}
+
 static int hclgevf_init_hdev(struct hclgevf_dev *hdev)
 {
        struct pci_dev *pdev = hdev->pdev;
@@ -1744,18 +1794,26 @@ static int hclgevf_init_hdev(struct hclgevf_dev *hdev)
                return ret;
        }
 
+       ret = hclgevf_cmd_init(hdev);
+       if (ret)
+               goto err_cmd_init;
+
+       /* Get vf resource */
+       ret = hclgevf_query_vf_resource(hdev);
+       if (ret) {
+               dev_err(&hdev->pdev->dev,
+                       "Query vf resource error, ret = %d.\n", ret);
+               goto err_query_vf;
+       }
+
        ret = hclgevf_init_msi(hdev);
        if (ret) {
                dev_err(&pdev->dev, "failed(%d) to init MSI/MSI-X\n", ret);
-               goto err_irq_init;
+               goto err_query_vf;
        }
 
        hclgevf_state_init(hdev);
 
-       ret = hclgevf_cmd_init(hdev);
-       if (ret)
-               goto err_cmd_init;
-
        ret = hclgevf_misc_irq_init(hdev);
        if (ret) {
                dev_err(&pdev->dev, "failed(%d) to init Misc IRQ(vector0)\n",
@@ -1811,11 +1869,11 @@ static int hclgevf_init_hdev(struct hclgevf_dev *hdev)
 err_config:
        hclgevf_misc_irq_uninit(hdev);
 err_misc_irq_init:
-       hclgevf_cmd_uninit(hdev);
-err_cmd_init:
        hclgevf_state_uninit(hdev);
        hclgevf_uninit_msi(hdev);
-err_irq_init:
+err_query_vf:
+       hclgevf_cmd_uninit(hdev);
+err_cmd_init:
        hclgevf_pci_uninit(hdev);
        return ret;
 }
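
The reordering above encodes a real dependency chain: the command queue must be initialized first because the VF resource query is sent over it, and the MSI setup is sized by the vector counts that query returns. The renamed error labels keep the unwind symmetric, each label undoing exactly the steps that had succeeded. A stand-alone illustration of that label convention, with stand-in step functions:

    #include <stdio.h>

    /* Stand-ins for the three init steps; each returns 0 on success. */
    static int cmd_init(void)       { return 0; }
    static int query_resource(void) { return 0; }
    static int msi_init(void)       { return -1; } /* force a failure */

    static void cmd_uninit(void)    { puts("cmd_uninit"); }

    static int init_hdev(void)
    {
            int ret;

            ret = cmd_init();               /* step 1: bring up command queue */
            if (ret)
                    goto err_cmd_init;

            ret = query_resource();         /* step 2: needs the command queue */
            if (ret)
                    goto err_query_vf;

            ret = msi_init();               /* step 3: sized by step 2's counts */
            if (ret)
                    goto err_query_vf;      /* unwind step 1 (step 2 holds no state) */

            return 0;

    err_query_vf:
            cmd_uninit();                   /* undo step 1 */
    err_cmd_init:
            return ret;
    }

    int main(void)
    {
            printf("init_hdev() = %d\n", init_hdev());
            return 0;
    }
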
@@ -1924,8 +1982,8 @@ void hclgevf_update_speed_duplex(struct hclgevf_dev *hdev, u32 speed,
 static const struct hnae3_ae_ops hclgevf_ops = {
        .init_ae_dev = hclgevf_init_ae_dev,
        .uninit_ae_dev = hclgevf_uninit_ae_dev,
-       .init_client_instance = hclgevf_register_client,
-       .uninit_client_instance = hclgevf_unregister_client,
+       .init_client_instance = hclgevf_init_client_instance,
+       .uninit_client_instance = hclgevf_uninit_client_instance,
        .start = hclgevf_ae_start,
        .stop = hclgevf_ae_stop,
        .map_ring_to_vector = hclgevf_map_ring_to_vector,
@@ -1962,7 +2020,6 @@ static const struct hnae3_ae_ops hclgevf_ops = {
 
 static struct hnae3_ae_algo ae_algovf = {
        .ops = &hclgevf_ops,
-       .name = HCLGEVF_NAME,
        .pdev_id_table = ae_algovf_pci_tbl,
 };
 
index 0656e8e5c5f0a340a5591be9b7a448747b0a3b76..b23ba171473c9c3cee18ba05d4f08adeb8ac15a6 100644 (file)
@@ -12,7 +12,6 @@
 #define HCLGEVF_MOD_VERSION "1.0"
 #define HCLGEVF_DRIVER_NAME "hclgevf"
 
-#define HCLGEVF_ROCEE_VECTOR_NUM       0
 #define HCLGEVF_MISC_VECTOR_NUM                0
 
 #define HCLGEVF_INVALID_VPORT          0xffff
@@ -150,6 +149,9 @@ struct hclgevf_dev {
        u16 num_msi;
        u16 num_msi_left;
        u16 num_msi_used;
+       u16 num_roce_msix;      /* Num of roce vectors for this VF */
+       u16 roce_base_msix_offset;
+       int roce_base_vector;
        u32 base_msi_vector;
        u16 *vector_status;
        int *vector_irq;
index b598c06af8e09e708b5a9c63865eb9208198b060..e9d5a4f96304e114722caea9c21509a4e0b6cc6c 100644 (file)
@@ -152,7 +152,7 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev)
                req = (struct hclge_mbx_pf_to_vf_cmd *)desc->data;
 
                flag = le16_to_cpu(crq->desc[crq->next_to_use].flag);
-               if (unlikely(!hnae_get_bit(flag, HCLGEVF_CMDQ_RX_OUTVLD_B))) {
+               if (unlikely(!hnae3_get_bit(flag, HCLGEVF_CMDQ_RX_OUTVLD_B))) {
                        dev_warn(&hdev->pdev->dev,
                                 "dropped invalid mailbox message, code = %d\n",
                                 req->msg[0]);
@@ -208,7 +208,8 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev)
 
                        /* tail the async message in arq */
                        msg_q = hdev->arq.msg_q[hdev->arq.tail];
-                       memcpy(&msg_q[0], req->msg, HCLGE_MBX_MAX_ARQ_MSG_SIZE);
+                       memcpy(&msg_q[0], req->msg,
+                              HCLGE_MBX_MAX_ARQ_MSG_SIZE * sizeof(u16));
                        hclge_mbx_tail_ptr_move_arq(hdev->arq);
                        hdev->arq.count++;
 
index 79b56744708427eb741a1fc8f964f28beb3f0961..6b19607a4caac0f846186917b9c4b286f3b0b9b6 100644 (file)
@@ -264,7 +264,6 @@ static int init_fw_ctxt(struct hinic_hwdev *hwdev)
        struct hinic_hwif *hwif = hwdev->hwif;
        struct pci_dev *pdev = hwif->pdev;
        struct hinic_cmd_fw_ctxt fw_ctxt;
-       struct hinic_pfhwdev *pfhwdev;
        u16 out_size;
        int err;
 
@@ -276,8 +275,6 @@ static int init_fw_ctxt(struct hinic_hwdev *hwdev)
        fw_ctxt.func_idx = HINIC_HWIF_FUNC_IDX(hwif);
        fw_ctxt.rx_buf_sz = HINIC_RX_BUF_SZ;
 
-       pfhwdev = container_of(hwdev, struct hinic_pfhwdev, hwdev);
-
        err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_FWCTXT_INIT,
                                 &fw_ctxt, sizeof(fw_ctxt),
                                 &fw_ctxt, &out_size);
index c944bd10b03d29c5711903714456ac9ca975e735..51762428b40e6ef91e567c4f60c3459cae275402 100644 (file)
@@ -7522,7 +7522,7 @@ static int i40e_setup_tc_cls_flower(struct i40e_netdev_priv *np,
        case TC_CLSFLOWER_STATS:
                return -EOPNOTSUPP;
        default:
-               return -EINVAL;
+               return -EOPNOTSUPP;
        }
 }
 
@@ -7554,7 +7554,7 @@ static int i40e_setup_tc_block(struct net_device *dev,
        switch (f->command) {
        case TC_BLOCK_BIND:
                return tcf_block_cb_register(f->block, i40e_setup_tc_block_cb,
-                                            np, np);
+                                            np, np, f->extack);
        case TC_BLOCK_UNBIND:
                tcf_block_cb_unregister(f->block, i40e_setup_tc_block_cb, np);
                return 0;
@@ -11841,7 +11841,6 @@ static int i40e_xdp(struct net_device *dev,
        case XDP_SETUP_PROG:
                return i40e_xdp_setup(vsi, xdp->prog);
        case XDP_QUERY_PROG:
-               xdp->prog_attached = i40e_enabled_xdp_vsi(vsi);
                xdp->prog_id = vsi->xdp_prog ? vsi->xdp_prog->aux->id : 0;
                return 0;
        default:
@@ -11978,7 +11977,7 @@ static int i40e_config_netdev(struct i40e_vsi *vsi)
                snprintf(netdev->name, IFNAMSIZ, "%.*sv%%d",
                         IFNAMSIZ - 4,
                         pf->vsi[pf->lan_vsi]->netdev->name);
-               random_ether_addr(mac_addr);
+               eth_random_addr(mac_addr);
 
                spin_lock_bh(&vsi->mac_filter_hash_lock);
                i40e_add_mac_filter(vsi, mac_addr);
index a7b87f93541138c497056f9c91dacbd91c519fc5..5906c1c1d19d82d7e37b0a891e457fea792b4153 100644 (file)
@@ -2884,7 +2884,7 @@ static int i40evf_setup_tc_cls_flower(struct i40evf_adapter *adapter,
        case TC_CLSFLOWER_STATS:
                return -EOPNOTSUPP;
        default:
-               return -EINVAL;
+               return -EOPNOTSUPP;
        }
 }
 
@@ -2926,7 +2926,7 @@ static int i40evf_setup_tc_block(struct net_device *dev,
        switch (f->command) {
        case TC_BLOCK_BIND:
                return tcf_block_cb_register(f->block, i40evf_setup_tc_block_cb,
-                                            adapter, adapter);
+                                            adapter, adapter, f->extack);
        case TC_BLOCK_UNBIND:
                tcf_block_cb_unregister(f->block, i40evf_setup_tc_block_cb,
                                        adapter);
index b13b42e5a1d9925351dde946f8389202adb4bd91..a795c07d0df77989956df489d5d16ec7e09b3ed7 100644 (file)
@@ -225,19 +225,7 @@ static s32 igb_init_phy_params_82575(struct e1000_hw *hw)
        hw->bus.func = (rd32(E1000_STATUS) & E1000_STATUS_FUNC_MASK) >>
                        E1000_STATUS_FUNC_SHIFT;
 
-       /* Make sure the PHY is in a good state. Several people have reported
-        * firmware leaving the PHY's page select register set to something
-        * other than the default of zero, which causes the PHY ID read to
-        * access something other than the intended register.
-        */
-       ret_val = hw->phy.ops.reset(hw);
-       if (ret_val) {
-               hw_dbg("Error resetting the PHY.\n");
-               goto out;
-       }
-
        /* Set phy->phy_addr and phy->id. */
-       igb_write_phy_reg_82580(hw, I347AT4_PAGE_SELECT, 0);
        ret_val = igb_get_phy_id_82575(hw);
        if (ret_val)
                return ret_val;
index 252440a418dc4ba0dc0693cfb5a3b4dd69fd6b28..8a28f3388f699bf30df581af4f9aa08ed5f2b567 100644 (file)
 #define E1000_TQAVCTRL_XMIT_MODE       BIT(0)
 #define E1000_TQAVCTRL_DATAFETCHARB    BIT(4)
 #define E1000_TQAVCTRL_DATATRANARB     BIT(8)
+#define E1000_TQAVCTRL_DATATRANTIM     BIT(9)
+#define E1000_TQAVCTRL_SP_WAIT_SR      BIT(10)
+/* Fetch Time Delta - bits 31:16
+ *
+ * This field holds the value to be reduced from the launch time for
+ * fetch time decision. The FetchTimeDelta value is defined in 32 ns
+ * granularity.
+ *
+ * This field is 16 bits wide, and so the maximum value is:
+ *
+ * 65535 * 32 = 2097120 ~= 2.1 msec
+ *
+ * XXX: We are configuring the max value here since we couldn't come up
+ * with a reason for not doing so.
+ */
+#define E1000_TQAVCTRL_FETCHTIME_DELTA (0xFFFF << 16)
 
 /* TX Qav Credit Control fields */
 #define E1000_TQAVCC_IDLESLOPE_MASK    0xFFFF
index 9643b5b3d444b2aa19d0f73045d4a72911e24fd4..ca54e268d157bd9afb7ab23854a8ae52ff260215 100644 (file)
@@ -262,6 +262,7 @@ struct igb_ring {
        u16 count;                      /* number of desc. in the ring */
        u8 queue_index;                 /* logical index of the ring*/
        u8 reg_idx;                     /* physical index of the ring */
+       bool launchtime_enable;         /* true if LaunchTime is enabled */
        bool cbs_enable;                /* indicates if CBS is enabled */
        s32 idleslope;                  /* idleSlope in kbps */
        s32 sendslope;                  /* sendSlope in kbps */
index f707709969acfee137d30b698304dfd99f7c45e7..25720d95d4ea36110db7353bda8528385bb09823 100644 (file)
@@ -1654,33 +1654,65 @@ static void set_queue_mode(struct e1000_hw *hw, int queue, enum queue_mode mode)
        wr32(E1000_I210_TQAVCC(queue), val);
 }
 
+static bool is_any_cbs_enabled(struct igb_adapter *adapter)
+{
+       int i;
+
+       for (i = 0; i < adapter->num_tx_queues; i++) {
+               if (adapter->tx_ring[i]->cbs_enable)
+                       return true;
+       }
+
+       return false;
+}
+
+static bool is_any_txtime_enabled(struct igb_adapter *adapter)
+{
+       int i;
+
+       for (i = 0; i < adapter->num_tx_queues; i++) {
+               if (adapter->tx_ring[i]->launchtime_enable)
+                       return true;
+       }
+
+       return false;
+}
+
 /**
- *  igb_configure_cbs - Configure Credit-Based Shaper (CBS)
+ *  igb_config_tx_modes - Configure "Qav Tx mode" features on igb
  *  @adapter: pointer to adapter struct
  *  @queue: queue number
- *  @enable: true = enable CBS, false = disable CBS
- *  @idleslope: idleSlope in kbps
- *  @sendslope: sendSlope in kbps
- *  @hicredit: hiCredit in bytes
- *  @locredit: loCredit in bytes
  *
- *  Configure CBS for a given hardware queue. When disabling, idleslope,
- *  sendslope, hicredit, locredit arguments are ignored. Returns 0 if
- *  success. Negative otherwise.
+ *  Configure CBS and Launchtime for a given hardware queue.
+ *  Parameters are retrieved from the matching Tx ring, so
+ *  igb_save_cbs_params() and igb_save_txtime_params() should be used
+ *  to set them before this function is called.
  **/
-static void igb_configure_cbs(struct igb_adapter *adapter, int queue,
-                             bool enable, int idleslope, int sendslope,
-                             int hicredit, int locredit)
+static void igb_config_tx_modes(struct igb_adapter *adapter, int queue)
 {
+       struct igb_ring *ring = adapter->tx_ring[queue];
        struct net_device *netdev = adapter->netdev;
        struct e1000_hw *hw = &adapter->hw;
-       u32 tqavcc;
+       u32 tqavcc, tqavctrl;
        u16 value;
 
        WARN_ON(hw->mac.type != e1000_i210);
        WARN_ON(queue < 0 || queue > 1);
 
-       if (enable || queue == 0) {
+       /* If any of the Qav features is enabled, configure the queue as SR
+        * with HIGH PRIO. If none is, configure it as SP with LOW PRIO.
+        */
+       if (ring->cbs_enable || ring->launchtime_enable) {
+               set_tx_desc_fetch_prio(hw, queue, TX_QUEUE_PRIO_HIGH);
+               set_queue_mode(hw, queue, QUEUE_MODE_STREAM_RESERVATION);
+       } else {
+               set_tx_desc_fetch_prio(hw, queue, TX_QUEUE_PRIO_LOW);
+               set_queue_mode(hw, queue, QUEUE_MODE_STRICT_PRIORITY);
+       }
+
+       /* If CBS is enabled, set DataTranARB and configure its parameters. */
+       if (ring->cbs_enable || queue == 0) {
                /* i210 does not allow the queue 0 to be in the Strict
                 * Priority mode while the Qav mode is enabled, so,
                 * instead of disabling strict priority mode, we give
@@ -1690,14 +1722,19 @@ static void igb_configure_cbs(struct igb_adapter *adapter, int queue,
                 * Queue0 QueueMode must be set to 1b when
                 * TransmitMode is set to Qav."
                 */
-               if (queue == 0 && !enable) {
+               if (queue == 0 && !ring->cbs_enable) {
                        /* max "linkspeed" idleslope in kbps */
-                       idleslope = 1000000;
-                       hicredit = ETH_FRAME_LEN;
+                       ring->idleslope = 1000000;
+                       ring->hicredit = ETH_FRAME_LEN;
                }
 
-               set_tx_desc_fetch_prio(hw, queue, TX_QUEUE_PRIO_HIGH);
-               set_queue_mode(hw, queue, QUEUE_MODE_STREAM_RESERVATION);
+               /* Always set data transfer arbitration to credit-based
+                * shaper algorithm on TQAVCTRL if CBS is enabled for any of
+                * the queues.
+                */
+               tqavctrl = rd32(E1000_I210_TQAVCTRL);
+               tqavctrl |= E1000_TQAVCTRL_DATATRANARB;
+               wr32(E1000_I210_TQAVCTRL, tqavctrl);
 
                /* According to i210 datasheet section 7.2.7.7, we should set
                 * the 'idleSlope' field from TQAVCC register following the
@@ -1756,17 +1793,16 @@ static void igb_configure_cbs(struct igb_adapter *adapter, int queue,
                 *       calculated value, so the resulting bandwidth might
                 *       be slightly higher for some configurations.
                 */
-               value = DIV_ROUND_UP_ULL(idleslope * 61034ULL, 1000000);
+               value = DIV_ROUND_UP_ULL(ring->idleslope * 61034ULL, 1000000);
 
                tqavcc = rd32(E1000_I210_TQAVCC(queue));
                tqavcc &= ~E1000_TQAVCC_IDLESLOPE_MASK;
                tqavcc |= value;
                wr32(E1000_I210_TQAVCC(queue), tqavcc);
 
-               wr32(E1000_I210_TQAVHC(queue), 0x80000000 + hicredit * 0x7735);
+               wr32(E1000_I210_TQAVHC(queue),
+                    0x80000000 + ring->hicredit * 0x7735);
        } else {
-               set_tx_desc_fetch_prio(hw, queue, TX_QUEUE_PRIO_LOW);
-               set_queue_mode(hw, queue, QUEUE_MODE_STRICT_PRIORITY);
 
                /* Set idleSlope to zero. */
                tqavcc = rd32(E1000_I210_TQAVCC(queue));
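
The idleSlope programming above converts a kbps reservation into the 16-bit TQAVCC credit units with the DIV_ROUND_UP_ULL(idleslope * 61034, 1000000) step the i210 datasheet comment describes. A quick runnable check of the rounding behaviour for a hypothetical 20 Mbps reservation:

    #include <stdint.h>
    #include <stdio.h>

    /* Userspace stand-in for the kernel's DIV_ROUND_UP_ULL(). */
    static uint64_t div_round_up_ull(uint64_t n, uint64_t d)
    {
            return (n + d - 1) / d;
    }

    int main(void)
    {
            uint64_t idleslope_kbps = 20000; /* hypothetical 20 Mbps reservation */
            uint16_t value = div_round_up_ull(idleslope_kbps * 61034ULL, 1000000);

            /* 20000 * 61034 / 1000000 = 1220.68, which rounds up to 1221 */
            printf("TQAVCC idleSlope field = %u\n", value);
            return 0;
    }
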
@@ -1775,6 +1811,43 @@ static void igb_configure_cbs(struct igb_adapter *adapter, int queue,
 
                /* Set hiCredit to zero. */
                wr32(E1000_I210_TQAVHC(queue), 0);
+
+               /* If CBS is not enabled for any queues anymore, then return to
+                * the default state of Data Transmission Arbitration on
+                * TQAVCTRL.
+                */
+               if (!is_any_cbs_enabled(adapter)) {
+                       tqavctrl = rd32(E1000_I210_TQAVCTRL);
+                       tqavctrl &= ~E1000_TQAVCTRL_DATATRANARB;
+                       wr32(E1000_I210_TQAVCTRL, tqavctrl);
+               }
+       }
+
+       /* If LaunchTime is enabled, set DataTranTIM. */
+       if (ring->launchtime_enable) {
+               /* Always set DataTranTIM on TQAVCTRL if LaunchTime is enabled
+                * for any of the SR queues, and configure fetchtime delta.
+                * XXX NOTE:
+                *     - LaunchTime will be enabled for all SR queues.
+                *     - A fixed offset can be added relative to the launch
+                *       time of all packets if configured at reg LAUNCH_OS0.
+                *       We are keeping it as 0 for now (default value).
+                */
+               tqavctrl = rd32(E1000_I210_TQAVCTRL);
+               tqavctrl |= E1000_TQAVCTRL_DATATRANTIM |
+                      E1000_TQAVCTRL_FETCHTIME_DELTA;
+               wr32(E1000_I210_TQAVCTRL, tqavctrl);
+       } else {
+               /* If Launchtime is not enabled for any SR queues anymore,
+                * then clear DataTranTIM on TQAVCTRL and clear fetchtime delta,
+                * effectively disabling Launchtime.
+                */
+               if (!is_any_txtime_enabled(adapter)) {
+                       tqavctrl = rd32(E1000_I210_TQAVCTRL);
+                       tqavctrl &= ~E1000_TQAVCTRL_DATATRANTIM;
+                       tqavctrl &= ~E1000_TQAVCTRL_FETCHTIME_DELTA;
+                       wr32(E1000_I210_TQAVCTRL, tqavctrl);
+               }
        }
 
        /* XXX: In i210 controller the sendSlope and loCredit parameters from
@@ -1782,9 +1855,27 @@ static void igb_configure_cbs(struct igb_adapter *adapter, int queue,
         * configuration' in respect to these parameters.
         */
 
-       netdev_dbg(netdev, "CBS %s: queue %d idleslope %d sendslope %d hiCredit %d locredit %d\n",
-                  (enable) ? "enabled" : "disabled", queue,
-                  idleslope, sendslope, hicredit, locredit);
+       netdev_dbg(netdev,
+                  "Qav Tx mode: cbs %s, launchtime %s, queue %d idleslope %d sendslope %d hiCredit %d locredit %d\n",
+                  (ring->cbs_enable) ? "enabled" : "disabled",
+                  (ring->launchtime_enable) ? "enabled" : "disabled", queue,
+                  ring->idleslope, ring->sendslope, ring->hicredit,
+                  ring->locredit);
+}
+
+static int igb_save_txtime_params(struct igb_adapter *adapter, int queue,
+                                 bool enable)
+{
+       struct igb_ring *ring;
+
+       if (queue < 0 || queue > adapter->num_tx_queues)
+               return -EINVAL;
+
+       ring = adapter->tx_ring[queue];
+       ring->launchtime_enable = enable;
+
+       return 0;
 }
 
 static int igb_save_cbs_params(struct igb_adapter *adapter, int queue,
@@ -1807,21 +1898,15 @@ static int igb_save_cbs_params(struct igb_adapter *adapter, int queue,
        return 0;
 }
 
-static bool is_any_cbs_enabled(struct igb_adapter *adapter)
-{
-       struct igb_ring *ring;
-       int i;
-
-       for (i = 0; i < adapter->num_tx_queues; i++) {
-               ring = adapter->tx_ring[i];
-
-               if (ring->cbs_enable)
-                       return true;
-       }
-
-       return false;
-}
-
+/**
+ *  igb_setup_tx_mode - Switch to/from Qav Tx mode when applicable
+ *  @adapter: pointer to adapter struct
+ *
+ *  Configure the TQAVCTRL register, switching the controller's Tx mode
+ *  when FQTSS mode is being enabled or disabled. Additionally, it issues
+ *  a call to igb_config_tx_modes() per queue so that any previously saved
+ *  Tx parameters are applied.
+ **/
 static void igb_setup_tx_mode(struct igb_adapter *adapter)
 {
        struct net_device *netdev = adapter->netdev;
@@ -1836,11 +1921,11 @@ static void igb_setup_tx_mode(struct igb_adapter *adapter)
                int i, max_queue;
 
                /* Configure TQAVCTRL register: set transmit mode to 'Qav',
-                * set data fetch arbitration to 'round robin' and set data
-                * transfer arbitration to 'credit shaper algorithm.
+                * set data fetch arbitration to 'round robin', set SP_WAIT_SR
+                * so SP queues wait for SR ones.
                 */
                val = rd32(E1000_I210_TQAVCTRL);
-               val |= E1000_TQAVCTRL_XMIT_MODE | E1000_TQAVCTRL_DATATRANARB;
+               val |= E1000_TQAVCTRL_XMIT_MODE | E1000_TQAVCTRL_SP_WAIT_SR;
                val &= ~E1000_TQAVCTRL_DATAFETCHARB;
                wr32(E1000_I210_TQAVCTRL, val);
 
@@ -1881,11 +1966,7 @@ static void igb_setup_tx_mode(struct igb_adapter *adapter)
                            adapter->num_tx_queues : I210_SR_QUEUES_NUM;
 
                for (i = 0; i < max_queue; i++) {
-                       struct igb_ring *ring = adapter->tx_ring[i];
-
-                       igb_configure_cbs(adapter, i, ring->cbs_enable,
-                                         ring->idleslope, ring->sendslope,
-                                         ring->hicredit, ring->locredit);
+                       igb_config_tx_modes(adapter, i);
                }
        } else {
                wr32(E1000_RXPBS, I210_RXPBSIZE_DEFAULT);
@@ -2459,6 +2540,19 @@ igb_features_check(struct sk_buff *skb, struct net_device *dev,
        return features;
 }
 
+static void igb_offload_apply(struct igb_adapter *adapter, s32 queue)
+{
+       if (!is_fqtss_enabled(adapter)) {
+               enable_fqtss(adapter, true);
+               return;
+       }
+
+       igb_config_tx_modes(adapter, queue);
+
+       if (!is_any_cbs_enabled(adapter) && !is_any_txtime_enabled(adapter))
+               enable_fqtss(adapter, false);
+}
+
 static int igb_offload_cbs(struct igb_adapter *adapter,
                           struct tc_cbs_qopt_offload *qopt)
 {
@@ -2479,17 +2573,7 @@ static int igb_offload_cbs(struct igb_adapter *adapter,
        if (err)
                return err;
 
-       if (is_fqtss_enabled(adapter)) {
-               igb_configure_cbs(adapter, qopt->queue, qopt->enable,
-                                 qopt->idleslope, qopt->sendslope,
-                                 qopt->hicredit, qopt->locredit);
-
-               if (!is_any_cbs_enabled(adapter))
-                       enable_fqtss(adapter, false);
-
-       } else {
-               enable_fqtss(adapter, true);
-       }
+       igb_offload_apply(adapter, qopt->queue);
 
        return 0;
 }
@@ -2698,7 +2782,7 @@ static int igb_setup_tc_cls_flower(struct igb_adapter *adapter,
        case TC_CLSFLOWER_STATS:
                return -EOPNOTSUPP;
        default:
-               return -EINVAL;
+               return -EOPNOTSUPP;
        }
 }
 
@@ -2728,7 +2812,7 @@ static int igb_setup_tc_block(struct igb_adapter *adapter,
        switch (f->command) {
        case TC_BLOCK_BIND:
                return tcf_block_cb_register(f->block, igb_setup_tc_block_cb,
-                                            adapter, adapter);
+                                            adapter, adapter, f->extack);
        case TC_BLOCK_UNBIND:
                tcf_block_cb_unregister(f->block, igb_setup_tc_block_cb,
                                        adapter);
@@ -2738,6 +2822,29 @@ static int igb_setup_tc_block(struct igb_adapter *adapter,
        }
 }
 
+static int igb_offload_txtime(struct igb_adapter *adapter,
+                             struct tc_etf_qopt_offload *qopt)
+{
+       struct e1000_hw *hw = &adapter->hw;
+       int err;
+
+       /* Launchtime offloading is only supported by the i210 controller. */
+       if (hw->mac.type != e1000_i210)
+               return -EOPNOTSUPP;
+
+       /* Launchtime offloading is only supported by queues 0 and 1. */
+       if (qopt->queue < 0 || qopt->queue > 1)
+               return -EINVAL;
+
+       err = igb_save_txtime_params(adapter, qopt->queue, qopt->enable);
+       if (err)
+               return err;
+
+       igb_offload_apply(adapter, qopt->queue);
+
+       return 0;
+}
+
 static int igb_setup_tc(struct net_device *dev, enum tc_setup_type type,
                        void *type_data)
 {
@@ -2748,6 +2855,8 @@ static int igb_setup_tc(struct net_device *dev, enum tc_setup_type type,
                return igb_offload_cbs(adapter, type_data);
        case TC_SETUP_BLOCK:
                return igb_setup_tc_block(adapter, type_data);
+       case TC_SETUP_QDISC_ETF:
+               return igb_offload_txtime(adapter, type_data);
 
        default:
                return -EOPNOTSUPP;
@@ -5568,11 +5677,14 @@ set_itr_now:
        }
 }
 
-static void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
-                           u32 type_tucmd, u32 mss_l4len_idx)
+static void igb_tx_ctxtdesc(struct igb_ring *tx_ring,
+                           struct igb_tx_buffer *first,
+                           u32 vlan_macip_lens, u32 type_tucmd,
+                           u32 mss_l4len_idx)
 {
        struct e1000_adv_tx_context_desc *context_desc;
        u16 i = tx_ring->next_to_use;
+       struct timespec64 ts;
 
        context_desc = IGB_TX_CTXTDESC(tx_ring, i);
 
@@ -5587,9 +5699,18 @@ static void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
                mss_l4len_idx |= tx_ring->reg_idx << 4;
 
        context_desc->vlan_macip_lens   = cpu_to_le32(vlan_macip_lens);
-       context_desc->seqnum_seed       = 0;
        context_desc->type_tucmd_mlhl   = cpu_to_le32(type_tucmd);
        context_desc->mss_l4len_idx     = cpu_to_le32(mss_l4len_idx);
+
+       /* We assume there is always a valid tx time available. Invalid times
+        * should have been handled by the upper layers.
+        */
+       if (tx_ring->launchtime_enable) {
+               ts = ns_to_timespec64(first->skb->tstamp);
+               context_desc->seqnum_seed = cpu_to_le32(ts.tv_nsec / 32);
+       } else {
+               context_desc->seqnum_seed = 0;
+       }
 }
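
The seqnum_seed programming above encodes the nanosecond part of skb->tstamp in the 32 ns units the hardware expects for LaunchTime (matching the FetchTimeDelta granularity noted earlier). A self-contained sketch of that conversion, with ns_to_timespec64() replaced by plain C arithmetic:

    #include <stdint.h>
    #include <stdio.h>

    #define NSEC_PER_SEC 1000000000LL

    int main(void)
    {
            /* Hypothetical launch time: 5.000000640 s on the clock in use. */
            int64_t tstamp_ns = 5 * NSEC_PER_SEC + 640;

            /* Equivalent of ns_to_timespec64(): split into sec + nsec. */
            int64_t tv_sec  = tstamp_ns / NSEC_PER_SEC;
            int64_t tv_nsec = tstamp_ns % NSEC_PER_SEC;

            /* The i210 LaunchTime field is expressed in 32 ns units. */
            uint32_t seqnum_seed = (uint32_t)(tv_nsec / 32);

            printf("sec=%lld launchtime field=%u (640 ns -> 20 units)\n",
                   (long long)tv_sec, seqnum_seed);
            return 0;
    }
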
 
 static int igb_tso(struct igb_ring *tx_ring,
@@ -5672,7 +5793,8 @@ static int igb_tso(struct igb_ring *tx_ring,
        vlan_macip_lens |= (ip.hdr - skb->data) << E1000_ADVTXD_MACLEN_SHIFT;
        vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
 
-       igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
+       igb_tx_ctxtdesc(tx_ring, first, vlan_macip_lens,
+                       type_tucmd, mss_l4len_idx);
 
        return 1;
 }
@@ -5727,7 +5849,7 @@ no_csum:
        vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
        vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
 
-       igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, 0);
+       igb_tx_ctxtdesc(tx_ring, first, vlan_macip_lens, type_tucmd, 0);
 }
 
 #define IGB_SET_FLAG(_input, _flag, _result) \
@@ -5909,7 +6031,7 @@ static int igb_tx_map(struct igb_ring *tx_ring,
         * We also need this memory barrier to make certain all of the
         * status bits have been updated before next_to_watch is written.
         */
-       wmb();
+       dma_wmb();
 
        /* set next_to_watch value indicating a packet is present */
        first->next_to_watch = tx_desc;
@@ -6015,8 +6137,6 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
                }
        }
 
-       skb_tx_timestamp(skb);
-
        if (skb_vlan_tag_present(skb)) {
                tx_flags |= IGB_TX_FLAGS_VLAN;
                tx_flags |= (skb_vlan_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
@@ -6032,6 +6152,8 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
        else if (!tso)
                igb_tx_csum(tx_ring, first);
 
+       skb_tx_timestamp(skb);
+
        if (igb_tx_map(tx_ring, first, hdr_len))
                goto cleanup_tx_tstamp;
 
@@ -8409,7 +8531,7 @@ void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
                 * applicable for weak-ordered memory model archs,
                 * such as IA-64).
                 */
-               wmb();
+               dma_wmb();
                writel(i, rx_ring->tail);
        }
 }
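
Both wmb() to dma_wmb() conversions in this file follow the same rule: when the writes being ordered target coherent DMA memory rather than MMIO, the lighter dma_wmb() is sufficient. A sketch of the producer-side pattern, with illustrative types and register names:

    #include <linux/io.h>
    #include <linux/types.h>

    /* Illustrative descriptor; the fields stand in for the igb Tx/Rx ones. */
    struct demo_desc {
            __le64 addr;
            __le32 cmd;
            __le32 status;
    };

    static void demo_publish(struct demo_desc *desc, void __iomem *tail_reg,
                             dma_addr_t dma, u32 cmd, u32 idx)
    {
            desc->addr = cpu_to_le64(dma);
            desc->cmd = cpu_to_le32(cmd);

            /* Descriptor and ring state live in coherent memory, so dma_wmb()
             * is enough to make them visible to the device before the
             * doorbell; a full wmb() is only needed when ordering against
             * MMIO as well.
             */
            dma_wmb();

            writel(idx, tail_reg); /* doorbell: the device may now fetch */
    }
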
index 144d5fe6b94477def970671ab94cd1fb77711bdb..4fc906c6166b34c790ebe60dc228d52756cf7eb4 100644 (file)
@@ -855,7 +855,8 @@ void ixgbe_free_rx_resources(struct ixgbe_ring *);
 void ixgbe_free_tx_resources(struct ixgbe_ring *);
 void ixgbe_configure_rx_ring(struct ixgbe_adapter *, struct ixgbe_ring *);
 void ixgbe_configure_tx_ring(struct ixgbe_adapter *, struct ixgbe_ring *);
-void ixgbe_disable_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_ring *);
+void ixgbe_disable_rx(struct ixgbe_adapter *adapter);
+void ixgbe_disable_tx(struct ixgbe_adapter *adapter);
 void ixgbe_update_stats(struct ixgbe_adapter *adapter);
 int ixgbe_init_interrupt_scheme(struct ixgbe_adapter *adapter);
 bool ixgbe_wol_supported(struct ixgbe_adapter *adapter, u16 device_id,
index bd1ba88ec1d562fbb114a8fc75077edab634ff89..e5a8461fe6a99bfbf8ab20b85e38c0f0c24e0bb5 100644 (file)
@@ -511,7 +511,7 @@ static void ixgbe_set_msglevel(struct net_device *netdev, u32 data)
 
 static int ixgbe_get_regs_len(struct net_device *netdev)
 {
-#define IXGBE_REGS_LEN  1139
+#define IXGBE_REGS_LEN  1145
        return IXGBE_REGS_LEN * sizeof(u32);
 }
 
@@ -874,6 +874,14 @@ static void ixgbe_get_regs(struct net_device *netdev,
        /* X540 specific DCB registers  */
        regs_buff[1137] = IXGBE_READ_REG(hw, IXGBE_RTTQCNCR);
        regs_buff[1138] = IXGBE_READ_REG(hw, IXGBE_RTTQCNTG);
+
+       /* Security config registers */
+       regs_buff[1139] = IXGBE_READ_REG(hw, IXGBE_SECTXCTRL);
+       regs_buff[1140] = IXGBE_READ_REG(hw, IXGBE_SECTXSTAT);
+       regs_buff[1141] = IXGBE_READ_REG(hw, IXGBE_SECTXBUFFAF);
+       regs_buff[1142] = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
+       regs_buff[1143] = IXGBE_READ_REG(hw, IXGBE_SECRXCTRL);
+       regs_buff[1144] = IXGBE_READ_REG(hw, IXGBE_SECRXSTAT);
 }
 
 static int ixgbe_get_eeprom_len(struct net_device *netdev)
@@ -1690,35 +1698,17 @@ static int ixgbe_intr_test(struct ixgbe_adapter *adapter, u64 *data)
 
 static void ixgbe_free_desc_rings(struct ixgbe_adapter *adapter)
 {
-       struct ixgbe_ring *tx_ring = &adapter->test_tx_ring;
-       struct ixgbe_ring *rx_ring = &adapter->test_rx_ring;
-       struct ixgbe_hw *hw = &adapter->hw;
-       u32 reg_ctl;
-
-       /* shut down the DMA engines now so they can be reinitialized later */
+       /* Shut down the DMA engines now so they can be reinitialized later.
+        * Since the test rings and the normally used rings should overlap on
+        * queue 0, we can just use the standard disable Rx/Tx calls, and they
+        * will take care of disabling the test rings for us.
+        */
 
        /* first Rx */
-       hw->mac.ops.disable_rx(hw);
-       ixgbe_disable_rx_queue(adapter, rx_ring);
+       ixgbe_disable_rx(adapter);
 
        /* now Tx */
-       reg_ctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(tx_ring->reg_idx));
-       reg_ctl &= ~IXGBE_TXDCTL_ENABLE;
-       IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(tx_ring->reg_idx), reg_ctl);
-
-       switch (hw->mac.type) {
-       case ixgbe_mac_82599EB:
-       case ixgbe_mac_X540:
-       case ixgbe_mac_X550:
-       case ixgbe_mac_X550EM_x:
-       case ixgbe_mac_x550em_a:
-               reg_ctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
-               reg_ctl &= ~IXGBE_DMATXCTL_TE;
-               IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, reg_ctl);
-               break;
-       default:
-               break;
-       }
+       ixgbe_disable_tx(adapter);
 
        ixgbe_reset(adapter);
 
index 62e57b05a0aed3d9a02bf8d473aa49505608728f..447098005490926f67e5fb32f7c84b798e387eab 100644 (file)
@@ -4022,38 +4022,6 @@ static void ixgbe_rx_desc_queue_enable(struct ixgbe_adapter *adapter,
        }
 }
 
-void ixgbe_disable_rx_queue(struct ixgbe_adapter *adapter,
-                           struct ixgbe_ring *ring)
-{
-       struct ixgbe_hw *hw = &adapter->hw;
-       int wait_loop = IXGBE_MAX_RX_DESC_POLL;
-       u32 rxdctl;
-       u8 reg_idx = ring->reg_idx;
-
-       if (ixgbe_removed(hw->hw_addr))
-               return;
-       rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(reg_idx));
-       rxdctl &= ~IXGBE_RXDCTL_ENABLE;
-
-       /* write value back with RXDCTL.ENABLE bit cleared */
-       IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(reg_idx), rxdctl);
-
-       if (hw->mac.type == ixgbe_mac_82598EB &&
-           !(IXGBE_READ_REG(hw, IXGBE_LINKS) & IXGBE_LINKS_UP))
-               return;
-
-       /* the hardware may take up to 100us to really disable the rx queue */
-       do {
-               udelay(10);
-               rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(reg_idx));
-       } while (--wait_loop && (rxdctl & IXGBE_RXDCTL_ENABLE));
-
-       if (!wait_loop) {
-               e_err(drv, "RXDCTL.ENABLE on Rx queue %d not cleared within "
-                     "the polling period\n", reg_idx);
-       }
-}
-
 void ixgbe_configure_rx_ring(struct ixgbe_adapter *adapter,
                             struct ixgbe_ring *ring)
 {
@@ -4063,9 +4031,13 @@ void ixgbe_configure_rx_ring(struct ixgbe_adapter *adapter,
        u32 rxdctl;
        u8 reg_idx = ring->reg_idx;
 
-       /* disable queue to avoid issues while updating state */
+       /* disable queue to avoid use of these values while updating state */
        rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(reg_idx));
-       ixgbe_disable_rx_queue(adapter, ring);
+       rxdctl &= ~IXGBE_RXDCTL_ENABLE;
+
+       /* write value back with RXDCTL.ENABLE bit cleared */
+       IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(reg_idx), rxdctl);
+       IXGBE_WRITE_FLUSH(hw);
 
        IXGBE_WRITE_REG(hw, IXGBE_RDBAL(reg_idx), (rdba & DMA_BIT_MASK(32)));
        IXGBE_WRITE_REG(hw, IXGBE_RDBAH(reg_idx), (rdba >> 32));
@@ -5275,6 +5247,8 @@ static void ixgbe_clean_rx_ring(struct ixgbe_ring *rx_ring)
 static int ixgbe_fwd_ring_up(struct ixgbe_adapter *adapter,
                             struct ixgbe_fwd_adapter *accel)
 {
+       u16 rss_i = adapter->ring_feature[RING_F_RSS].indices;
+       int num_tc = netdev_get_num_tc(adapter->netdev);
        struct net_device *vdev = accel->netdev;
        int i, baseq, err;
 
@@ -5286,6 +5260,11 @@ static int ixgbe_fwd_ring_up(struct ixgbe_adapter *adapter,
        accel->rx_base_queue = baseq;
        accel->tx_base_queue = baseq;
 
+       /* record configuration for macvlan interface in vdev */
+       for (i = 0; i < num_tc; i++)
+               netdev_bind_sb_channel_queue(adapter->netdev, vdev,
+                                            i, rss_i, baseq + (rss_i * i));
+
        for (i = 0; i < adapter->num_rx_queues_per_pool; i++)
                adapter->rx_ring[baseq + i]->netdev = vdev;
 
@@ -5310,6 +5289,10 @@ static int ixgbe_fwd_ring_up(struct ixgbe_adapter *adapter,
 
        netdev_err(vdev, "L2FW offload disabled due to L2 filter error\n");
 
+       /* unbind the queues and drop the subordinate channel config */
+       netdev_unbind_sb_channel(adapter->netdev, vdev);
+       netdev_set_sb_channel(vdev, 0);
+
        clear_bit(accel->pool, adapter->fwd_bitmask);
        kfree(accel);
 
@@ -5622,6 +5605,212 @@ void ixgbe_up(struct ixgbe_adapter *adapter)
        ixgbe_up_complete(adapter);
 }
 
+static unsigned long ixgbe_get_completion_timeout(struct ixgbe_adapter *adapter)
+{
+       u16 devctl2;
+
+       pcie_capability_read_word(adapter->pdev, PCI_EXP_DEVCTL2, &devctl2);
+
+       switch (devctl2 & IXGBE_PCIDEVCTRL2_TIMEO_MASK) {
+       case IXGBE_PCIDEVCTRL2_17_34s:
+       case IXGBE_PCIDEVCTRL2_4_8s:
+               /* For now we cap the delay at 2 seconds, since the
+                * worst-case timeout value would otherwise have us
+                * waiting for up to 34 seconds.
+                */
+       case IXGBE_PCIDEVCTRL2_1_2s:
+               return 2000000ul;       /* 2.0 s */
+       case IXGBE_PCIDEVCTRL2_260_520ms:
+               return 520000ul;        /* 520 ms */
+       case IXGBE_PCIDEVCTRL2_65_130ms:
+               return 130000ul;        /* 130 ms */
+       case IXGBE_PCIDEVCTRL2_16_32ms:
+               return 32000ul;         /* 32 ms */
+       case IXGBE_PCIDEVCTRL2_1_2ms:
+               return 2000ul;          /* 2 ms */
+       case IXGBE_PCIDEVCTRL2_50_100us:
+               return 100ul;           /* 100 us */
+       case IXGBE_PCIDEVCTRL2_16_32ms_def:
+               return 32000ul;         /* 32 ms */
+       default:
+               break;
+       }
+
+       /* We shouldn't need to hit this path, but just in case, default
+        * to 32 ms as though the completion timeout were not supported.
+        */
+       return 32000ul;
+}
+
+void ixgbe_disable_rx(struct ixgbe_adapter *adapter)
+{
+       unsigned long wait_delay, delay_interval;
+       struct ixgbe_hw *hw = &adapter->hw;
+       int i, wait_loop;
+       u32 rxdctl;
+
+       /* disable receives */
+       hw->mac.ops.disable_rx(hw);
+
+       if (ixgbe_removed(hw->hw_addr))
+               return;
+
+       /* disable all enabled Rx queues */
+       for (i = 0; i < adapter->num_rx_queues; i++) {
+               struct ixgbe_ring *ring = adapter->rx_ring[i];
+               u8 reg_idx = ring->reg_idx;
+
+               rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(reg_idx));
+               rxdctl &= ~IXGBE_RXDCTL_ENABLE;
+               rxdctl |= IXGBE_RXDCTL_SWFLSH;
+
+               /* write value back with RXDCTL.ENABLE bit cleared */
+               IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(reg_idx), rxdctl);
+       }
+
+       /* RXDCTL.EN may not change on 82598 if link is down, so skip it */
+       if (hw->mac.type == ixgbe_mac_82598EB &&
+           !(IXGBE_READ_REG(hw, IXGBE_LINKS) & IXGBE_LINKS_UP))
+               return;
+
+       /* Determine our minimum delay interval. We will increase this value
+        * with each subsequent test. This way if the device returns quickly
+        * we should spend as little time as possible waiting; however, as
+        * the time increases we will wait for larger periods of time.
+        *
+        * The trick here is that we increase the interval using the
+        * following pattern: 1x 3x 5x 7x 9x 11x 13x 15x 17x 19x. The result
+        * of that wait is that it totals up to 100x whatever interval we
+        * choose. Since our minimum wait is 100us we can just divide the
+        * total timeout by 100 to get our minimum delay interval.
+        */
+       delay_interval = ixgbe_get_completion_timeout(adapter) / 100;
+
+       wait_loop = IXGBE_MAX_RX_DESC_POLL;
+       wait_delay = delay_interval;
+
+       while (wait_loop--) {
+               usleep_range(wait_delay, wait_delay + 10);
+               wait_delay += delay_interval * 2;
+               rxdctl = 0;
+
+               /* OR together the reading of all the active RXDCTL registers,
+                * and then test the result. We need the disable to complete
+                * before we start freeing the memory and invalidating the
+                * DMA mappings.
+                */
+               for (i = 0; i < adapter->num_rx_queues; i++) {
+                       struct ixgbe_ring *ring = adapter->rx_ring[i];
+                       u8 reg_idx = ring->reg_idx;
+
+                       rxdctl |= IXGBE_READ_REG(hw, IXGBE_RXDCTL(reg_idx));
+               }
+
+               if (!(rxdctl & IXGBE_RXDCTL_ENABLE))
+                       return;
+       }
+
+       e_err(drv,
+             "RXDCTL.ENABLE for one or more queues not cleared within the polling period\n");
+}
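A quick check of the polling arithmetic described in the comment above: with wait_delay starting at one interval and growing by two intervals per iteration, the IXGBE_MAX_RX_DESC_POLL (10) sleeps are 1x, 3x, ..., 19x the interval, and the sum of the first ten odd numbers is 10^2 = 100, so dividing the completion timeout by 100 makes the loop span roughly the whole timeout. Standalone sketch, not driver code:

#include <assert.h>

int main(void)
{
	unsigned long delay_interval = 320;	/* e.g. 32000 us / 100 */
	unsigned long wait_delay = delay_interval, total = 0;
	int wait_loop = 10;			/* IXGBE_MAX_RX_DESC_POLL */

	while (wait_loop--) {
		total += wait_delay;		/* 1x, 3x, 5x, ..., 19x */
		wait_delay += delay_interval * 2;
	}
	assert(total == 100 * delay_interval);
	return 0;
}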
+
+void ixgbe_disable_tx(struct ixgbe_adapter *adapter)
+{
+       unsigned long wait_delay, delay_interval;
+       struct ixgbe_hw *hw = &adapter->hw;
+       int i, wait_loop;
+       u32 txdctl;
+
+       if (ixgbe_removed(hw->hw_addr))
+               return;
+
+       /* disable all enabled Tx queues */
+       for (i = 0; i < adapter->num_tx_queues; i++) {
+               struct ixgbe_ring *ring = adapter->tx_ring[i];
+               u8 reg_idx = ring->reg_idx;
+
+               IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(reg_idx), IXGBE_TXDCTL_SWFLSH);
+       }
+
+       /* disable all enabled XDP Tx queues */
+       for (i = 0; i < adapter->num_xdp_queues; i++) {
+               struct ixgbe_ring *ring = adapter->xdp_ring[i];
+               u8 reg_idx = ring->reg_idx;
+
+               IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(reg_idx), IXGBE_TXDCTL_SWFLSH);
+       }
+
+       /* If the link is not up there shouldn't be much in the way of
+        * pending transactions. Those that are left will be flushed out
+        * when the reset logic goes through the flush sequence to clean out
+        * the pending Tx transactions.
+        */
+       if (!(IXGBE_READ_REG(hw, IXGBE_LINKS) & IXGBE_LINKS_UP))
+               goto dma_engine_disable;
+
+       /* Determine our minimum delay interval. We will increase this value
+        * with each subsequent test. This way if the device returns quickly
+        * we should spend as little time as possible waiting; however, as
+        * the time increases we will wait for larger periods of time.
+        *
+        * The trick here is that we increase the interval using the
+        * following pattern: 1x 3x 5x 7x 9x 11x 13x 15x 17x 19x. The result
+        * of that wait is that it totals up to 100x whatever interval we
+        * choose. Since our minimum wait is 100us we can just divide the
+        * total timeout by 100 to get our minimum delay interval.
+        */
+       delay_interval = ixgbe_get_completion_timeout(adapter) / 100;
+
+       wait_loop = IXGBE_MAX_RX_DESC_POLL;
+       wait_delay = delay_interval;
+
+       while (wait_loop--) {
+               usleep_range(wait_delay, wait_delay + 10);
+               wait_delay += delay_interval * 2;
+               txdctl = 0;
+
+               /* OR together the reading of all the active TXDCTL registers,
+                * and then test the result. We need the disable to complete
+                * before we start freeing the memory and invalidating the
+                * DMA mappings.
+                */
+               for (i = 0; i < adapter->num_tx_queues; i++) {
+                       struct ixgbe_ring *ring = adapter->tx_ring[i];
+                       u8 reg_idx = ring->reg_idx;
+
+                       txdctl |= IXGBE_READ_REG(hw, IXGBE_TXDCTL(reg_idx));
+               }
+               for (i = 0; i < adapter->num_xdp_queues; i++) {
+                       struct ixgbe_ring *ring = adapter->xdp_ring[i];
+                       u8 reg_idx = ring->reg_idx;
+
+                       txdctl |= IXGBE_READ_REG(hw, IXGBE_TXDCTL(reg_idx));
+               }
+
+               if (!(txdctl & IXGBE_TXDCTL_ENABLE))
+                       goto dma_engine_disable;
+       }
+
+       e_err(drv,
+             "TXDCTL.ENABLE for one or more queues not cleared within the polling period\n");
+
+dma_engine_disable:
+       /* Disable the Tx DMA engine on 82599 and later MAC */
+       switch (hw->mac.type) {
+       case ixgbe_mac_82599EB:
+       case ixgbe_mac_X540:
+       case ixgbe_mac_X550:
+       case ixgbe_mac_X550EM_x:
+       case ixgbe_mac_x550em_a:
+               IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL,
+                               (IXGBE_READ_REG(hw, IXGBE_DMATXCTL) &
+                                ~IXGBE_DMATXCTL_TE));
+               /* fall through */
+       default:
+               break;
+       }
+}
+
 void ixgbe_reset(struct ixgbe_adapter *adapter)
 {
        struct ixgbe_hw *hw = &adapter->hw;
@@ -5803,24 +5992,19 @@ void ixgbe_down(struct ixgbe_adapter *adapter)
        if (test_and_set_bit(__IXGBE_DOWN, &adapter->state))
                return; /* do nothing if already down */
 
-       /* disable receives */
-       hw->mac.ops.disable_rx(hw);
+       /* Shut off incoming Tx traffic */
+       netif_tx_stop_all_queues(netdev);
 
-       /* disable all enabled rx queues */
-       for (i = 0; i < adapter->num_rx_queues; i++)
-               /* this call also flushes the previous write */
-               ixgbe_disable_rx_queue(adapter, adapter->rx_ring[i]);
+       /* call carrier off first to avoid false dev_watchdog timeouts */
+       netif_carrier_off(netdev);
+       netif_tx_disable(netdev);
 
-       usleep_range(10000, 20000);
+       /* Disable Rx */
+       ixgbe_disable_rx(adapter);
 
        /* synchronize_sched() needed for pending XDP buffers to drain */
        if (adapter->xdp_ring[0])
                synchronize_sched();
-       netif_tx_stop_all_queues(netdev);
-
-       /* call carrier off first to avoid false dev_watchdog timeouts */
-       netif_carrier_off(netdev);
-       netif_tx_disable(netdev);
 
        ixgbe_irq_disable(adapter);
 
@@ -5848,30 +6032,7 @@ void ixgbe_down(struct ixgbe_adapter *adapter)
        }
 
        /* disable transmits in the hardware now that interrupts are off */
-       for (i = 0; i < adapter->num_tx_queues; i++) {
-               u8 reg_idx = adapter->tx_ring[i]->reg_idx;
-               IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(reg_idx), IXGBE_TXDCTL_SWFLSH);
-       }
-       for (i = 0; i < adapter->num_xdp_queues; i++) {
-               u8 reg_idx = adapter->xdp_ring[i]->reg_idx;
-
-               IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(reg_idx), IXGBE_TXDCTL_SWFLSH);
-       }
-
-       /* Disable the Tx DMA engine on 82599 and later MAC */
-       switch (hw->mac.type) {
-       case ixgbe_mac_82599EB:
-       case ixgbe_mac_X540:
-       case ixgbe_mac_X550:
-       case ixgbe_mac_X550EM_x:
-       case ixgbe_mac_x550em_a:
-               IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL,
-                               (IXGBE_READ_REG(hw, IXGBE_DMATXCTL) &
-                                ~IXGBE_DMATXCTL_TE));
-               break;
-       default:
-               break;
-       }
+       ixgbe_disable_tx(adapter);
 
        if (!pci_channel_offline(adapter->pdev))
                ixgbe_reset(adapter);
@@ -6458,6 +6619,11 @@ static int ixgbe_change_mtu(struct net_device *netdev, int new_mtu)
 {
        struct ixgbe_adapter *adapter = netdev_priv(netdev);
 
+       if (adapter->xdp_prog) {
+               e_warn(probe, "MTU cannot be changed while XDP program is loaded\n");
+               return -EPERM;
+       }
+
        /*
         * For 82599EB we cannot allow legacy VFs to enable their receive
         * paths when MTU greater than 1500 is configured.  So display a
@@ -8197,25 +8363,25 @@ static void ixgbe_atr(struct ixgbe_ring *ring,
                                              input, common, ring->queue_index);
 }
 
+#ifdef IXGBE_FCOE
 static u16 ixgbe_select_queue(struct net_device *dev, struct sk_buff *skb,
-                             void *accel_priv, select_queue_fallback_t fallback)
+                             struct net_device *sb_dev,
+                             select_queue_fallback_t fallback)
 {
-       struct ixgbe_fwd_adapter *fwd_adapter = accel_priv;
        struct ixgbe_adapter *adapter;
-       int txq;
-#ifdef IXGBE_FCOE
        struct ixgbe_ring_feature *f;
-#endif
+       int txq;
 
-       if (fwd_adapter) {
-               adapter = netdev_priv(dev);
-               txq = reciprocal_scale(skb_get_hash(skb),
-                                      adapter->num_rx_queues_per_pool);
+       if (sb_dev) {
+               u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
+               struct net_device *vdev = sb_dev;
 
-               return txq + fwd_adapter->tx_base_queue;
-       }
+               txq = vdev->tc_to_txq[tc].offset;
+               txq += reciprocal_scale(skb_get_hash(skb),
+                                       vdev->tc_to_txq[tc].count);
 
-#ifdef IXGBE_FCOE
+               return txq;
+       }
 
        /*
         * only execute the code below if protocol is FCoE
@@ -8226,11 +8392,11 @@ static u16 ixgbe_select_queue(struct net_device *dev, struct sk_buff *skb,
        case htons(ETH_P_FIP):
                adapter = netdev_priv(dev);
 
-               if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED)
+               if (!sb_dev && (adapter->flags & IXGBE_FLAG_FCOE_ENABLED))
                        break;
                /* fall through */
        default:
-               return fallback(dev, skb);
+               return fallback(dev, skb, sb_dev);
        }
 
        f = &adapter->ring_feature[RING_F_FCOE];
@@ -8242,11 +8408,9 @@ static u16 ixgbe_select_queue(struct net_device *dev, struct sk_buff *skb,
                txq -= f->indices;
 
        return txq + f->offset;
-#else
-       return fallback(dev, skb);
-#endif
 }
 
+#endif
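The subordinate-device branch above picks a queue by adding a hash spread over the TC's queue count to the TC's base offset. reciprocal_scale() maps a 32-bit value into [0, ep_ro) with a multiply and shift instead of a division; a minimal userspace sketch with illustrative values:

#include <stdint.h>
#include <stdio.h>

/* same definition as the kernel's reciprocal_scale() helper */
static uint32_t reciprocal_scale(uint32_t val, uint32_t ep_ro)
{
	return (uint32_t)(((uint64_t)val * ep_ro) >> 32);
}

int main(void)
{
	uint32_t hash = 0xdeadbeef;	/* stand-in for skb_get_hash() */
	uint16_t offset = 8, count = 2;	/* stand-in for tc_to_txq[tc] */

	printf("txq = %u\n", offset + reciprocal_scale(hash, count));
	return 0;
}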
 static int ixgbe_xmit_xdp_ring(struct ixgbe_adapter *adapter,
                               struct xdp_frame *xdpf)
 {
@@ -8766,6 +8930,11 @@ static int ixgbe_reassign_macvlan_pool(struct net_device *vdev, void *data)
        /* if we cannot find a free pool then disable the offload */
        netdev_err(vdev, "L2FW offload disabled due to lack of queue resources\n");
        macvlan_release_l2fw_offload(vdev);
+
+       /* unbind the queues and drop the subordinate channel config */
+       netdev_unbind_sb_channel(adapter->netdev, vdev);
+       netdev_set_sb_channel(vdev, 0);
+
        kfree(accel);
 
        return 0;
@@ -9329,7 +9498,7 @@ static int ixgbe_setup_tc_block(struct net_device *dev,
        switch (f->command) {
        case TC_BLOCK_BIND:
                return tcf_block_cb_register(f->block, ixgbe_setup_tc_block_cb,
-                                            adapter, adapter);
+                                            adapter, adapter, f->extack);
        case TC_BLOCK_UNBIND:
                tcf_block_cb_unregister(f->block, ixgbe_setup_tc_block_cb,
                                        adapter);
@@ -9393,6 +9562,11 @@ static netdev_features_t ixgbe_fix_features(struct net_device *netdev,
        if (!(adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE))
                features &= ~NETIF_F_LRO;
 
+       if (adapter->xdp_prog && (features & NETIF_F_LRO)) {
+               e_dev_err("LRO is not supported with XDP\n");
+               features &= ~NETIF_F_LRO;
+       }
+
        return features;
 }
 
@@ -9769,6 +9943,13 @@ static void *ixgbe_fwd_add(struct net_device *pdev, struct net_device *vdev)
        if (!macvlan_supports_dest_filter(vdev))
                return ERR_PTR(-EMEDIUMTYPE);
 
+       /* We need to lock down the macvlan to be a single queue device so that
+        * we can reuse the tc_to_txq field in the macvlan netdev to represent
+        * the queue mapping to our netdev.
+        */
+       if (netif_is_multiqueue(vdev))
+               return ERR_PTR(-ERANGE);
+
        pool = find_first_zero_bit(adapter->fwd_bitmask, adapter->num_rx_pools);
        if (pool == adapter->num_rx_pools) {
                u16 used_pools = adapter->num_vfs + adapter->num_rx_pools;
@@ -9825,6 +10006,7 @@ static void *ixgbe_fwd_add(struct net_device *pdev, struct net_device *vdev)
                return ERR_PTR(-ENOMEM);
 
        set_bit(pool, adapter->fwd_bitmask);
+       netdev_set_sb_channel(vdev, pool);
        accel->pool = pool;
        accel->netdev = vdev;
 
@@ -9866,6 +10048,10 @@ static void ixgbe_fwd_del(struct net_device *pdev, void *priv)
                ring->netdev = NULL;
        }
 
+       /* unbind the queues and drop the subordinate channel config */
+       netdev_unbind_sb_channel(pdev, accel->netdev);
+       netdev_set_sb_channel(accel->netdev, 0);
+
        clear_bit(accel->pool, adapter->fwd_bitmask);
        kfree(accel);
 }
@@ -9966,7 +10152,6 @@ static int ixgbe_xdp(struct net_device *dev, struct netdev_bpf *xdp)
        case XDP_SETUP_PROG:
                return ixgbe_xdp_setup(dev, xdp->prog);
        case XDP_QUERY_PROG:
-               xdp->prog_attached = !!(adapter->xdp_prog);
                xdp->prog_id = adapter->xdp_prog ?
                        adapter->xdp_prog->aux->id : 0;
                return 0;
@@ -10026,7 +10211,6 @@ static const struct net_device_ops ixgbe_netdev_ops = {
        .ndo_open               = ixgbe_open,
        .ndo_stop               = ixgbe_close,
        .ndo_start_xmit         = ixgbe_xmit_frame,
-       .ndo_select_queue       = ixgbe_select_queue,
        .ndo_set_rx_mode        = ixgbe_set_rx_mode,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_set_mac_address    = ixgbe_set_mac,
@@ -10049,6 +10233,7 @@ static const struct net_device_ops ixgbe_netdev_ops = {
        .ndo_poll_controller    = ixgbe_netpoll,
 #endif
 #ifdef IXGBE_FCOE
+       .ndo_select_queue       = ixgbe_select_queue,
        .ndo_fcoe_ddp_setup = ixgbe_fcoe_ddp_get,
        .ndo_fcoe_ddp_target = ixgbe_fcoe_ddp_target,
        .ndo_fcoe_ddp_done = ixgbe_fcoe_ddp_put,
index 59416eddd8402154280890a24a7023645b24abe9..d86446d202d5ed95826db225139b095d7c7a683c 100644 (file)
@@ -4462,7 +4462,6 @@ static int ixgbevf_xdp(struct net_device *dev, struct netdev_bpf *xdp)
        case XDP_SETUP_PROG:
                return ixgbevf_xdp_setup(dev, xdp->prog);
        case XDP_QUERY_PROG:
-               xdp->prog_attached = !!(adapter->xdp_prog);
                xdp->prog_id = adapter->xdp_prog ?
                               adapter->xdp_prog->aux->id : 0;
                return 0;
index 06ff185eb1882ee6e6425baed1e35eca38d6bd2f..a5ab6f3403ae02c2001be9c092eccec6a116433e 100644 (file)
@@ -1911,10 +1911,10 @@ jme_wait_link(struct jme_adapter *jme)
 {
        u32 phylink, to = JME_WAIT_LINK_TIME;
 
-       mdelay(1000);
+       msleep(1000);
        phylink = jme_linkstat_from_phy(jme);
        while (!(phylink & PHY_LINK_UP) && (to -= 10) > 0) {
-               mdelay(10);
+               usleep_range(10000, 11000);
                phylink = jme_linkstat_from_phy(jme);
        }
 }
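The conversions above follow the usual guidance (paraphrasing Documentation/timers/timers-howto.txt):

/* Rule of thumb for kernel delays (paraphrased, not verbatim):
 *
 *	atomic context:            udelay()/mdelay() busy-wait
 *	sleeping, ~10us to ~20ms:  usleep_range(min, max)
 *	sleeping, ~10ms and up:    msleep()
 *
 * jme_wait_link() may sleep, so busy-waiting a full second with
 * mdelay() only burns CPU; hence the replacements above.
 */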
index afc81006944059837b5cbbdaaaa26d9dc87d82a0..7a637b51c7d2302b63f2eb579adea4e0459e7430 100644 (file)
@@ -563,14 +563,6 @@ ltq_etop_set_multicast_list(struct net_device *dev)
        spin_unlock_irqrestore(&priv->lock, flags);
 }
 
-static u16
-ltq_etop_select_queue(struct net_device *dev, struct sk_buff *skb,
-                     void *accel_priv, select_queue_fallback_t fallback)
-{
-       /* we are currently only using the first queue */
-       return 0;
-}
-
 static int
 ltq_etop_init(struct net_device *dev)
 {
@@ -641,7 +633,7 @@ static const struct net_device_ops ltq_eth_netdev_ops = {
        .ndo_set_mac_address = ltq_etop_set_mac_address,
        .ndo_validate_addr = eth_validate_addr,
        .ndo_set_rx_mode = ltq_etop_set_multicast_list,
-       .ndo_select_queue = ltq_etop_select_queue,
+       .ndo_select_queue = dev_pick_tx_zero,
        .ndo_init = ltq_etop_init,
        .ndo_tx_timeout = ltq_etop_tx_timeout,
 };
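dev_pick_tx_zero() is one of the generic ndo_select_queue helpers this merge introduces; to the best of my reading it simply pins every skb to queue 0, i.e. exactly what the removed driver-local callback did:

/* Sketch of the net/core/dev.c helper (see the kernel source for the
 * authoritative version):
 */
u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb,
		     struct net_device *sb_dev,
		     select_queue_fallback_t fallback)
{
	return 0;
}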
index 0ad2f3f7da85a029b5dea7dd3ce67b69d4ff8605..55c2a56c5dae52178af62a0ab7d97f6ed054a3cf 100644 (file)
 #define MVNETA_RSS_LU_TABLE_SIZE       1
 
 /* Max number of Rx descriptors */
-#define MVNETA_MAX_RXD 128
+#define MVNETA_MAX_RXD 512
 
 /* Max number of Tx descriptors */
-#define MVNETA_MAX_TXD 532
+#define MVNETA_MAX_TXD 1024
 
 /* Max number of allowed TCP segments for software TSO */
 #define MVNETA_MAX_TSO_SEGS 100
 
 enum {
        ETHTOOL_STAT_EEE_WAKEUP,
+       ETHTOOL_STAT_SKB_ALLOC_ERR,
+       ETHTOOL_STAT_REFILL_ERR,
        ETHTOOL_MAX_STATS,
 };
 
@@ -375,6 +377,8 @@ static const struct mvneta_statistic mvneta_statistics[] = {
        { 0x3054, T_REG_32, "fc_sent", },
        { 0x300c, T_REG_32, "internal_mac_transmit_err", },
        { ETHTOOL_STAT_EEE_WAKEUP, T_SW, "eee_wakeup_errors", },
+       { ETHTOOL_STAT_SKB_ALLOC_ERR, T_SW, "skb_alloc_errors", },
+       { ETHTOOL_STAT_REFILL_ERR, T_SW, "refill_errors", },
 };
 
 struct mvneta_pcpu_stats {
@@ -479,7 +483,10 @@ struct mvneta_port {
 #define MVNETA_RXD_ERR_RESOURCE                (BIT(17) | BIT(18))
 #define MVNETA_RXD_ERR_CODE_MASK       (BIT(17) | BIT(18))
 #define MVNETA_RXD_L3_IP4              BIT(25)
-#define MVNETA_RXD_FIRST_LAST_DESC     (BIT(26) | BIT(27))
+#define MVNETA_RXD_LAST_DESC           BIT(26)
+#define MVNETA_RXD_FIRST_DESC          BIT(27)
+#define MVNETA_RXD_FIRST_LAST_DESC     (MVNETA_RXD_FIRST_DESC | \
+                                        MVNETA_RXD_LAST_DESC)
 #define MVNETA_RXD_L4_CSUM_OK          BIT(30)
 
 #if defined(__LITTLE_ENDIAN)
@@ -589,9 +596,6 @@ struct mvneta_rx_queue {
        /* num of rx descriptors in the rx descriptor ring */
        int size;
 
-       /* counter of times when mvneta_refill() failed */
-       int missed;
-
        u32 pkts_coal;
        u32 time_coal;
 
@@ -609,6 +613,18 @@ struct mvneta_rx_queue {
 
        /* Index of the next RX DMA descriptor to process */
        int next_desc_to_proc;
+
+       /* Index of first RX DMA descriptor to refill */
+       int first_to_refill;
+       u32 refill_num;
+
+       /* pointer to an incomplete skb being reassembled */
+       struct sk_buff *skb;
+       int left_size;
+
+       /* error counters */
+       u32 skb_alloc_err;
+       u32 refill_err;
 };
 
 static enum cpuhp_state online_hpstate;
@@ -621,6 +637,7 @@ static int txq_number = 8;
 static int rxq_def;
 
 static int rx_copybreak __read_mostly = 256;
+static int rx_header_size __read_mostly = 128;
 
 /* HW BM needs each port to be identified by a unique ID */
 static int global_port_id;
@@ -1684,13 +1701,6 @@ static void mvneta_rx_error(struct mvneta_port *pp,
 {
        u32 status = rx_desc->status;
 
-       if (!mvneta_rxq_desc_is_first_last(status)) {
-               netdev_err(pp->dev,
-                          "bad rx status %08x (buffer oversize), size=%d\n",
-                          status, rx_desc->data_size);
-               return;
-       }
-
        switch (status & MVNETA_RXD_ERR_CODE_MASK) {
        case MVNETA_RXD_ERR_CRC:
                netdev_err(pp->dev, "bad rx status %08x (crc error), size=%d\n",
@@ -1715,7 +1725,8 @@ static void mvneta_rx_error(struct mvneta_port *pp,
 static void mvneta_rx_csum(struct mvneta_port *pp, u32 status,
                           struct sk_buff *skb)
 {
-       if ((status & MVNETA_RXD_L3_IP4) &&
+       if ((pp->dev->features & NETIF_F_RXCSUM) &&
+           (status & MVNETA_RXD_L3_IP4) &&
            (status & MVNETA_RXD_L4_CSUM_OK)) {
                skb->csum = 0;
                skb->ip_summed = CHECKSUM_UNNECESSARY;
@@ -1790,47 +1801,30 @@ static void mvneta_txq_done(struct mvneta_port *pp,
        }
 }
 
-void *mvneta_frag_alloc(unsigned int frag_size)
-{
-       if (likely(frag_size <= PAGE_SIZE))
-               return netdev_alloc_frag(frag_size);
-       else
-               return kmalloc(frag_size, GFP_ATOMIC);
-}
-EXPORT_SYMBOL_GPL(mvneta_frag_alloc);
-
-void mvneta_frag_free(unsigned int frag_size, void *data)
-{
-       if (likely(frag_size <= PAGE_SIZE))
-               skb_free_frag(data);
-       else
-               kfree(data);
-}
-EXPORT_SYMBOL_GPL(mvneta_frag_free);
-
 /* Refill processing for SW buffer management */
+/* Allocate page per descriptor */
 static int mvneta_rx_refill(struct mvneta_port *pp,
                            struct mvneta_rx_desc *rx_desc,
-                           struct mvneta_rx_queue *rxq)
-
+                           struct mvneta_rx_queue *rxq,
+                           gfp_t gfp_mask)
 {
        dma_addr_t phys_addr;
-       void *data;
+       struct page *page;
 
-       data = mvneta_frag_alloc(pp->frag_size);
-       if (!data)
+       page = __dev_alloc_page(gfp_mask);
+       if (!page)
                return -ENOMEM;
 
-       phys_addr = dma_map_single(pp->dev->dev.parent, data,
-                                  MVNETA_RX_BUF_SIZE(pp->pkt_size),
-                                  DMA_FROM_DEVICE);
+       /* map page for use */
+       phys_addr = dma_map_page(pp->dev->dev.parent, page, 0, PAGE_SIZE,
+                                DMA_FROM_DEVICE);
        if (unlikely(dma_mapping_error(pp->dev->dev.parent, phys_addr))) {
-               mvneta_frag_free(pp->frag_size, data);
+               __free_page(page);
                return -ENOMEM;
        }
 
        phys_addr += pp->rx_offset_correction;
-       mvneta_rx_desc_fill(rx_desc, phys_addr, data, rxq);
+       mvneta_rx_desc_fill(rx_desc, phys_addr, page, rxq);
        return 0;
 }
 
@@ -1893,115 +1887,192 @@ static void mvneta_rxq_drop_pkts(struct mvneta_port *pp,
        for (i = 0; i < rxq->size; i++) {
                struct mvneta_rx_desc *rx_desc = rxq->descs + i;
                void *data = rxq->buf_virt_addr[i];
+               if (!data || !(rx_desc->buf_phys_addr))
+                       continue;
 
                dma_unmap_single(pp->dev->dev.parent, rx_desc->buf_phys_addr,
                                 MVNETA_RX_BUF_SIZE(pp->pkt_size), DMA_FROM_DEVICE);
-               mvneta_frag_free(pp->frag_size, data);
+               __free_page(data);
        }
 }
 
+static inline
+int mvneta_rx_refill_queue(struct mvneta_port *pp, struct mvneta_rx_queue *rxq)
+{
+       struct mvneta_rx_desc *rx_desc;
+       int curr_desc = rxq->first_to_refill;
+       int i;
+
+       for (i = 0; (i < rxq->refill_num) && (i < 64); i++) {
+               rx_desc = rxq->descs + curr_desc;
+               if (!(rx_desc->buf_phys_addr)) {
+                       if (mvneta_rx_refill(pp, rx_desc, rxq, GFP_ATOMIC)) {
+                               pr_err("Can't refill queue %d. Done %d from %d\n",
+                                      rxq->id, i, rxq->refill_num);
+                               rxq->refill_err++;
+                               break;
+                       }
+               }
+               curr_desc = MVNETA_QUEUE_NEXT_DESC(rxq, curr_desc);
+       }
+       rxq->refill_num -= i;
+       rxq->first_to_refill = curr_desc;
+
+       return i;
+}
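The refill walk above tops up at most 64 descriptors per call, advancing circularly from first_to_refill. A toy model of the index arithmetic (ring size and positions are made up; MVNETA_QUEUE_NEXT_DESC() in the driver performs the same wrap):

#include <stdio.h>

#define RING_SIZE 512	/* MVNETA_MAX_RXD after this change */

int main(void)
{
	int curr = 500;			/* first_to_refill */
	int refill_num = 20, i;

	for (i = 0; i < refill_num && i < 64; i++)
		curr = (curr + 1) % RING_SIZE;	/* wraps 511 -> 0 */

	printf("next first_to_refill = %d\n", curr);	/* prints 8 */
	return 0;
}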
+
 /* Main rx processing when using software buffer management */
-static int mvneta_rx_swbm(struct mvneta_port *pp, int rx_todo,
+static int mvneta_rx_swbm(struct napi_struct *napi,
+                         struct mvneta_port *pp, int budget,
                          struct mvneta_rx_queue *rxq)
 {
-       struct mvneta_pcpu_port *port = this_cpu_ptr(pp->ports);
        struct net_device *dev = pp->dev;
-       int rx_done;
+       int rx_todo, rx_proc;
+       int refill = 0;
        u32 rcvd_pkts = 0;
        u32 rcvd_bytes = 0;
 
        /* Get number of received packets */
-       rx_done = mvneta_rxq_busy_desc_num_get(pp, rxq);
-
-       if (rx_todo > rx_done)
-               rx_todo = rx_done;
-
-       rx_done = 0;
+       rx_todo = mvneta_rxq_busy_desc_num_get(pp, rxq);
+       rx_proc = 0;
 
        /* Fairness NAPI loop */
-       while (rx_done < rx_todo) {
+       while ((rcvd_pkts < budget) && (rx_proc < rx_todo)) {
                struct mvneta_rx_desc *rx_desc = mvneta_rxq_next_desc_get(rxq);
-               struct sk_buff *skb;
                unsigned char *data;
+               struct page *page;
                dma_addr_t phys_addr;
-               u32 rx_status, frag_size;
-               int rx_bytes, err, index;
+               u32 rx_status, index;
+               int rx_bytes, skb_size, copy_size;
+               int frag_num, frag_size, frag_offset;
 
-               rx_done++;
-               rx_status = rx_desc->status;
-               rx_bytes = rx_desc->data_size - (ETH_FCS_LEN + MVNETA_MH_SIZE);
                index = rx_desc - rxq->descs;
-               data = rxq->buf_virt_addr[index];
-               phys_addr = rx_desc->buf_phys_addr - pp->rx_offset_correction;
-
-               if (!mvneta_rxq_desc_is_first_last(rx_status) ||
-                   (rx_status & MVNETA_RXD_ERR_SUMMARY)) {
-                       mvneta_rx_error(pp, rx_desc);
-err_drop_frame:
-                       dev->stats.rx_errors++;
-                       /* leave the descriptor untouched */
-                       continue;
-               }
-
-               if (rx_bytes <= rx_copybreak) {
-               /* better copy a small frame and not unmap the DMA region */
-                       skb = netdev_alloc_skb_ip_align(dev, rx_bytes);
-                       if (unlikely(!skb))
-                               goto err_drop_frame;
-
-                       dma_sync_single_range_for_cpu(dev->dev.parent,
-                                                     phys_addr,
-                                                     MVNETA_MH_SIZE + NET_SKB_PAD,
-                                                     rx_bytes,
-                                                     DMA_FROM_DEVICE);
-                       skb_put_data(skb, data + MVNETA_MH_SIZE + NET_SKB_PAD,
-                                    rx_bytes);
+               page = (struct page *)rxq->buf_virt_addr[index];
+               data = page_address(page);
+               /* Prefetch header */
+               prefetch(data);
 
-                       skb->protocol = eth_type_trans(skb, dev);
-                       mvneta_rx_csum(pp, rx_status, skb);
-                       napi_gro_receive(&port->napi, skb);
-
-                       rcvd_pkts++;
-                       rcvd_bytes += rx_bytes;
+               phys_addr = rx_desc->buf_phys_addr;
+               rx_status = rx_desc->status;
+               rx_proc++;
+               rxq->refill_num++;
+
+               if (rx_status & MVNETA_RXD_FIRST_DESC) {
+                       /* Check errors only for FIRST descriptor */
+                       if (rx_status & MVNETA_RXD_ERR_SUMMARY) {
+                               mvneta_rx_error(pp, rx_desc);
+                               dev->stats.rx_errors++;
+                               /* leave the descriptor untouched */
+                               continue;
+                       }
+                       rx_bytes = rx_desc->data_size -
+                                  (ETH_FCS_LEN + MVNETA_MH_SIZE);
+
+                       /* Allocate small skb for each new packet */
+                       skb_size = max(rx_copybreak, rx_header_size);
+                       rxq->skb = netdev_alloc_skb_ip_align(dev, skb_size);
+                       if (unlikely(!rxq->skb)) {
+                               netdev_err(dev,
+                                          "Can't allocate skb on queue %d\n",
+                                          rxq->id);
+                               dev->stats.rx_dropped++;
+                               rxq->skb_alloc_err++;
+                               continue;
+                       }
+                       copy_size = min(skb_size, rx_bytes);
+
+                       /* Copy data from buffer to SKB, skip Marvell header */
+                       memcpy(rxq->skb->data, data + MVNETA_MH_SIZE,
+                              copy_size);
+                       skb_put(rxq->skb, copy_size);
+                       rxq->left_size = rx_bytes - copy_size;
+
+                       mvneta_rx_csum(pp, rx_status, rxq->skb);
+                       if (rxq->left_size == 0) {
+                               int size = copy_size + MVNETA_MH_SIZE;
+
+                               dma_sync_single_range_for_cpu(dev->dev.parent,
+                                                             phys_addr, 0,
+                                                             size,
+                                                             DMA_FROM_DEVICE);
+
+                               /* leave the descriptor and buffer untouched */
+                       } else {
+                               /* refill descriptor with new buffer later */
+                               rx_desc->buf_phys_addr = 0;
+
+                               frag_num = 0;
+                               frag_offset = copy_size + MVNETA_MH_SIZE;
+                               frag_size = min(rxq->left_size,
+                                               (int)(PAGE_SIZE - frag_offset));
+                               skb_add_rx_frag(rxq->skb, frag_num, page,
+                                               frag_offset, frag_size,
+                                               PAGE_SIZE);
+                               dma_unmap_single(dev->dev.parent, phys_addr,
+                                                PAGE_SIZE, DMA_FROM_DEVICE);
+                               rxq->left_size -= frag_size;
+                       }
+               } else {
+                       /* Middle or Last descriptor */
+                       if (unlikely(!rxq->skb)) {
+                               pr_debug("no skb for rx_status 0x%x\n",
+                                        rx_status);
+                               continue;
+                       }
+                       if (!rxq->left_size) {
+                               /* the last descriptor carries only the FCS,
+                                * so it can simply be discarded */
+                               dma_sync_single_range_for_cpu(dev->dev.parent,
+                                                             phys_addr, 0,
+                                                             ETH_FCS_LEN,
+                                                             DMA_FROM_DEVICE);
+                               /* leave the descriptor and buffer untouched */
+                       } else {
+                               /* refill descriptor with new buffer later */
+                               rx_desc->buf_phys_addr = 0;
+
+                               frag_num = skb_shinfo(rxq->skb)->nr_frags;
+                               frag_offset = 0;
+                               frag_size = min(rxq->left_size,
+                                               (int)(PAGE_SIZE - frag_offset));
+                               skb_add_rx_frag(rxq->skb, frag_num, page,
+                                               frag_offset, frag_size,
+                                               PAGE_SIZE);
+
+                               dma_unmap_single(dev->dev.parent, phys_addr,
+                                                PAGE_SIZE,
+                                                DMA_FROM_DEVICE);
+
+                               rxq->left_size -= frag_size;
+                       }
+               } /* Middle or Last descriptor */
 
-                       /* leave the descriptor and buffer untouched */
+               if (!(rx_status & MVNETA_RXD_LAST_DESC))
+                       /* no last descriptor this time */
                        continue;
-               }
 
-               /* Refill processing */
-               err = mvneta_rx_refill(pp, rx_desc, rxq);
-               if (err) {
-                       netdev_err(dev, "Linux processing - Can't refill\n");
-                       rxq->missed++;
-                       goto err_drop_frame;
+               if (rxq->left_size) {
+                       pr_err("get last desc, but left_size (%d) != 0\n",
+                              rxq->left_size);
+                       dev_kfree_skb_any(rxq->skb);
+                       rxq->left_size = 0;
+                       rxq->skb = NULL;
+                       continue;
                }
-
-               frag_size = pp->frag_size;
-
-               skb = build_skb(data, frag_size > PAGE_SIZE ? 0 : frag_size);
-
-               /* After refill old buffer has to be unmapped regardless
-                * the skb is successfully built or not.
-                */
-               dma_unmap_single(dev->dev.parent, phys_addr,
-                                MVNETA_RX_BUF_SIZE(pp->pkt_size),
-                                DMA_FROM_DEVICE);
-
-               if (!skb)
-                       goto err_drop_frame;
-
                rcvd_pkts++;
-               rcvd_bytes += rx_bytes;
+               rcvd_bytes += rxq->skb->len;
 
                /* Linux processing */
-               skb_reserve(skb, MVNETA_MH_SIZE + NET_SKB_PAD);
-               skb_put(skb, rx_bytes);
-
-               skb->protocol = eth_type_trans(skb, dev);
+               rxq->skb->protocol = eth_type_trans(rxq->skb, dev);
 
-               mvneta_rx_csum(pp, rx_status, skb);
+               if (dev->features & NETIF_F_GRO)
+                       napi_gro_receive(napi, rxq->skb);
+               else
+                       netif_receive_skb(rxq->skb);
 
-               napi_gro_receive(&port->napi, skb);
+               /* clear the incomplete skb pointer in the queue */
+               rxq->skb = NULL;
+               rxq->left_size = 0;
        }
 
        if (rcvd_pkts) {
@@ -2013,17 +2084,20 @@ err_drop_frame:
                u64_stats_update_end(&stats->syncp);
        }
 
+       /* return some buffers to the hardware queue; one at a time is too slow */
+       refill = mvneta_rx_refill_queue(pp, rxq);
+
        /* Update rxq management counters */
-       mvneta_rxq_desc_num_update(pp, rxq, rx_done, rx_done);
+       mvneta_rxq_desc_num_update(pp, rxq, rx_proc, refill);
 
-       return rx_done;
+       return rcvd_pkts;
 }
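To make the copybreak bookkeeping in mvneta_rx_swbm() concrete, here is a toy model of how one frame's bytes split between the small header skb and the page frag; the constants mirror the module parameters above and the frame size is made up:

#include <stdio.h>

#define PAGE_SZ		4096
#define MH_SIZE		2	/* MVNETA_MH_SIZE */

static int min_i(int a, int b) { return a < b ? a : b; }
static int max_i(int a, int b) { return a > b ? a : b; }

int main(void)
{
	int rx_copybreak = 256, rx_header_size = 128;
	int rx_bytes = 1500;	/* FCS and Marvell header already subtracted */
	int skb_size = max_i(rx_copybreak, rx_header_size);
	int copy_size = min_i(skb_size, rx_bytes);
	int left_size = rx_bytes - copy_size;
	int frag_offset = copy_size + MH_SIZE;
	int frag_size = min_i(left_size, PAGE_SZ - frag_offset);

	/* prints: copy 256 bytes, frag 1244 bytes at offset 258 */
	printf("copy %d bytes, frag %d bytes at offset %d\n",
	       copy_size, frag_size, frag_offset);
	return 0;
}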
 
 /* Main rx processing when using hardware buffer management */
-static int mvneta_rx_hwbm(struct mvneta_port *pp, int rx_todo,
+static int mvneta_rx_hwbm(struct napi_struct *napi,
+                         struct mvneta_port *pp, int rx_todo,
                          struct mvneta_rx_queue *rxq)
 {
-       struct mvneta_pcpu_port *port = this_cpu_ptr(pp->ports);
        struct net_device *dev = pp->dev;
        int rx_done;
        u32 rcvd_pkts = 0;
@@ -2085,7 +2159,7 @@ err_drop_frame:
 
                        skb->protocol = eth_type_trans(skb, dev);
                        mvneta_rx_csum(pp, rx_status, skb);
-                       napi_gro_receive(&port->napi, skb);
+                       napi_gro_receive(napi, skb);
 
                        rcvd_pkts++;
                        rcvd_bytes += rx_bytes;
@@ -2102,7 +2176,7 @@ err_drop_frame:
                err = hwbm_pool_refill(&bm_pool->hwbm_pool, GFP_ATOMIC);
                if (err) {
                        netdev_err(dev, "Linux processing - Can't refill\n");
-                       rxq->missed++;
+                       rxq->refill_err++;
                        goto err_drop_frame_ret_pool;
                }
 
@@ -2129,7 +2203,7 @@ err_drop_frame:
 
                mvneta_rx_csum(pp, rx_status, skb);
 
-               napi_gro_receive(&port->napi, skb);
+               napi_gro_receive(napi, skb);
        }
 
        if (rcvd_pkts) {
@@ -2722,9 +2796,11 @@ static int mvneta_poll(struct napi_struct *napi, int budget)
        if (rx_queue) {
                rx_queue = rx_queue - 1;
                if (pp->bm_priv)
-                       rx_done = mvneta_rx_hwbm(pp, budget, &pp->rxqs[rx_queue]);
+                       rx_done = mvneta_rx_hwbm(napi, pp, budget,
+                                                &pp->rxqs[rx_queue]);
                else
-                       rx_done = mvneta_rx_swbm(pp, budget, &pp->rxqs[rx_queue]);
+                       rx_done = mvneta_rx_swbm(napi, pp, budget,
+                                                &pp->rxqs[rx_queue]);
        }
 
        if (rx_done < budget) {
@@ -2761,9 +2837,11 @@ static int mvneta_rxq_fill(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
 
        for (i = 0; i < num; i++) {
                memset(rxq->descs + i, 0, sizeof(struct mvneta_rx_desc));
-               if (mvneta_rx_refill(pp, rxq->descs + i, rxq) != 0) {
-                       netdev_err(pp->dev, "%s:rxq %d, %d of %d buffs  filled\n",
-                               __func__, rxq->id, i, num);
+               if (mvneta_rx_refill(pp, rxq->descs + i, rxq,
+                                    GFP_KERNEL) != 0) {
+                       netdev_err(pp->dev,
+                                  "%s:rxq %d, %d of %d buffs  filled\n",
+                                  __func__, rxq->id, i, num);
                        break;
                }
        }
@@ -2821,21 +2899,23 @@ static void mvneta_rxq_hw_init(struct mvneta_port *pp,
        mvreg_write(pp, MVNETA_RXQ_BASE_ADDR_REG(rxq->id), rxq->descs_phys);
        mvreg_write(pp, MVNETA_RXQ_SIZE_REG(rxq->id), rxq->size);
 
-       /* Set Offset */
-       mvneta_rxq_offset_set(pp, rxq, NET_SKB_PAD - pp->rx_offset_correction);
-
        /* Set coalescing pkts and time */
        mvneta_rx_pkts_coal_set(pp, rxq, rxq->pkts_coal);
        mvneta_rx_time_coal_set(pp, rxq, rxq->time_coal);
 
        if (!pp->bm_priv) {
-               /* Fill RXQ with buffers from RX pool */
-               mvneta_rxq_buf_size_set(pp, rxq,
-                                       MVNETA_RX_BUF_SIZE(pp->pkt_size));
+               /* Set Offset */
+               mvneta_rxq_offset_set(pp, rxq, 0);
+               mvneta_rxq_buf_size_set(pp, rxq, pp->frag_size);
                mvneta_rxq_bm_disable(pp, rxq);
                mvneta_rxq_fill(pp, rxq, rxq->size);
        } else {
+               /* Set Offset */
+               mvneta_rxq_offset_set(pp, rxq,
+                                     NET_SKB_PAD - pp->rx_offset_correction);
+
                mvneta_rxq_bm_enable(pp, rxq);
+               /* Fill RXQ with buffers from RX pool */
                mvneta_rxq_long_pool_set(pp, rxq);
                mvneta_rxq_short_pool_set(pp, rxq);
                mvneta_rxq_non_occup_desc_add(pp, rxq, rxq->size);
@@ -2864,6 +2944,9 @@ static void mvneta_rxq_deinit(struct mvneta_port *pp,
 {
        mvneta_rxq_drop_pkts(pp, rxq);
 
+       if (rxq->skb)
+               dev_kfree_skb_any(rxq->skb);
+
        if (rxq->descs)
                dma_free_coherent(pp->dev->dev.parent,
                                  rxq->size * MVNETA_DESC_ALIGNED_SIZE,
@@ -2874,6 +2957,10 @@ static void mvneta_rxq_deinit(struct mvneta_port *pp,
        rxq->last_desc         = 0;
        rxq->next_desc_to_proc = 0;
        rxq->descs_phys        = 0;
+       rxq->first_to_refill   = 0;
+       rxq->refill_num        = 0;
+       rxq->skb               = NULL;
+       rxq->left_size         = 0;
 }
 
 static int mvneta_txq_sw_init(struct mvneta_port *pp,
@@ -3177,8 +3264,6 @@ static int mvneta_change_mtu(struct net_device *dev, int mtu)
                mvneta_bm_update_mtu(pp, mtu);
 
        pp->pkt_size = MVNETA_RX_PKT_SIZE(dev->mtu);
-       pp->frag_size = SKB_DATA_ALIGN(MVNETA_RX_BUF_SIZE(pp->pkt_size)) +
-                       SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 
        ret = mvneta_setup_rxqs(pp);
        if (ret) {
@@ -3194,7 +3279,6 @@ static int mvneta_change_mtu(struct net_device *dev, int mtu)
 
        on_each_cpu(mvneta_percpu_enable, pp, true);
        mvneta_start_dev(pp);
-       mvneta_port_up(pp);
 
        netdev_update_features(dev);
 
@@ -3666,8 +3750,7 @@ static int mvneta_open(struct net_device *dev)
        int ret;
 
        pp->pkt_size = MVNETA_RX_PKT_SIZE(pp->dev->mtu);
-       pp->frag_size = SKB_DATA_ALIGN(MVNETA_RX_BUF_SIZE(pp->pkt_size)) +
-                       SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+       pp->frag_size = PAGE_SIZE;
 
        ret = mvneta_setup_rxqs(pp);
        if (ret)
@@ -3962,6 +4045,12 @@ static void mvneta_ethtool_update_stats(struct mvneta_port *pp)
                        case ETHTOOL_STAT_EEE_WAKEUP:
                                val = phylink_get_eee_err(pp->phylink);
                                break;
+                       case ETHTOOL_STAT_SKB_ALLOC_ERR:
+                               val = pp->rxqs[0].skb_alloc_err;
+                               break;
+                       case ETHTOOL_STAT_REFILL_ERR:
+                               val = pp->rxqs[0].refill_err;
+                               break;
                        }
                        break;
                }
@@ -4362,14 +4451,6 @@ static int mvneta_probe(struct platform_device *pdev)
        pp->dn = dn;
 
        pp->rxq_def = rxq_def;
-
-       /* Set RX packet offset correction for platforms, whose
-        * NET_SKB_PAD, exceeds 64B. It should be 64B for 64-bit
-        * platforms and 0B for 32-bit ones.
-        */
-       pp->rx_offset_correction =
-               max(0, NET_SKB_PAD - MVNETA_RX_PKT_OFFSET_CORRECTION);
-
        pp->indir[0] = rxq_def;
 
        /* Get special SoC configurations */
@@ -4457,16 +4538,28 @@ static int mvneta_probe(struct platform_device *pdev)
        SET_NETDEV_DEV(dev, &pdev->dev);
 
        pp->id = global_port_id++;
+       pp->rx_offset_correction = 0; /* not relevant for SW BM */
 
        /* Obtain access to BM resources if enabled and already initialized */
        bm_node = of_parse_phandle(dn, "buffer-manager", 0);
-       if (bm_node && bm_node->data) {
-               pp->bm_priv = bm_node->data;
-               err = mvneta_bm_port_init(pdev, pp);
-               if (err < 0) {
-                       dev_info(&pdev->dev, "use SW buffer management\n");
-                       pp->bm_priv = NULL;
+       if (bm_node) {
+               pp->bm_priv = mvneta_bm_get(bm_node);
+               if (pp->bm_priv) {
+                       err = mvneta_bm_port_init(pdev, pp);
+                       if (err < 0) {
+                               dev_info(&pdev->dev,
+                                        "use SW buffer management\n");
+                               mvneta_bm_put(pp->bm_priv);
+                               pp->bm_priv = NULL;
+                       }
                }
+               /* Set the RX packet offset correction for platforms whose
+                * NET_SKB_PAD exceeds 64B. It should be 64B for 64-bit
+                * platforms and 0B for 32-bit ones.
+                */
+               pp->rx_offset_correction = max(0,
+                                              NET_SKB_PAD -
+                                              MVNETA_RX_PKT_OFFSET_CORRECTION);
        }
        of_node_put(bm_node);
 
@@ -4526,6 +4619,7 @@ err_netdev:
                mvneta_bm_pool_destroy(pp->bm_priv, pp->pool_long, 1 << pp->id);
                mvneta_bm_pool_destroy(pp->bm_priv, pp->pool_short,
                                       1 << pp->id);
+               mvneta_bm_put(pp->bm_priv);
        }
 err_free_stats:
        free_percpu(pp->stats);
@@ -4563,6 +4657,7 @@ static int mvneta_remove(struct platform_device *pdev)
                mvneta_bm_pool_destroy(pp->bm_priv, pp->pool_long, 1 << pp->id);
                mvneta_bm_pool_destroy(pp->bm_priv, pp->pool_short,
                                       1 << pp->id);
+               mvneta_bm_put(pp->bm_priv);
        }
 
        return 0;
index 466939f8f0cfce1e0f6b11aad59700ab2866e194..de468e1bdba9f3fb8bd66db9ee528b6859d7bf6c 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/module.h>
 #include <linux/netdevice.h>
 #include <linux/of.h>
+#include <linux/of_platform.h>
 #include <linux/platform_device.h>
 #include <linux/skbuff.h>
 #include <net/hwbm.h>
@@ -392,6 +393,20 @@ static void mvneta_bm_put_sram(struct mvneta_bm *priv)
                      MVNETA_BM_BPPI_SIZE);
 }
 
+struct mvneta_bm *mvneta_bm_get(struct device_node *node)
+{
+       struct platform_device *pdev = of_find_device_by_node(node);
+
+       return pdev ? platform_get_drvdata(pdev) : NULL;
+}
+EXPORT_SYMBOL_GPL(mvneta_bm_get);
+
+void mvneta_bm_put(struct mvneta_bm *priv)
+{
+       platform_device_put(priv->pdev);
+}
+EXPORT_SYMBOL_GPL(mvneta_bm_put);
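One subtlety here: of_find_device_by_node() takes a reference on the device it returns, so every successful mvneta_bm_get() must eventually be balanced by mvneta_bm_put(), as the mvneta probe, error, and remove paths above now do. The expected pairing, sketched with the field names used in the driver (not compilable on its own):

pp->bm_priv = mvneta_bm_get(bm_node);	/* takes a device reference */
if (pp->bm_priv && mvneta_bm_port_init(pdev, pp) < 0) {
	mvneta_bm_put(pp->bm_priv);	/* drop it on the error path */
	pp->bm_priv = NULL;
}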
+
 static int mvneta_bm_probe(struct platform_device *pdev)
 {
        struct device_node *dn = pdev->dev.of_node;
index a32de432800c9b4d56b776328486d815680cadc4..c8425d35c049bc8cf232eee65731ea0b166227cc 100644 (file)
@@ -130,10 +130,10 @@ struct mvneta_bm_pool {
 };
 
 /* Declarations and definitions */
-void *mvneta_frag_alloc(unsigned int frag_size);
-void mvneta_frag_free(unsigned int frag_size, void *data);
-
 #if IS_ENABLED(CONFIG_MVNETA_BM)
+struct mvneta_bm *mvneta_bm_get(struct device_node *node);
+void mvneta_bm_put(struct mvneta_bm *priv);
+
 void mvneta_bm_pool_destroy(struct mvneta_bm *priv,
                            struct mvneta_bm_pool *bm_pool, u8 port_map);
 void mvneta_bm_bufs_free(struct mvneta_bm *priv, struct mvneta_bm_pool *bm_pool,
@@ -178,5 +178,7 @@ static inline void mvneta_bm_pool_put_bp(struct mvneta_bm *priv,
 static inline u32 mvneta_bm_pool_get_bp(struct mvneta_bm *priv,
                                        struct mvneta_bm_pool *bm_pool)
 { return 0; }
+static inline struct mvneta_bm *mvneta_bm_get(struct device_node *node) { return NULL; }
+static inline void mvneta_bm_put(struct mvneta_bm *priv) {}
 #endif /* CONFIG_MVNETA_BM */
 #endif
index 4d11dd9e3246871fa37f12805c12de7bbe4c7717..51f65a202c6ed651304895a912e54ba72bc16750 100644 (file)
@@ -4,4 +4,4 @@
 #
 obj-$(CONFIG_MVPP2) := mvpp2.o
 
-mvpp2-objs := mvpp2_main.o mvpp2_prs.o mvpp2_cls.o
+mvpp2-objs := mvpp2_main.o mvpp2_prs.o mvpp2_cls.o mvpp2_debugfs.o
index def00dc3eb4e993887d2d3fd8f42a24116ab867d..67b9e81b7c0246435c26680e06939ef2e061bfd7 100644 (file)
@@ -1,17 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Definitions for Marvell PPv2 network controller for Armada 375 SoC.
  *
  * Copyright (C) 2014 Marvell
  *
  * Marcin Wojtas <mw@semihalf.com>
- *
- * This file is licensed under the terms of the GNU General Public
- * License version 2. This program is licensed "as is" without any
- * warranty of any kind, whether express or implied.
  */
 #ifndef _MVPP2_H_
 #define _MVPP2_H_
 
+#include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
 #include <linux/phy.h>
 #define MVPP2_PRS_SRAM_DATA_REG(idx)           (0x1204 + (idx) * 4)
 #define MVPP2_PRS_TCAM_CTRL_REG                        0x1230
 #define     MVPP2_PRS_TCAM_EN_MASK             BIT(0)
+#define MVPP2_PRS_TCAM_HIT_IDX_REG             0x1240
+#define MVPP2_PRS_TCAM_HIT_CNT_REG             0x1244
+#define     MVPP2_PRS_TCAM_HIT_CNT_MASK                GENMASK(15, 0)
 
 /* RSS Registers */
 #define MVPP22_RSS_INDEX                       0x1500
 #define     MVPP22_RSS_INDEX_TABLE_ENTRY(idx)  (idx)
 #define     MVPP22_RSS_INDEX_TABLE(idx)                ((idx) << 8)
 #define     MVPP22_RSS_INDEX_QUEUE(idx)                ((idx) << 16)
-#define MVPP22_RSS_TABLE_ENTRY                 0x1508
-#define MVPP22_RSS_TABLE                       0x1510
+#define MVPP22_RXQ2RSS_TABLE                   0x1504
 #define     MVPP22_RSS_TABLE_POINTER(p)                (p)
+#define MVPP22_RSS_TABLE_ENTRY                 0x1508
 #define MVPP22_RSS_WIDTH                       0x150c
 
 /* Classifier Registers */
 #define     MVPP2_CLS_LKP_INDEX_WAY_OFFS       6
 #define MVPP2_CLS_LKP_TBL_REG                  0x1818
 #define     MVPP2_CLS_LKP_TBL_RXQ_MASK         0xff
+#define     MVPP2_CLS_LKP_FLOW_PTR(flow)       ((flow) << 16)
 #define     MVPP2_CLS_LKP_TBL_LOOKUP_EN_MASK   BIT(25)
 #define MVPP2_CLS_FLOW_INDEX_REG               0x1820
 #define MVPP2_CLS_FLOW_TBL0_REG                        0x1824
+#define     MVPP2_CLS_FLOW_TBL0_LAST           BIT(0)
+#define     MVPP2_CLS_FLOW_TBL0_ENG_MASK       0x7
+#define     MVPP2_CLS_FLOW_TBL0_OFFS           1
+#define     MVPP2_CLS_FLOW_TBL0_ENG(x)         ((x) << 1)
+#define     MVPP2_CLS_FLOW_TBL0_PORT_ID_MASK   0xff
+#define     MVPP2_CLS_FLOW_TBL0_PORT_ID(port)  ((port) << 4)
+#define     MVPP2_CLS_FLOW_TBL0_PORT_ID_SEL    BIT(23)
 #define MVPP2_CLS_FLOW_TBL1_REG                        0x1828
+#define     MVPP2_CLS_FLOW_TBL1_N_FIELDS_MASK  0x7
+#define     MVPP2_CLS_FLOW_TBL1_N_FIELDS(x)    (x)
+#define     MVPP2_CLS_FLOW_TBL1_PRIO_MASK      0x3f
+#define     MVPP2_CLS_FLOW_TBL1_PRIO(x)                ((x) << 9)
+#define     MVPP2_CLS_FLOW_TBL1_SEQ_MASK       0x7
+#define     MVPP2_CLS_FLOW_TBL1_SEQ(x)         ((x) << 15)
 #define MVPP2_CLS_FLOW_TBL2_REG                        0x182c
+#define     MVPP2_CLS_FLOW_TBL2_FLD_MASK       0x3f
+#define     MVPP2_CLS_FLOW_TBL2_FLD_OFFS(n)    ((n) * 6)
+#define     MVPP2_CLS_FLOW_TBL2_FLD(n, x)      ((x) << ((n) * 6))
 #define MVPP2_CLS_OVERSIZE_RXQ_LOW_REG(port)   (0x1980 + ((port) * 4))
 #define     MVPP2_CLS_OVERSIZE_RXQ_LOW_BITS    3
 #define     MVPP2_CLS_OVERSIZE_RXQ_LOW_MASK    0x7
 #define MVPP2_CLS_SWFWD_PCTRL_REG              0x19d0
 #define     MVPP2_CLS_SWFWD_PCTRL_MASK(port)   (1 << (port))
 
+/* Classifier C2 engine Registers */
+#define MVPP22_CLS_C2_TCAM_IDX                 0x1b00
+#define MVPP22_CLS_C2_TCAM_DATA0               0x1b10
+#define MVPP22_CLS_C2_TCAM_DATA1               0x1b14
+#define MVPP22_CLS_C2_TCAM_DATA2               0x1b18
+#define MVPP22_CLS_C2_TCAM_DATA3               0x1b1c
+#define MVPP22_CLS_C2_TCAM_DATA4               0x1b20
+#define     MVPP22_CLS_C2_PORT_ID(port)                ((port) << 8)
+#define MVPP22_CLS_C2_HIT_CTR                  0x1b50
+#define MVPP22_CLS_C2_ACT                      0x1b60
+#define     MVPP22_CLS_C2_ACT_RSS_EN(act)      (((act) & 0x3) << 19)
+#define     MVPP22_CLS_C2_ACT_FWD(act)         (((act) & 0x7) << 13)
+#define     MVPP22_CLS_C2_ACT_QHIGH(act)       (((act) & 0x3) << 11)
+#define     MVPP22_CLS_C2_ACT_QLOW(act)                (((act) & 0x3) << 9)
+#define MVPP22_CLS_C2_ATTR0                    0x1b64
+#define     MVPP22_CLS_C2_ATTR0_QHIGH(qh)      (((qh) & 0x1f) << 24)
+#define     MVPP22_CLS_C2_ATTR0_QHIGH_MASK     0x1f
+#define     MVPP22_CLS_C2_ATTR0_QHIGH_OFFS     24
+#define     MVPP22_CLS_C2_ATTR0_QLOW(ql)       (((ql) & 0x7) << 21)
+#define     MVPP22_CLS_C2_ATTR0_QLOW_MASK      0x7
+#define     MVPP22_CLS_C2_ATTR0_QLOW_OFFS      21
+#define MVPP22_CLS_C2_ATTR1                    0x1b68
+#define MVPP22_CLS_C2_ATTR2                    0x1b6c
+#define     MVPP22_CLS_C2_ATTR2_RSS_EN         BIT(30)
+#define MVPP22_CLS_C2_ATTR3                    0x1b70
+
 /* Descriptor Manager Top Registers */
 #define MVPP2_RXQ_NUM_REG                      0x2040
 #define MVPP2_RXQ_DESC_ADDR_REG                        0x2044
 #define     MVPP22_BM_ADDR_HIGH_VIRT_RLS_MASK  0xff00
 #define     MVPP22_BM_ADDR_HIGH_VIRT_RLS_SHIFT 8
 
+/* Hit counter registers */
+#define MVPP2_CTRS_IDX                         0x7040
+#define MVPP2_CLS_DEC_TBL_HIT_CTR              0x7700
+#define MVPP2_CLS_FLOW_TBL_HIT_CTR             0x7704
+
 /* TX Scheduler registers */
 #define MVPP2_TXP_SCHED_PORT_INDEX_REG         0x8000
 #define MVPP2_TXP_SCHED_Q_CMD_REG              0x8004
 #define MVPP2_MAX_SKB_DESCS            (MVPP2_MAX_TSO_SEGS * 2 + MAX_SKB_FRAGS)
 
 /* Default number of RXQs in use */
-#define MVPP2_DEFAULT_RXQ              4
+#define MVPP2_DEFAULT_RXQ              1
 
 /* Max number of Rx descriptors */
 #define MVPP2_MAX_RXD_MAX              1024
        ((total_size) - NET_SKB_PAD - MVPP2_SKB_SHINFO_SIZE)
 
 #define MVPP2_BIT_TO_BYTE(bit)         ((bit) / 8)
+#define MVPP2_BIT_TO_WORD(bit)         ((bit) / 32)
+#define MVPP2_BIT_IN_WORD(bit)         ((bit) % 32)
+
+/* RSS constants */
+#define MVPP22_RSS_TABLE_ENTRIES       32
 
 /* IPv6 max L3 address size */
 #define MVPP2_MAX_L3_ADDR_SIZE         16
@@ -703,6 +757,9 @@ struct mvpp2 {
        /* Workqueue to gather hardware statistics */
        char queue_name[30];
        struct workqueue_struct *stats_queue;
+
+       /* Debugfs root entry */
+       struct dentry *dbgfs_dir;
 };
 
 struct mvpp2_pcpu_stats {
@@ -795,6 +852,9 @@ struct mvpp2_port {
        bool has_tx_irqs;
 
        u32 tx_time_coal;
+
+       /* RSS indirection table */
+       u32 indir[MVPP22_RSS_TABLE_ENTRIES];
 };
 
 /* The mvpp2_tx_desc and mvpp2_rx_desc structures describe the
@@ -831,52 +891,52 @@ struct mvpp2_port {
 
 /* HW TX descriptor for PPv2.1 */
 struct mvpp21_tx_desc {
-       u32 command;            /* Options used by HW for packet transmitting.*/
+       __le32 command;         /* Options used by HW for packet transmitting.*/
        u8  packet_offset;      /* the offset from the buffer beginning */
        u8  phys_txq;           /* destination queue ID                 */
-       u16 data_size;          /* data size of transmitted packet in bytes */
-       u32 buf_dma_addr;       /* physical addr of transmitted buffer  */
-       u32 buf_cookie;         /* cookie for access to TX buffer in tx path */
-       u32 reserved1[3];       /* hw_cmd (for future use, BM, PON, PNC) */
-       u32 reserved2;          /* reserved (for future use)            */
+       __le16 data_size;       /* data size of transmitted packet in bytes */
+       __le32 buf_dma_addr;    /* physical addr of transmitted buffer  */
+       __le32 buf_cookie;      /* cookie for access to TX buffer in tx path */
+       __le32 reserved1[3];    /* hw_cmd (for future use, BM, PON, PNC) */
+       __le32 reserved2;       /* reserved (for future use)            */
 };
 
 /* HW RX descriptor for PPv2.1 */
 struct mvpp21_rx_desc {
-       u32 status;             /* info about received packet           */
-       u16 reserved1;          /* parser_info (for future use, PnC)    */
-       u16 data_size;          /* size of received packet in bytes     */
-       u32 buf_dma_addr;       /* physical address of the buffer       */
-       u32 buf_cookie;         /* cookie for access to RX buffer in rx path */
-       u16 reserved2;          /* gem_port_id (for future use, PON)    */
-       u16 reserved3;          /* csum_l4 (for future use, PnC)        */
+       __le32 status;          /* info about received packet           */
+       __le16 reserved1;       /* parser_info (for future use, PnC)    */
+       __le16 data_size;       /* size of received packet in bytes     */
+       __le32 buf_dma_addr;    /* physical address of the buffer       */
+       __le32 buf_cookie;      /* cookie for access to RX buffer in rx path */
+       __le16 reserved2;       /* gem_port_id (for future use, PON)    */
+       __le16 reserved3;       /* csum_l4 (for future use, PnC)        */
        u8  reserved4;          /* bm_qset (for future use, BM)         */
        u8  reserved5;
-       u16 reserved6;          /* classify_info (for future use, PnC)  */
-       u32 reserved7;          /* flow_id (for future use, PnC) */
-       u32 reserved8;
+       __le16 reserved6;       /* classify_info (for future use, PnC)  */
+       __le32 reserved7;       /* flow_id (for future use, PnC) */
+       __le32 reserved8;
 };
 
 /* HW TX descriptor for PPv2.2 */
 struct mvpp22_tx_desc {
-       u32 command;
+       __le32 command;
        u8  packet_offset;
        u8  phys_txq;
-       u16 data_size;
-       u64 reserved1;
-       u64 buf_dma_addr_ptp;
-       u64 buf_cookie_misc;
+       __le16 data_size;
+       __le64 reserved1;
+       __le64 buf_dma_addr_ptp;
+       __le64 buf_cookie_misc;
 };
 
 /* HW RX descriptor for PPv2.2 */
 struct mvpp22_rx_desc {
-       u32 status;
-       u16 reserved1;
-       u16 data_size;
-       u32 reserved2;
-       u32 reserved3;
-       u64 buf_dma_addr_key_hash;
-       u64 buf_cookie_misc;
+       __le32 status;
+       __le16 reserved1;
+       __le16 data_size;
+       __le32 reserved2;
+       __le32 reserved3;
+       __le64 buf_dma_addr_key_hash;
+       __le64 buf_cookie_misc;
 };
 
 /* Opaque type used by the driver to manipulate the HW TX and RX
@@ -1043,4 +1103,8 @@ u32 mvpp2_percpu_read(struct mvpp2 *priv, int cpu, u32 offset);
 void mvpp2_percpu_write_relaxed(struct mvpp2 *priv, int cpu, u32 offset,
                                u32 data);
 
+void mvpp2_dbgfs_init(struct mvpp2 *priv, const char *name);
+
+void mvpp2_dbgfs_cleanup(struct mvpp2 *priv);
+
 #endif
index 8581d5b17dd51f4a06d4c4d622df8ea3e51cee25..efdb7a65683576a84806639630fce4d0928defcd 100644 (file)
+// SPDX-License-Identifier: GPL-2.0
 /*
  * RSS and Classifier helpers for Marvell PPv2 Network Controller
  *
  * Copyright (C) 2014 Marvell
  *
  * Marcin Wojtas <mw@semihalf.com>
- *
- * This file is licensed under the terms of the GNU General Public
- * License version 2. This program is licensed "as is" without any
- * warranty of any kind, whether express or implied.
  */
 
 #include "mvpp2.h"
 #include "mvpp2_cls.h"
+#include "mvpp2_prs.h"
+
+#define MVPP2_DEF_FLOW(_type, _id, _opts, _ri, _ri_mask)       \
+{                                                              \
+       .flow_type = _type,                                     \
+       .flow_id = _id,                                         \
+       .supported_hash_opts = _opts,                           \
+       .prs_ri = {                                             \
+               .ri = _ri,                                      \
+               .ri_mask = _ri_mask                             \
+       }                                                       \
+}
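
To make the table below easier to read, this is what its first entry expands to once the macro is applied (a hypothetical standalone declaration, field values taken verbatim from the entry below):

    static const struct mvpp2_cls_flow example_flow = {
            .flow_type = TCP_V4_FLOW,
            .flow_id = MVPP2_FL_IP4_TCP_NF_UNTAG,
            .supported_hash_opts = MVPP22_CLS_HEK_IP4_5T,
            .prs_ri = {
                    .ri = MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4 |
                          MVPP2_PRS_RI_L4_TCP,
                    .ri_mask = MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK
            }
    };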
+
+static struct mvpp2_cls_flow cls_flows[MVPP2_N_FLOWS] = {
+       /* TCP over IPv4 flows, not fragmented, no vlan tag */
+       MVPP2_DEF_FLOW(TCP_V4_FLOW, MVPP2_FL_IP4_TCP_NF_UNTAG,
+                      MVPP22_CLS_HEK_IP4_5T,
+                      MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4 |
+                      MVPP2_PRS_RI_L4_TCP,
+                      MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
+
+       MVPP2_DEF_FLOW(TCP_V4_FLOW, MVPP2_FL_IP4_TCP_NF_UNTAG,
+                      MVPP22_CLS_HEK_IP4_5T,
+                      MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4_OPT |
+                      MVPP2_PRS_RI_L4_TCP,
+                      MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
+
+       MVPP2_DEF_FLOW(TCP_V4_FLOW, MVPP2_FL_IP4_TCP_NF_UNTAG,
+                      MVPP22_CLS_HEK_IP4_5T,
+                      MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4_OTHER |
+                      MVPP2_PRS_RI_L4_TCP,
+                      MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
+
+       /* TCP over IPv4 flows, not fragmented, with vlan tag */
+       MVPP2_DEF_FLOW(TCP_V4_FLOW, MVPP2_FL_IP4_TCP_NF_TAG,
+                      MVPP22_CLS_HEK_IP4_5T | MVPP22_CLS_HEK_OPT_VLAN,
+                      MVPP2_PRS_RI_L3_IP4 | MVPP2_PRS_RI_L4_TCP,
+                      MVPP2_PRS_IP_MASK),
+
+       MVPP2_DEF_FLOW(TCP_V4_FLOW, MVPP2_FL_IP4_TCP_NF_TAG,
+                      MVPP22_CLS_HEK_IP4_5T | MVPP22_CLS_HEK_OPT_VLAN,
+                      MVPP2_PRS_RI_L3_IP4_OPT | MVPP2_PRS_RI_L4_TCP,
+                      MVPP2_PRS_IP_MASK),
+
+       MVPP2_DEF_FLOW(TCP_V4_FLOW, MVPP2_FL_IP4_TCP_NF_TAG,
+                      MVPP22_CLS_HEK_IP4_5T | MVPP22_CLS_HEK_OPT_VLAN,
+                      MVPP2_PRS_RI_L3_IP4_OTHER | MVPP2_PRS_RI_L4_TCP,
+                      MVPP2_PRS_IP_MASK),
+
+       /* TCP over IPv4 flows, fragmented, no vlan tag */
+       MVPP2_DEF_FLOW(TCP_V4_FLOW, MVPP2_FL_IP4_TCP_FRAG_UNTAG,
+                      MVPP22_CLS_HEK_IP4_2T,
+                      MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4 |
+                      MVPP2_PRS_RI_L4_TCP,
+                      MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
+
+       MVPP2_DEF_FLOW(TCP_V4_FLOW, MVPP2_FL_IP4_TCP_FRAG_UNTAG,
+                      MVPP22_CLS_HEK_IP4_2T,
+                      MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4_OPT |
+                      MVPP2_PRS_RI_L4_TCP,
+                      MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
+
+       MVPP2_DEF_FLOW(TCP_V4_FLOW, MVPP2_FL_IP4_TCP_FRAG_UNTAG,
+                      MVPP22_CLS_HEK_IP4_2T,
+                      MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4_OTHER |
+                      MVPP2_PRS_RI_L4_TCP,
+                      MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
+
+       /* TCP over IPv4 flows, fragmented, with vlan tag */
+       MVPP2_DEF_FLOW(TCP_V4_FLOW, MVPP2_FL_IP4_TCP_FRAG_TAG,
+                      MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_OPT_VLAN,
+                      MVPP2_PRS_RI_L3_IP4 | MVPP2_PRS_RI_L4_TCP,
+                      MVPP2_PRS_IP_MASK),
+
+       MVPP2_DEF_FLOW(TCP_V4_FLOW, MVPP2_FL_IP4_TCP_FRAG_TAG,
+                      MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_OPT_VLAN,
+                      MVPP2_PRS_RI_L3_IP4_OPT | MVPP2_PRS_RI_L4_TCP,
+                      MVPP2_PRS_IP_MASK),
+
+       MVPP2_DEF_FLOW(TCP_V4_FLOW, MVPP2_FL_IP4_TCP_FRAG_TAG,
+                      MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_OPT_VLAN,
+                      MVPP2_PRS_RI_L3_IP4_OTHER | MVPP2_PRS_RI_L4_TCP,
+                      MVPP2_PRS_IP_MASK),
+
+       /* UDP over IPv4 flows, not fragmented, no vlan tag */
+       MVPP2_DEF_FLOW(UDP_V4_FLOW, MVPP2_FL_IP4_UDP_NF_UNTAG,
+                      MVPP22_CLS_HEK_IP4_5T,
+                      MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4 |
+                      MVPP2_PRS_RI_L4_UDP,
+                      MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
+
+       MVPP2_DEF_FLOW(UDP_V4_FLOW, MVPP2_FL_IP4_UDP_NF_UNTAG,
+                      MVPP22_CLS_HEK_IP4_5T,
+                      MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4_OPT |
+                      MVPP2_PRS_RI_L4_UDP,
+                      MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
+
+       MVPP2_DEF_FLOW(UDP_V4_FLOW, MVPP2_FL_IP4_UDP_NF_UNTAG,
+                      MVPP22_CLS_HEK_IP4_5T,
+                      MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4_OTHER |
+                      MVPP2_PRS_RI_L4_UDP,
+                      MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
+
+       /* UDP over IPv4 flows, not fragmented, with vlan tag */
+       MVPP2_DEF_FLOW(UDP_V4_FLOW, MVPP2_FL_IP4_UDP_NF_TAG,
+                      MVPP22_CLS_HEK_IP4_5T | MVPP22_CLS_HEK_OPT_VLAN,
+                      MVPP2_PRS_RI_L3_IP4 | MVPP2_PRS_RI_L4_UDP,
+                      MVPP2_PRS_IP_MASK),
+
+       MVPP2_DEF_FLOW(UDP_V4_FLOW, MVPP2_FL_IP4_UDP_NF_TAG,
+                      MVPP22_CLS_HEK_IP4_5T | MVPP22_CLS_HEK_OPT_VLAN,
+                      MVPP2_PRS_RI_L3_IP4_OPT | MVPP2_PRS_RI_L4_UDP,
+                      MVPP2_PRS_IP_MASK),
+
+       MVPP2_DEF_FLOW(UDP_V4_FLOW, MVPP2_FL_IP4_UDP_NF_TAG,
+                      MVPP22_CLS_HEK_IP4_5T | MVPP22_CLS_HEK_OPT_VLAN,
+                      MVPP2_PRS_RI_L3_IP4_OTHER | MVPP2_PRS_RI_L4_UDP,
+                      MVPP2_PRS_IP_MASK),
+
+       /* UDP over IPv4 flows, fragmented, no vlan tag */
+       MVPP2_DEF_FLOW(UDP_V4_FLOW, MVPP2_FL_IP4_UDP_FRAG_UNTAG,
+                      MVPP22_CLS_HEK_IP4_2T,
+                      MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4 |
+                      MVPP2_PRS_RI_L4_UDP,
+                      MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
+
+       MVPP2_DEF_FLOW(UDP_V4_FLOW, MVPP2_FL_IP4_UDP_FRAG_UNTAG,
+                      MVPP22_CLS_HEK_IP4_2T,
+                      MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4_OPT |
+                      MVPP2_PRS_RI_L4_UDP,
+                      MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
+
+       MVPP2_DEF_FLOW(UDP_V4_FLOW, MVPP2_FL_IP4_UDP_FRAG_UNTAG,
+                      MVPP22_CLS_HEK_IP4_2T,
+                      MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4_OTHER |
+                      MVPP2_PRS_RI_L4_UDP,
+                      MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
+
+       /* UDP over IPv4 flows, fragmented, with vlan tag */
+       MVPP2_DEF_FLOW(UDP_V4_FLOW, MVPP2_FL_IP4_UDP_FRAG_TAG,
+                      MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_OPT_VLAN,
+                      MVPP2_PRS_RI_L3_IP4 | MVPP2_PRS_RI_L4_UDP,
+                      MVPP2_PRS_IP_MASK),
+
+       MVPP2_DEF_FLOW(UDP_V4_FLOW, MVPP2_FL_IP4_UDP_FRAG_TAG,
+                      MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_OPT_VLAN,
+                      MVPP2_PRS_RI_L3_IP4_OPT | MVPP2_PRS_RI_L4_UDP,
+                      MVPP2_PRS_IP_MASK),
+
+       MVPP2_DEF_FLOW(UDP_V4_FLOW, MVPP2_FL_IP4_UDP_FRAG_TAG,
+                      MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_OPT_VLAN,
+                      MVPP2_PRS_RI_L3_IP4_OTHER | MVPP2_PRS_RI_L4_UDP,
+                      MVPP2_PRS_IP_MASK),
+
+       /* TCP over IPv6 flows, not fragmented, no vlan tag */
+       MVPP2_DEF_FLOW(TCP_V6_FLOW, MVPP2_FL_IP6_TCP_NF_UNTAG,
+                      MVPP22_CLS_HEK_IP6_5T,
+                      MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP6 |
+                      MVPP2_PRS_RI_L4_TCP,
+                      MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
+
+       MVPP2_DEF_FLOW(TCP_V6_FLOW, MVPP2_FL_IP6_TCP_NF_UNTAG,
+                      MVPP22_CLS_HEK_IP6_5T,
+                      MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP6_EXT |
+                      MVPP2_PRS_RI_L4_TCP,
+                      MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
+
+       /* TCP over IPv6 flows, not fragmented, with vlan tag */
+       MVPP2_DEF_FLOW(TCP_V6_FLOW, MVPP2_FL_IP6_TCP_NF_TAG,
+                      MVPP22_CLS_HEK_IP6_5T | MVPP22_CLS_HEK_OPT_VLAN,
+                      MVPP2_PRS_RI_L3_IP6 | MVPP2_PRS_RI_L4_TCP,
+                      MVPP2_PRS_IP_MASK),
+
+       MVPP2_DEF_FLOW(TCP_V6_FLOW, MVPP2_FL_IP6_TCP_NF_TAG,
+                      MVPP22_CLS_HEK_IP6_5T | MVPP22_CLS_HEK_OPT_VLAN,
+                      MVPP2_PRS_RI_L3_IP6_EXT | MVPP2_PRS_RI_L4_TCP,
+                      MVPP2_PRS_IP_MASK),
+
+       /* TCP over IPv6 flows, fragmented, no vlan tag */
+       MVPP2_DEF_FLOW(TCP_V6_FLOW, MVPP2_FL_IP6_TCP_FRAG_UNTAG,
+                      MVPP22_CLS_HEK_IP6_2T,
+                      MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP6 |
+                      MVPP2_PRS_RI_IP_FRAG_TRUE | MVPP2_PRS_RI_L4_TCP,
+                      MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
+
+       MVPP2_DEF_FLOW(TCP_V6_FLOW, MVPP2_FL_IP6_TCP_FRAG_UNTAG,
+                      MVPP22_CLS_HEK_IP6_2T,
+                      MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP6_EXT |
+                      MVPP2_PRS_RI_IP_FRAG_TRUE | MVPP2_PRS_RI_L4_TCP,
+                      MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
+
+       /* TCP over IPv6 flows, fragmented, with vlan tag */
+       MVPP2_DEF_FLOW(TCP_V6_FLOW, MVPP2_FL_IP6_TCP_FRAG_TAG,
+                      MVPP22_CLS_HEK_IP6_2T | MVPP22_CLS_HEK_OPT_VLAN,
+                      MVPP2_PRS_RI_L3_IP6 | MVPP2_PRS_RI_IP_FRAG_TRUE |
+                      MVPP2_PRS_RI_L4_TCP,
+                      MVPP2_PRS_IP_MASK),
+
+       MVPP2_DEF_FLOW(TCP_V6_FLOW, MVPP2_FL_IP6_TCP_FRAG_TAG,
+                      MVPP22_CLS_HEK_IP6_2T | MVPP22_CLS_HEK_OPT_VLAN,
+                      MVPP2_PRS_RI_L3_IP6_EXT | MVPP2_PRS_RI_IP_FRAG_TRUE |
+                      MVPP2_PRS_RI_L4_TCP,
+                      MVPP2_PRS_IP_MASK),
+
+       /* UDP over IPv6 flows, not fragmented, no vlan tag */
+       MVPP2_DEF_FLOW(UDP_V6_FLOW, MVPP2_FL_IP6_UDP_NF_UNTAG,
+                      MVPP22_CLS_HEK_IP6_5T,
+                      MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP6 |
+                      MVPP2_PRS_RI_L4_UDP,
+                      MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
+
+       MVPP2_DEF_FLOW(UDP_V6_FLOW, MVPP2_FL_IP6_UDP_NF_UNTAG,
+                      MVPP22_CLS_HEK_IP6_5T,
+                      MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP6_EXT |
+                      MVPP2_PRS_RI_L4_UDP,
+                      MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
+
+       /* UDP over IPv6 flows, not fragmented, with vlan tag */
+       MVPP2_DEF_FLOW(UDP_V6_FLOW, MVPP2_FL_IP6_UDP_NF_TAG,
+                      MVPP22_CLS_HEK_IP6_5T | MVPP22_CLS_HEK_OPT_VLAN,
+                      MVPP2_PRS_RI_L3_IP6 | MVPP2_PRS_RI_L4_UDP,
+                      MVPP2_PRS_IP_MASK),
+
+       MVPP2_DEF_FLOW(UDP_V6_FLOW, MVPP2_FL_IP6_UDP_NF_TAG,
+                      MVPP22_CLS_HEK_IP6_5T | MVPP22_CLS_HEK_OPT_VLAN,
+                      MVPP2_PRS_RI_L3_IP6_EXT | MVPP2_PRS_RI_L4_UDP,
+                      MVPP2_PRS_IP_MASK),
+
+       /* UDP over IPv6 flows, fragmented, no vlan tag */
+       MVPP2_DEF_FLOW(UDP_V6_FLOW, MVPP2_FL_IP6_UDP_FRAG_UNTAG,
+                      MVPP22_CLS_HEK_IP6_2T,
+                      MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP6 |
+                      MVPP2_PRS_RI_IP_FRAG_TRUE | MVPP2_PRS_RI_L4_UDP,
+                      MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
+
+       MVPP2_DEF_FLOW(UDP_V6_FLOW, MVPP2_FL_IP6_UDP_FRAG_UNTAG,
+                      MVPP22_CLS_HEK_IP6_2T,
+                      MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP6_EXT |
+                      MVPP2_PRS_RI_IP_FRAG_TRUE | MVPP2_PRS_RI_L4_UDP,
+                      MVPP2_PRS_IP_MASK | MVPP2_PRS_RI_VLAN_MASK),
+
+       /* UDP over IPv6 flows, fragmented, with vlan tag */
+       MVPP2_DEF_FLOW(UDP_V6_FLOW, MVPP2_FL_IP6_UDP_FRAG_TAG,
+                      MVPP22_CLS_HEK_IP6_2T | MVPP22_CLS_HEK_OPT_VLAN,
+                      MVPP2_PRS_RI_L3_IP6 | MVPP2_PRS_RI_IP_FRAG_TRUE |
+                      MVPP2_PRS_RI_L4_UDP,
+                      MVPP2_PRS_IP_MASK),
+
+       MVPP2_DEF_FLOW(UDP_V6_FLOW, MVPP2_FL_IP6_UDP_FRAG_TAG,
+                      MVPP22_CLS_HEK_IP6_2T | MVPP22_CLS_HEK_OPT_VLAN,
+                      MVPP2_PRS_RI_L3_IP6_EXT | MVPP2_PRS_RI_IP_FRAG_TRUE |
+                      MVPP2_PRS_RI_L4_UDP,
+                      MVPP2_PRS_IP_MASK),
+
+       /* IPv4 flows, no vlan tag */
+       MVPP2_DEF_FLOW(IPV4_FLOW, MVPP2_FL_IP4_UNTAG,
+                      MVPP22_CLS_HEK_IP4_2T,
+                      MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4,
+                      MVPP2_PRS_RI_VLAN_MASK | MVPP2_PRS_RI_L3_PROTO_MASK),
+       MVPP2_DEF_FLOW(IPV4_FLOW, MVPP2_FL_IP4_UNTAG,
+                      MVPP22_CLS_HEK_IP4_2T,
+                      MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4_OPT,
+                      MVPP2_PRS_RI_VLAN_MASK | MVPP2_PRS_RI_L3_PROTO_MASK),
+       MVPP2_DEF_FLOW(IPV4_FLOW, MVPP2_FL_IP4_UNTAG,
+                      MVPP22_CLS_HEK_IP4_2T,
+                      MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP4_OTHER,
+                      MVPP2_PRS_RI_VLAN_MASK | MVPP2_PRS_RI_L3_PROTO_MASK),
+
+       /* IPv4 flows, with vlan tag */
+       MVPP2_DEF_FLOW(IPV4_FLOW, MVPP2_FL_IP4_TAG,
+                      MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_OPT_VLAN,
+                      MVPP2_PRS_RI_L3_IP4,
+                      MVPP2_PRS_RI_L3_PROTO_MASK),
+       MVPP2_DEF_FLOW(IPV4_FLOW, MVPP2_FL_IP4_TAG,
+                      MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_OPT_VLAN,
+                      MVPP2_PRS_RI_L3_IP4_OPT,
+                      MVPP2_PRS_RI_L3_PROTO_MASK),
+       MVPP2_DEF_FLOW(IPV4_FLOW, MVPP2_FL_IP4_TAG,
+                      MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_OPT_VLAN,
+                      MVPP2_PRS_RI_L3_IP4_OTHER,
+                      MVPP2_PRS_RI_L3_PROTO_MASK),
+
+       /* IPv6 flows, no vlan tag */
+       MVPP2_DEF_FLOW(IPV6_FLOW, MVPP2_FL_IP6_UNTAG,
+                      MVPP22_CLS_HEK_IP6_2T,
+                      MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP6,
+                      MVPP2_PRS_RI_VLAN_MASK | MVPP2_PRS_RI_L3_PROTO_MASK),
+       MVPP2_DEF_FLOW(IPV6_FLOW, MVPP2_FL_IP6_UNTAG,
+                      MVPP22_CLS_HEK_IP6_2T,
+                      MVPP2_PRS_RI_VLAN_NONE | MVPP2_PRS_RI_L3_IP6_EXT,
+                      MVPP2_PRS_RI_VLAN_MASK | MVPP2_PRS_RI_L3_PROTO_MASK),
+
+       /* IPv6 flows, with vlan tag */
+       MVPP2_DEF_FLOW(IPV6_FLOW, MVPP2_FL_IP6_TAG,
+                      MVPP22_CLS_HEK_IP6_2T | MVPP22_CLS_HEK_OPT_VLAN,
+                      MVPP2_PRS_RI_L3_IP6,
+                      MVPP2_PRS_RI_L3_PROTO_MASK),
+       MVPP2_DEF_FLOW(IPV6_FLOW, MVPP2_FL_IP6_TAG,
+                      MVPP22_CLS_HEK_IP6_2T | MVPP22_CLS_HEK_OPT_VLAN,
+                      MVPP2_PRS_RI_L3_IP6_EXT,
+                      MVPP2_PRS_RI_L3_PROTO_MASK),
+
+       /* Non IP flow, no vlan tag */
+       MVPP2_DEF_FLOW(ETHER_FLOW, MVPP2_FL_NON_IP_UNTAG,
+                      0,
+                      MVPP2_PRS_RI_VLAN_NONE,
+                      MVPP2_PRS_RI_VLAN_MASK),
+       /* Non IP flow, with vlan tag */
+       MVPP2_DEF_FLOW(ETHER_FLOW, MVPP2_FL_NON_IP_TAG,
+                      MVPP22_CLS_HEK_OPT_VLAN,
+                      0, 0),
+};
+
+u32 mvpp2_cls_flow_hits(struct mvpp2 *priv, int index)
+{
+       mvpp2_write(priv, MVPP2_CTRS_IDX, index);
+
+       return mvpp2_read(priv, MVPP2_CLS_FLOW_TBL_HIT_CTR);
+}
+
+void mvpp2_cls_flow_read(struct mvpp2 *priv, int index,
+                        struct mvpp2_cls_flow_entry *fe)
+{
+       fe->index = index;
+       mvpp2_write(priv, MVPP2_CLS_FLOW_INDEX_REG, index);
+       fe->data[0] = mvpp2_read(priv, MVPP2_CLS_FLOW_TBL0_REG);
+       fe->data[1] = mvpp2_read(priv, MVPP2_CLS_FLOW_TBL1_REG);
+       fe->data[2] = mvpp2_read(priv, MVPP2_CLS_FLOW_TBL2_REG);
+}
 
 /* Update classification flow table registers */
 static void mvpp2_cls_flow_write(struct mvpp2 *priv,
@@ -23,6 +349,25 @@ static void mvpp2_cls_flow_write(struct mvpp2 *priv,
        mvpp2_write(priv, MVPP2_CLS_FLOW_TBL2_REG,  fe->data[2]);
 }
 
+u32 mvpp2_cls_lookup_hits(struct mvpp2 *priv, int index)
+{
+       mvpp2_write(priv, MVPP2_CTRS_IDX, index);
+
+       return mvpp2_read(priv, MVPP2_CLS_DEC_TBL_HIT_CTR);
+}
+
+void mvpp2_cls_lookup_read(struct mvpp2 *priv, int lkpid, int way,
+                          struct mvpp2_cls_lookup_entry *le)
+{
+       u32 val;
+
+       val = (way << MVPP2_CLS_LKP_INDEX_WAY_OFFS) | lkpid;
+       mvpp2_write(priv, MVPP2_CLS_LKP_INDEX_REG, val);
+       le->way = way;
+       le->lkpid = lkpid;
+       le->data = mvpp2_read(priv, MVPP2_CLS_LKP_TBL_REG);
+}
+
 /* Update classification lookup table register */
 static void mvpp2_cls_lookup_write(struct mvpp2 *priv,
                                   struct mvpp2_cls_lookup_entry *le)
@@ -34,6 +379,439 @@ static void mvpp2_cls_lookup_write(struct mvpp2 *priv,
        mvpp2_write(priv, MVPP2_CLS_LKP_TBL_REG, le->data);
 }
 
+/* Operations on flow entry */
+static int mvpp2_cls_flow_hek_num_get(struct mvpp2_cls_flow_entry *fe)
+{
+       return fe->data[1] & MVPP2_CLS_FLOW_TBL1_N_FIELDS_MASK;
+}
+
+static void mvpp2_cls_flow_hek_num_set(struct mvpp2_cls_flow_entry *fe,
+                                      int num_of_fields)
+{
+       fe->data[1] &= ~MVPP2_CLS_FLOW_TBL1_N_FIELDS_MASK;
+       fe->data[1] |= MVPP2_CLS_FLOW_TBL1_N_FIELDS(num_of_fields);
+}
+
+static int mvpp2_cls_flow_hek_get(struct mvpp2_cls_flow_entry *fe,
+                                 int field_index)
+{
+       return (fe->data[2] >> MVPP2_CLS_FLOW_TBL2_FLD_OFFS(field_index)) &
+               MVPP2_CLS_FLOW_TBL2_FLD_MASK;
+}
+
+static void mvpp2_cls_flow_hek_set(struct mvpp2_cls_flow_entry *fe,
+                                  int field_index, int field_id)
+{
+       fe->data[2] &= ~MVPP2_CLS_FLOW_TBL2_FLD(field_index,
+                                               MVPP2_CLS_FLOW_TBL2_FLD_MASK);
+       fe->data[2] |= MVPP2_CLS_FLOW_TBL2_FLD(field_index, field_id);
+}
+
+static void mvpp2_cls_flow_eng_set(struct mvpp2_cls_flow_entry *fe,
+                                  int engine)
+{
+       fe->data[0] &= ~MVPP2_CLS_FLOW_TBL0_ENG(MVPP2_CLS_FLOW_TBL0_ENG_MASK);
+       fe->data[0] |= MVPP2_CLS_FLOW_TBL0_ENG(engine);
+}
+
+int mvpp2_cls_flow_eng_get(struct mvpp2_cls_flow_entry *fe)
+{
+       return (fe->data[0] >> MVPP2_CLS_FLOW_TBL0_OFFS) &
+               MVPP2_CLS_FLOW_TBL0_ENG_MASK;
+}
+
+static void mvpp2_cls_flow_port_id_sel(struct mvpp2_cls_flow_entry *fe,
+                                      bool from_packet)
+{
+       if (from_packet)
+               fe->data[0] |= MVPP2_CLS_FLOW_TBL0_PORT_ID_SEL;
+       else
+               fe->data[0] &= ~MVPP2_CLS_FLOW_TBL0_PORT_ID_SEL;
+}
+
+static void mvpp2_cls_flow_seq_set(struct mvpp2_cls_flow_entry *fe, u32 seq)
+{
+       fe->data[1] &= ~MVPP2_CLS_FLOW_TBL1_SEQ(MVPP2_CLS_FLOW_TBL1_SEQ_MASK);
+       fe->data[1] |= MVPP2_CLS_FLOW_TBL1_SEQ(seq);
+}
+
+static void mvpp2_cls_flow_last_set(struct mvpp2_cls_flow_entry *fe,
+                                   bool is_last)
+{
+       fe->data[0] &= ~MVPP2_CLS_FLOW_TBL0_LAST;
+       fe->data[0] |= !!is_last;
+}
+
+static void mvpp2_cls_flow_pri_set(struct mvpp2_cls_flow_entry *fe, int prio)
+{
+       fe->data[1] &= ~MVPP2_CLS_FLOW_TBL1_PRIO(MVPP2_CLS_FLOW_TBL1_PRIO_MASK);
+       fe->data[1] |= MVPP2_CLS_FLOW_TBL1_PRIO(prio);
+}
+
+static void mvpp2_cls_flow_port_add(struct mvpp2_cls_flow_entry *fe,
+                                   u32 port)
+{
+       fe->data[0] |= MVPP2_CLS_FLOW_TBL0_PORT_ID(port);
+}
+
+/* Initialize the parser entry for the given flow */
+static void mvpp2_cls_flow_prs_init(struct mvpp2 *priv,
+                                   struct mvpp2_cls_flow *flow)
+{
+       mvpp2_prs_add_flow(priv, flow->flow_id, flow->prs_ri.ri,
+                          flow->prs_ri.ri_mask);
+}
+
+/* Initialize the Lookup Id table entry for the given flow */
+static void mvpp2_cls_flow_lkp_init(struct mvpp2 *priv,
+                                   struct mvpp2_cls_flow *flow)
+{
+       struct mvpp2_cls_lookup_entry le;
+
+       le.way = 0;
+       le.lkpid = flow->flow_id;
+
+       /* The default RxQ for this port is set in the C2 lookup */
+       le.data = 0;
+
+       /* We point to the first lookup in the sequence for this flow, which
+        * is the C2 lookup.
+        */
+       le.data |= MVPP2_CLS_LKP_FLOW_PTR(MVPP2_FLOW_C2_ENTRY(flow->flow_id));
+
+       /* CLS is always enabled, RSS is enabled/disabled in C2 lookup */
+       le.data |= MVPP2_CLS_LKP_TBL_LOOKUP_EN_MASK;
+
+       mvpp2_cls_lookup_write(priv, &le);
+}
+
+/* Initialize the flow table entries for the given flow */
+static void mvpp2_cls_flow_init(struct mvpp2 *priv, struct mvpp2_cls_flow *flow)
+{
+       struct mvpp2_cls_flow_entry fe;
+       int i;
+
+       /* C2 lookup */
+       memset(&fe, 0, sizeof(fe));
+       fe.index = MVPP2_FLOW_C2_ENTRY(flow->flow_id);
+
+       mvpp2_cls_flow_eng_set(&fe, MVPP22_CLS_ENGINE_C2);
+       mvpp2_cls_flow_port_id_sel(&fe, true);
+       mvpp2_cls_flow_last_set(&fe, 0);
+       mvpp2_cls_flow_pri_set(&fe, 0);
+       mvpp2_cls_flow_seq_set(&fe, MVPP2_CLS_FLOW_SEQ_FIRST1);
+
+       /* Add all ports */
+       for (i = 0; i < MVPP2_MAX_PORTS; i++)
+               mvpp2_cls_flow_port_add(&fe, BIT(i));
+
+       mvpp2_cls_flow_write(priv, &fe);
+
+       /* C3Hx lookups */
+       for (i = 0; i < MVPP2_MAX_PORTS; i++) {
+               memset(&fe, 0, sizeof(fe));
+               fe.index = MVPP2_PORT_FLOW_HASH_ENTRY(i, flow->flow_id);
+
+               mvpp2_cls_flow_port_id_sel(&fe, true);
+               mvpp2_cls_flow_pri_set(&fe, i + 1);
+               mvpp2_cls_flow_seq_set(&fe, MVPP2_CLS_FLOW_SEQ_MIDDLE);
+               mvpp2_cls_flow_port_add(&fe, BIT(i));
+
+               mvpp2_cls_flow_write(priv, &fe);
+       }
+
+       /* Update the last entry */
+       mvpp2_cls_flow_last_set(&fe, 1);
+       mvpp2_cls_flow_seq_set(&fe, MVPP2_CLS_FLOW_SEQ_LAST);
+
+       mvpp2_cls_flow_write(priv, &fe);
+}
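
For one flow_id, the writes above land on consecutive flow-table indices, per the macros defined in mvpp2_cls.h later in this patch. A worked example, assuming MVPP2_MAX_PORTS is 4 (so MVPP2_ENTRIES_PER_FLOW is 5):

    /* Index layout for a given flow id:
     *   MVPP2_FLOW_C2_ENTRY(id)           == id * 5      C2 lookup
     *   MVPP2_PORT_FLOW_HASH_ENTRY(0, id) == id * 5 + 1  port 0 C3Hx lookup
     *   ...
     *   MVPP2_PORT_FLOW_HASH_ENTRY(3, id) == id * 5 + 4  port 3 C3Hx lookup
     */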
+
+/* Adds a field to the Header Extracted Key generation parameters */
+static int mvpp2_flow_add_hek_field(struct mvpp2_cls_flow_entry *fe,
+                                   u32 field_id)
+{
+       int nb_fields = mvpp2_cls_flow_hek_num_get(fe);
+
+       if (nb_fields == MVPP2_FLOW_N_FIELDS)
+               return -EINVAL;
+
+       mvpp2_cls_flow_hek_set(fe, nb_fields, field_id);
+
+       mvpp2_cls_flow_hek_num_set(fe, nb_fields + 1);
+
+       return 0;
+}
+
+static int mvpp2_flow_set_hek_fields(struct mvpp2_cls_flow_entry *fe,
+                                    unsigned long hash_opts)
+{
+       u32 field_id;
+       int i;
+
+       /* Clear old fields */
+       mvpp2_cls_flow_hek_num_set(fe, 0);
+       fe->data[2] = 0;
+
+       for_each_set_bit(i, &hash_opts, MVPP22_CLS_HEK_N_FIELDS) {
+               switch (BIT(i)) {
+               case MVPP22_CLS_HEK_OPT_VLAN:
+                       field_id = MVPP22_CLS_FIELD_VLAN;
+                       break;
+               case MVPP22_CLS_HEK_OPT_IP4SA:
+                       field_id = MVPP22_CLS_FIELD_IP4SA;
+                       break;
+               case MVPP22_CLS_HEK_OPT_IP4DA:
+                       field_id = MVPP22_CLS_FIELD_IP4DA;
+                       break;
+               case MVPP22_CLS_HEK_OPT_IP6SA:
+                       field_id = MVPP22_CLS_FIELD_IP6SA;
+                       break;
+               case MVPP22_CLS_HEK_OPT_IP6DA:
+                       field_id = MVPP22_CLS_FIELD_IP6DA;
+                       break;
+               case MVPP22_CLS_HEK_OPT_L4SIP:
+                       field_id = MVPP22_CLS_FIELD_L4SIP;
+                       break;
+               case MVPP22_CLS_HEK_OPT_L4DIP:
+                       field_id = MVPP22_CLS_FIELD_L4DIP;
+                       break;
+               default:
+                       return -EINVAL;
+               }
+               if (mvpp2_flow_add_hek_field(fe, field_id))
+                       return -EINVAL;
+       }
+
+       return 0;
+}
+
+struct mvpp2_cls_flow *mvpp2_cls_flow_get(int flow)
+{
+       if (flow >= MVPP2_N_FLOWS)
+               return NULL;
+
+       return &cls_flows[flow];
+}
+
+/* Set the hash generation options for the given traffic flow.
+ * One traffic flow (in the ethtool sense) has multiple classification flows,
+ * to handle specific cases such as fragmentation, or the presence of a
+ * VLAN / DSA Tag.
+ *
+ * Each of these individual flows has different constraints; for example, we
+ * can't hash fragmented packets on L4 data (else we would risk packet
+ * re-ordering), so each classification flow masks the requested options with
+ * its supported ones.
+ */
+static int mvpp2_port_rss_hash_opts_set(struct mvpp2_port *port, int flow_type,
+                                       u16 requested_opts)
+{
+       struct mvpp2_cls_flow_entry fe;
+       struct mvpp2_cls_flow *flow;
+       int i, engine, flow_index;
+       u16 hash_opts;
+
+       for (i = 0; i < MVPP2_N_FLOWS; i++) {
+               flow = mvpp2_cls_flow_get(i);
+               if (!flow)
+                       return -EINVAL;
+
+               if (flow->flow_type != flow_type)
+                       continue;
+
+               flow_index = MVPP2_PORT_FLOW_HASH_ENTRY(port->id,
+                                                       flow->flow_id);
+
+               mvpp2_cls_flow_read(port->priv, flow_index, &fe);
+
+               hash_opts = flow->supported_hash_opts & requested_opts;
+
+               /* Use the C3HB engine to access L4 info. This adds L4 info to
+                * the hash parameters.
+                */
+               if (hash_opts & MVPP22_CLS_HEK_L4_OPTS)
+                       engine = MVPP22_CLS_ENGINE_C3HB;
+               else
+                       engine = MVPP22_CLS_ENGINE_C3HA;
+
+               if (mvpp2_flow_set_hek_fields(&fe, hash_opts))
+                       return -EINVAL;
+
+               mvpp2_cls_flow_eng_set(&fe, engine);
+
+               mvpp2_cls_flow_write(port->priv, &fe);
+       }
+
+       return 0;
+}
+
+u16 mvpp2_flow_get_hek_fields(struct mvpp2_cls_flow_entry *fe)
+{
+       u16 hash_opts = 0;
+       int n_fields, i, field;
+
+       n_fields = mvpp2_cls_flow_hek_num_get(fe);
+
+       for (i = 0; i < n_fields; i++) {
+               field = mvpp2_cls_flow_hek_get(fe, i);
+
+               switch (field) {
+               case MVPP22_CLS_FIELD_MAC_DA:
+                       hash_opts |= MVPP22_CLS_HEK_OPT_MAC_DA;
+                       break;
+               case MVPP22_CLS_FIELD_VLAN:
+                       hash_opts |= MVPP22_CLS_HEK_OPT_VLAN;
+                       break;
+               case MVPP22_CLS_FIELD_L3_PROTO:
+                       hash_opts |= MVPP22_CLS_HEK_OPT_L3_PROTO;
+                       break;
+               case MVPP22_CLS_FIELD_IP4SA:
+                       hash_opts |= MVPP22_CLS_HEK_OPT_IP4SA;
+                       break;
+               case MVPP22_CLS_FIELD_IP4DA:
+                       hash_opts |= MVPP22_CLS_HEK_OPT_IP4DA;
+                       break;
+               case MVPP22_CLS_FIELD_IP6SA:
+                       hash_opts |= MVPP22_CLS_HEK_OPT_IP6SA;
+                       break;
+               case MVPP22_CLS_FIELD_IP6DA:
+                       hash_opts |= MVPP22_CLS_HEK_OPT_IP6DA;
+                       break;
+               case MVPP22_CLS_FIELD_L4SIP:
+                       hash_opts |= MVPP22_CLS_HEK_OPT_L4SIP;
+                       break;
+               case MVPP22_CLS_FIELD_L4DIP:
+                       hash_opts |= MVPP22_CLS_HEK_OPT_L4DIP;
+                       break;
+               default:
+                       break;
+               }
+       }
+       return hash_opts;
+}
+
+/* Returns the hash opts for this flow. There are several classifier flows
+ * for one traffic flow; this returns an aggregation of all configurations.
+ */
+static u16 mvpp2_port_rss_hash_opts_get(struct mvpp2_port *port, int flow_type)
+{
+       struct mvpp2_cls_flow_entry fe;
+       struct mvpp2_cls_flow *flow;
+       int i, flow_index;
+       u16 hash_opts = 0;
+
+       for (i = 0; i < MVPP2_N_FLOWS; i++) {
+               flow = mvpp2_cls_flow_get(i);
+               if (!flow)
+                       return 0;
+
+               if (flow->flow_type != flow_type)
+                       continue;
+
+               flow_index = MVPP2_PORT_FLOW_HASH_ENTRY(port->id,
+                                                       flow->flow_id);
+
+               mvpp2_cls_flow_read(port->priv, flow_index, &fe);
+
+               hash_opts |= mvpp2_flow_get_hek_fields(&fe);
+       }
+
+       return hash_opts;
+}
+
+static void mvpp2_cls_port_init_flows(struct mvpp2 *priv)
+{
+       struct mvpp2_cls_flow *flow;
+       int i;
+
+       for (i = 0; i < MVPP2_N_FLOWS; i++) {
+               flow = mvpp2_cls_flow_get(i);
+               if (!flow)
+                       break;
+
+               mvpp2_cls_flow_prs_init(priv, flow);
+               mvpp2_cls_flow_lkp_init(priv, flow);
+               mvpp2_cls_flow_init(priv, flow);
+       }
+}
+
+static void mvpp2_cls_c2_write(struct mvpp2 *priv,
+                              struct mvpp2_cls_c2_entry *c2)
+{
+       mvpp2_write(priv, MVPP22_CLS_C2_TCAM_IDX, c2->index);
+
+       /* Write TCAM */
+       mvpp2_write(priv, MVPP22_CLS_C2_TCAM_DATA0, c2->tcam[0]);
+       mvpp2_write(priv, MVPP22_CLS_C2_TCAM_DATA1, c2->tcam[1]);
+       mvpp2_write(priv, MVPP22_CLS_C2_TCAM_DATA2, c2->tcam[2]);
+       mvpp2_write(priv, MVPP22_CLS_C2_TCAM_DATA3, c2->tcam[3]);
+       mvpp2_write(priv, MVPP22_CLS_C2_TCAM_DATA4, c2->tcam[4]);
+
+       mvpp2_write(priv, MVPP22_CLS_C2_ACT, c2->act);
+
+       mvpp2_write(priv, MVPP22_CLS_C2_ATTR0, c2->attr[0]);
+       mvpp2_write(priv, MVPP22_CLS_C2_ATTR1, c2->attr[1]);
+       mvpp2_write(priv, MVPP22_CLS_C2_ATTR2, c2->attr[2]);
+       mvpp2_write(priv, MVPP22_CLS_C2_ATTR3, c2->attr[3]);
+}
+
+void mvpp2_cls_c2_read(struct mvpp2 *priv, int index,
+                      struct mvpp2_cls_c2_entry *c2)
+{
+       mvpp2_write(priv, MVPP22_CLS_C2_TCAM_IDX, index);
+
+       c2->index = index;
+
+       c2->tcam[0] = mvpp2_read(priv, MVPP22_CLS_C2_TCAM_DATA0);
+       c2->tcam[1] = mvpp2_read(priv, MVPP22_CLS_C2_TCAM_DATA1);
+       c2->tcam[2] = mvpp2_read(priv, MVPP22_CLS_C2_TCAM_DATA2);
+       c2->tcam[3] = mvpp2_read(priv, MVPP22_CLS_C2_TCAM_DATA3);
+       c2->tcam[4] = mvpp2_read(priv, MVPP22_CLS_C2_TCAM_DATA4);
+
+       c2->act = mvpp2_read(priv, MVPP22_CLS_C2_ACT);
+
+       c2->attr[0] = mvpp2_read(priv, MVPP22_CLS_C2_ATTR0);
+       c2->attr[1] = mvpp2_read(priv, MVPP22_CLS_C2_ATTR1);
+       c2->attr[2] = mvpp2_read(priv, MVPP22_CLS_C2_ATTR2);
+       c2->attr[3] = mvpp2_read(priv, MVPP22_CLS_C2_ATTR3);
+}
+
+static void mvpp2_port_c2_cls_init(struct mvpp2_port *port)
+{
+       struct mvpp2_cls_c2_entry c2;
+       u8 qh, ql, pmap;
+
+       memset(&c2, 0, sizeof(c2));
+
+       c2.index = MVPP22_CLS_C2_RSS_ENTRY(port->id);
+
+       pmap = BIT(port->id);
+       c2.tcam[4] = MVPP22_CLS_C2_PORT_ID(pmap);
+       c2.tcam[4] |= MVPP22_CLS_C2_TCAM_EN(MVPP22_CLS_C2_PORT_ID(pmap));
+
+       /* Update RSS status after matching this entry */
+       c2.act = MVPP22_CLS_C2_ACT_RSS_EN(MVPP22_C2_UPD_LOCK);
+
+       /* Mark packet as "forwarded to software", needed for RSS */
+       c2.act |= MVPP22_CLS_C2_ACT_FWD(MVPP22_C2_FWD_SW_LOCK);
+
+       /* Configure the default rx queue: update Queue Low and Queue High, but
+        * don't lock, since the rx queue selection might be overridden by RSS.
+        */
+       c2.act |= MVPP22_CLS_C2_ACT_QHIGH(MVPP22_C2_UPD) |
+                  MVPP22_CLS_C2_ACT_QLOW(MVPP22_C2_UPD);
+
+       qh = (port->first_rxq >> 3) & MVPP22_CLS_C2_ATTR0_QHIGH_MASK;
+       ql = port->first_rxq & MVPP22_CLS_C2_ATTR0_QLOW_MASK;
+
+       c2.attr[0] = MVPP22_CLS_C2_ATTR0_QHIGH(qh) |
+                     MVPP22_CLS_C2_ATTR0_QLOW(ql);
+
+       mvpp2_cls_c2_write(port->priv, &c2);
+}
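
A worked example of the queue split above, assuming first_rxq is 12:

    /* qh = (12 >> 3) & MVPP22_CLS_C2_ATTR0_QHIGH_MASK = 1
     * ql =  12       & MVPP22_CLS_C2_ATTR0_QLOW_MASK  = 4
     * The hardware recombines them as (qh << 3) | ql = 12, which is also
     * how the debugfs code added by this patch prints the default rx queue.
     */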
+
 /* Classifier default initialization */
 void mvpp2_cls_init(struct mvpp2 *priv)
 {
@@ -61,6 +839,8 @@ void mvpp2_cls_init(struct mvpp2 *priv)
                le.way = 1;
                mvpp2_cls_lookup_write(priv, &le);
        }
+
+       mvpp2_cls_port_init_flows(priv);
 }
 
 void mvpp2_cls_port_config(struct mvpp2_port *port)
@@ -89,6 +869,47 @@ void mvpp2_cls_port_config(struct mvpp2_port *port)
 
        /* Update lookup ID table entry */
        mvpp2_cls_lookup_write(port->priv, &le);
+
+       mvpp2_port_c2_cls_init(port);
+}
+
+u32 mvpp2_cls_c2_hit_count(struct mvpp2 *priv, int c2_index)
+{
+       mvpp2_write(priv, MVPP22_CLS_C2_TCAM_IDX, c2_index);
+
+       return mvpp2_read(priv, MVPP22_CLS_C2_HIT_CTR);
+}
+
+static void mvpp2_rss_port_c2_enable(struct mvpp2_port *port)
+{
+       struct mvpp2_cls_c2_entry c2;
+
+       mvpp2_cls_c2_read(port->priv, MVPP22_CLS_C2_RSS_ENTRY(port->id), &c2);
+
+       c2.attr[2] |= MVPP22_CLS_C2_ATTR2_RSS_EN;
+
+       mvpp2_cls_c2_write(port->priv, &c2);
+}
+
+static void mvpp2_rss_port_c2_disable(struct mvpp2_port *port)
+{
+       struct mvpp2_cls_c2_entry c2;
+
+       mvpp2_cls_c2_read(port->priv, MVPP22_CLS_C2_RSS_ENTRY(port->id), &c2);
+
+       c2.attr[2] &= ~MVPP22_CLS_C2_ATTR2_RSS_EN;
+
+       mvpp2_cls_c2_write(port->priv, &c2);
+}
+
+void mvpp22_rss_enable(struct mvpp2_port *port)
+{
+       mvpp2_rss_port_c2_enable(port);
+}
+
+void mvpp22_rss_disable(struct mvpp2_port *port)
+{
+       mvpp2_rss_port_c2_disable(port);
 }
 
 /* Set CPU queue number for oversize packets */
@@ -107,7 +928,116 @@ void mvpp2_cls_oversize_rxq_set(struct mvpp2_port *port)
        mvpp2_write(port->priv, MVPP2_CLS_SWFWD_PCTRL_REG, val);
 }
 
-void mvpp22_init_rss(struct mvpp2_port *port)
+static inline u32 mvpp22_rxfh_indir(struct mvpp2_port *port, u32 rxq)
+{
+       int nrxqs, cpu, cpus = num_possible_cpus();
+
+       /* Number of RXQs per CPU */
+       nrxqs = port->nrxqs / cpus;
+
+       /* CPU that will handle this rx queue */
+       cpu = rxq / nrxqs;
+
+       if (!cpu_online(cpu))
+               return port->first_rxq;
+
+       /* Indirection to better distribute the packets across the CPUs when
+        * configuring the RSS queues.
+        */
+       return port->first_rxq + ((rxq * nrxqs + rxq / cpus) % port->nrxqs);
+}
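
A worked example of the indirection above, with assumed values port->nrxqs = 16 and 4 possible CPUs (so nrxqs = 4 queues per CPU):

    /* For rxq = 5:
     *   cpu = 5 / 4 = 1                          handling CPU, if online
     *   ret = first_rxq + ((5 * 4 + 5 / 4) % 16)
     *       = first_rxq + (21 % 16)
     *       = first_rxq + 5
     */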
+
+void mvpp22_rss_fill_table(struct mvpp2_port *port, u32 table)
+{
+       struct mvpp2 *priv = port->priv;
+       int i;
+
+       for (i = 0; i < MVPP22_RSS_TABLE_ENTRIES; i++) {
+               u32 sel = MVPP22_RSS_INDEX_TABLE(table) |
+                         MVPP22_RSS_INDEX_TABLE_ENTRY(i);
+               mvpp2_write(priv, MVPP22_RSS_INDEX, sel);
+
+               mvpp2_write(priv, MVPP22_RSS_TABLE_ENTRY,
+                           mvpp22_rxfh_indir(port, port->indir[i]));
+       }
+}
+
+int mvpp2_ethtool_rxfh_set(struct mvpp2_port *port, struct ethtool_rxnfc *info)
+{
+       u16 hash_opts = 0;
+
+       switch (info->flow_type) {
+       case TCP_V4_FLOW:
+       case UDP_V4_FLOW:
+       case TCP_V6_FLOW:
+       case UDP_V6_FLOW:
+               if (info->data & RXH_L4_B_0_1)
+                       hash_opts |= MVPP22_CLS_HEK_OPT_L4SIP;
+               if (info->data & RXH_L4_B_2_3)
+                       hash_opts |= MVPP22_CLS_HEK_OPT_L4DIP;
+               /* Fallthrough */
+       case IPV4_FLOW:
+       case IPV6_FLOW:
+               if (info->data & RXH_L2DA)
+                       hash_opts |= MVPP22_CLS_HEK_OPT_MAC_DA;
+               if (info->data & RXH_VLAN)
+                       hash_opts |= MVPP22_CLS_HEK_OPT_VLAN;
+               if (info->data & RXH_L3_PROTO)
+                       hash_opts |= MVPP22_CLS_HEK_OPT_L3_PROTO;
+               if (info->data & RXH_IP_SRC)
+                       hash_opts |= (MVPP22_CLS_HEK_OPT_IP4SA |
+                                    MVPP22_CLS_HEK_OPT_IP6SA);
+               if (info->data & RXH_IP_DST)
+                       hash_opts |= (MVPP22_CLS_HEK_OPT_IP4DA |
+                                    MVPP22_CLS_HEK_OPT_IP6DA);
+               break;
+       default: return -EOPNOTSUPP;
+       }
+
+       return mvpp2_port_rss_hash_opts_set(port, info->flow_type, hash_opts);
+}
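
For reference, a sketch of how this entry point is reached, assuming a valid port: this is the request that "ethtool -N <dev> rx-flow-hash tcp4 sdfn" generates through the ETHTOOL_SRXFH ioctl.

    struct ethtool_rxnfc info = {
            .cmd = ETHTOOL_SRXFH,
            .flow_type = TCP_V4_FLOW,
            .data = RXH_IP_SRC | RXH_IP_DST |       /* "s" and "d" */
                    RXH_L4_B_0_1 | RXH_L4_B_2_3,    /* "f" and "n" */
    };
    int ret = mvpp2_ethtool_rxfh_set(port, &info);  /* 0 on success */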
+
+int mvpp2_ethtool_rxfh_get(struct mvpp2_port *port, struct ethtool_rxnfc *info)
+{
+       unsigned long hash_opts;
+       int i;
+
+       hash_opts = mvpp2_port_rss_hash_opts_get(port, info->flow_type);
+       info->data = 0;
+
+       for_each_set_bit(i, &hash_opts, MVPP22_CLS_HEK_N_FIELDS) {
+               switch (BIT(i)) {
+               case MVPP22_CLS_HEK_OPT_MAC_DA:
+                       info->data |= RXH_L2DA;
+                       break;
+               case MVPP22_CLS_HEK_OPT_VLAN:
+                       info->data |= RXH_VLAN;
+                       break;
+               case MVPP22_CLS_HEK_OPT_L3_PROTO:
+                       info->data |= RXH_L3_PROTO;
+                       break;
+               case MVPP22_CLS_HEK_OPT_IP4SA:
+               case MVPP22_CLS_HEK_OPT_IP6SA:
+                       info->data |= RXH_IP_SRC;
+                       break;
+               case MVPP22_CLS_HEK_OPT_IP4DA:
+               case MVPP22_CLS_HEK_OPT_IP6DA:
+                       info->data |= RXH_IP_DST;
+                       break;
+               case MVPP22_CLS_HEK_OPT_L4SIP:
+                       info->data |= RXH_L4_B_0_1;
+                       break;
+               case MVPP22_CLS_HEK_OPT_L4DIP:
+                       info->data |= RXH_L4_B_2_3;
+                       break;
+               default:
+                       return -EINVAL;
+               }
+       }
+       return 0;
+}
+
+void mvpp22_rss_port_init(struct mvpp2_port *port)
 {
        struct mvpp2 *priv = port->priv;
        int i;
@@ -115,27 +1045,30 @@ void mvpp22_init_rss(struct mvpp2_port *port)
        /* Set the table width: replace the whole classifier Rx queue number
         * with the ones configured in RSS table entries.
         */
-       mvpp2_write(priv, MVPP22_RSS_INDEX, MVPP22_RSS_INDEX_TABLE(0));
+       mvpp2_write(priv, MVPP22_RSS_INDEX, MVPP22_RSS_INDEX_TABLE(port->id));
        mvpp2_write(priv, MVPP22_RSS_WIDTH, 8);
 
-       /* Loop through the classifier Rx Queues and map them to a RSS table.
-        * Map them all to the first table (0) by default.
+       /* The default RxQ is used as a key to select the RSS table to use.
+        * We use one RSS table per port.
         */
-       for (i = 0; i < MVPP2_CLS_RX_QUEUES; i++) {
-               mvpp2_write(priv, MVPP22_RSS_INDEX, MVPP22_RSS_INDEX_QUEUE(i));
-               mvpp2_write(priv, MVPP22_RSS_TABLE,
-                           MVPP22_RSS_TABLE_POINTER(0));
-       }
+       mvpp2_write(priv, MVPP22_RSS_INDEX,
+                   MVPP22_RSS_INDEX_QUEUE(port->first_rxq));
+       mvpp2_write(priv, MVPP22_RXQ2RSS_TABLE,
+                   MVPP22_RSS_TABLE_POINTER(port->id));
 
        /* Configure the first table to evenly distribute the packets across
-        * real Rx Queues. The table entries map a hash to an port Rx Queue.
+        * real Rx Queues. The table entries map a hash to a port Rx Queue.
         */
-       for (i = 0; i < MVPP22_RSS_TABLE_ENTRIES; i++) {
-               u32 sel = MVPP22_RSS_INDEX_TABLE(0) |
-                         MVPP22_RSS_INDEX_TABLE_ENTRY(i);
-               mvpp2_write(priv, MVPP22_RSS_INDEX, sel);
+       for (i = 0; i < MVPP22_RSS_TABLE_ENTRIES; i++)
+               port->indir[i] = ethtool_rxfh_indir_default(i, port->nrxqs);
 
-               mvpp2_write(priv, MVPP22_RSS_TABLE_ENTRY, i % port->nrxqs);
-       }
+       mvpp22_rss_fill_table(port, port->id);
 
+       /* Configure default flows */
+       mvpp2_port_rss_hash_opts_set(port, IPV4_FLOW, MVPP22_CLS_HEK_IP4_2T);
+       mvpp2_port_rss_hash_opts_set(port, IPV6_FLOW, MVPP22_CLS_HEK_IP6_2T);
+       mvpp2_port_rss_hash_opts_set(port, TCP_V4_FLOW, MVPP22_CLS_HEK_IP4_5T);
+       mvpp2_port_rss_hash_opts_set(port, TCP_V6_FLOW, MVPP22_CLS_HEK_IP6_5T);
+       mvpp2_port_rss_hash_opts_set(port, UDP_V4_FLOW, MVPP22_CLS_HEK_IP4_5T);
+       mvpp2_port_rss_hash_opts_set(port, UDP_V6_FLOW, MVPP22_CLS_HEK_IP6_5T);
 }
index 8e1d7f9ffa0b3cd222ec796106c4e91426391d10..089f05f298917ae394b9245cdb255259d269b3c7 100644 (file)
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * RSS and Classifier definitions for Marvell PPv2 Network Controller
  *
  * Copyright (C) 2014 Marvell
  *
  * Marcin Wojtas <mw@semihalf.com>
- *
- * This file is licensed under the terms of the GNU General Public
- * License version 2. This program is licensed "as is" without any
- * warranty of any kind, whether express or implied.
  */
 
 #ifndef _MVPP2_CLS_H_
 #define _MVPP2_CLS_H_
 
+#include "mvpp2.h"
+#include "mvpp2_prs.h"
+
 /* Classifier constants */
 #define MVPP2_CLS_FLOWS_TBL_SIZE       512
 #define MVPP2_CLS_FLOWS_TBL_DATA_WORDS 3
 #define MVPP2_CLS_LKP_TBL_SIZE         64
 #define MVPP2_CLS_RX_QUEUES            256
 
-/* RSS constants */
-#define MVPP22_RSS_TABLE_ENTRIES       32
+/* Classifier flow constants */
+
+#define MVPP2_FLOW_N_FIELDS            4
+
+enum mvpp2_cls_engine {
+       MVPP22_CLS_ENGINE_C2 = 1,
+       MVPP22_CLS_ENGINE_C3A,
+       MVPP22_CLS_ENGINE_C3B,
+       MVPP22_CLS_ENGINE_C4,
+       MVPP22_CLS_ENGINE_C3HA = 6,
+       MVPP22_CLS_ENGINE_C3HB = 7,
+};
+
+#define MVPP22_CLS_HEK_OPT_MAC_DA      BIT(0)
+#define MVPP22_CLS_HEK_OPT_VLAN                BIT(1)
+#define MVPP22_CLS_HEK_OPT_L3_PROTO    BIT(2)
+#define MVPP22_CLS_HEK_OPT_IP4SA       BIT(3)
+#define MVPP22_CLS_HEK_OPT_IP4DA       BIT(4)
+#define MVPP22_CLS_HEK_OPT_IP6SA       BIT(5)
+#define MVPP22_CLS_HEK_OPT_IP6DA       BIT(6)
+#define MVPP22_CLS_HEK_OPT_L4SIP       BIT(7)
+#define MVPP22_CLS_HEK_OPT_L4DIP       BIT(8)
+#define MVPP22_CLS_HEK_N_FIELDS                9
+
+#define MVPP22_CLS_HEK_L4_OPTS (MVPP22_CLS_HEK_OPT_L4SIP | \
+                                MVPP22_CLS_HEK_OPT_L4DIP)
+
+#define MVPP22_CLS_HEK_IP4_2T  (MVPP22_CLS_HEK_OPT_IP4SA | \
+                                MVPP22_CLS_HEK_OPT_IP4DA)
+
+#define MVPP22_CLS_HEK_IP6_2T  (MVPP22_CLS_HEK_OPT_IP6SA | \
+                                MVPP22_CLS_HEK_OPT_IP6DA)
+
+/* The fifth field in the "5T" key is the L4_Info field */
+#define MVPP22_CLS_HEK_IP4_5T  (MVPP22_CLS_HEK_IP4_2T | \
+                                MVPP22_CLS_HEK_L4_OPTS)
+
+#define MVPP22_CLS_HEK_IP6_5T  (MVPP22_CLS_HEK_IP6_2T | \
+                                MVPP22_CLS_HEK_L4_OPTS)
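
For reference when reading the hash_opts debugfs file added by this patch (it prints these as 0x%04x), the combined values that follow from the BIT() definitions above are:

    /* MVPP22_CLS_HEK_IP4_2T = 0x0018    MVPP22_CLS_HEK_IP4_5T = 0x0198
     * MVPP22_CLS_HEK_IP6_2T = 0x0060    MVPP22_CLS_HEK_IP6_5T = 0x01e0
     */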
+
+enum mvpp2_cls_field_id {
+       MVPP22_CLS_FIELD_MAC_DA = 0x03,
+       MVPP22_CLS_FIELD_VLAN = 0x06,
+       MVPP22_CLS_FIELD_L3_PROTO = 0x0f,
+       MVPP22_CLS_FIELD_IP4SA = 0x10,
+       MVPP22_CLS_FIELD_IP4DA = 0x11,
+       MVPP22_CLS_FIELD_IP6SA = 0x17,
+       MVPP22_CLS_FIELD_IP6DA = 0x1a,
+       MVPP22_CLS_FIELD_L4SIP = 0x1d,
+       MVPP22_CLS_FIELD_L4DIP = 0x1e,
+};
+
+enum mvpp2_cls_flow_seq {
+       MVPP2_CLS_FLOW_SEQ_NORMAL = 0,
+       MVPP2_CLS_FLOW_SEQ_FIRST1,
+       MVPP2_CLS_FLOW_SEQ_FIRST2,
+       MVPP2_CLS_FLOW_SEQ_LAST,
+       MVPP2_CLS_FLOW_SEQ_MIDDLE
+};
+
+/* Classifier C2 engine constants */
+#define MVPP22_CLS_C2_TCAM_EN(data)            ((data) << 16)
+
+enum mvpp22_cls_c2_action {
+       MVPP22_C2_NO_UPD = 0,
+       MVPP22_C2_NO_UPD_LOCK,
+       MVPP22_C2_UPD,
+       MVPP22_C2_UPD_LOCK,
+};
+
+enum mvpp22_cls_c2_fwd_action {
+       MVPP22_C2_FWD_NO_UPD = 0,
+       MVPP22_C2_FWD_NO_UPD_LOCK,
+       MVPP22_C2_FWD_SW,
+       MVPP22_C2_FWD_SW_LOCK,
+       MVPP22_C2_FWD_HW,
+       MVPP22_C2_FWD_HW_LOCK,
+       MVPP22_C2_FWD_HW_LOW_LAT,
+       MVPP22_C2_FWD_HW_LOW_LAT_LOCK,
+};
+
+#define MVPP2_CLS_C2_TCAM_WORDS                        5
+#define MVPP2_CLS_C2_ATTR_WORDS                        5
+
+struct mvpp2_cls_c2_entry {
+       u32 index;
+       u32 tcam[MVPP2_CLS_C2_TCAM_WORDS];
+       u32 act;
+       u32 attr[MVPP2_CLS_C2_ATTR_WORDS];
+};
+
+/* Classifier C2 engine entries */
+#define MVPP22_CLS_C2_RSS_ENTRY(port)  (port)
+#define MVPP22_CLS_C2_N_ENTRIES                MVPP2_MAX_PORTS
 
+/* RSS flow entries in the flow table. We have 2 entries per port for RSS.
+ *
+ * The first performs a lookup using the C2 TCAM engine, to tag the
+ * packet for software forwarding (needed for RSS), enable or disable RSS, and
+ * assign the default rx queue.
+ *
+ * The second configures the hash generation, by specifying which fields of the
+ * packet header are used to generate the hash, and specifies the relevant hash
+ * engine to use.
+ */
+#define MVPP22_RSS_FLOW_C2_OFFS                0
+#define MVPP22_RSS_FLOW_HASH_OFFS      1
+#define MVPP22_RSS_FLOW_SIZE           (MVPP22_RSS_FLOW_HASH_OFFS + 1)
+
+#define MVPP22_RSS_FLOW_C2(port)       ((port) * MVPP22_RSS_FLOW_SIZE + \
+                                        MVPP22_RSS_FLOW_C2_OFFS)
+#define MVPP22_RSS_FLOW_HASH(port)     ((port) * MVPP22_RSS_FLOW_SIZE + \
+                                        MVPP22_RSS_FLOW_HASH_OFFS)
+#define MVPP22_RSS_FLOW_FIRST(port)    MVPP22_RSS_FLOW_C2(port)
+
+/* Packet flow ID */
+enum mvpp2_prs_flow {
+       MVPP2_FL_START = 8,
+       MVPP2_FL_IP4_TCP_NF_UNTAG = MVPP2_FL_START,
+       MVPP2_FL_IP4_UDP_NF_UNTAG,
+       MVPP2_FL_IP4_TCP_NF_TAG,
+       MVPP2_FL_IP4_UDP_NF_TAG,
+       MVPP2_FL_IP6_TCP_NF_UNTAG,
+       MVPP2_FL_IP6_UDP_NF_UNTAG,
+       MVPP2_FL_IP6_TCP_NF_TAG,
+       MVPP2_FL_IP6_UDP_NF_TAG,
+       MVPP2_FL_IP4_TCP_FRAG_UNTAG,
+       MVPP2_FL_IP4_UDP_FRAG_UNTAG,
+       MVPP2_FL_IP4_TCP_FRAG_TAG,
+       MVPP2_FL_IP4_UDP_FRAG_TAG,
+       MVPP2_FL_IP6_TCP_FRAG_UNTAG,
+       MVPP2_FL_IP6_UDP_FRAG_UNTAG,
+       MVPP2_FL_IP6_TCP_FRAG_TAG,
+       MVPP2_FL_IP6_UDP_FRAG_TAG,
+       MVPP2_FL_IP4_UNTAG, /* non-TCP, non-UDP, same for below */
+       MVPP2_FL_IP4_TAG,
+       MVPP2_FL_IP6_UNTAG,
+       MVPP2_FL_IP6_TAG,
+       MVPP2_FL_NON_IP_UNTAG,
+       MVPP2_FL_NON_IP_TAG,
+       MVPP2_FL_LAST,
+};
+
+struct mvpp2_cls_flow {
+       /* The L2-L4 traffic flow type */
+       int flow_type;
+
+       /* The first id in the flow table for this flow */
+       u16 flow_id;
+
+       /* The supported HEK fields for this flow */
+       u16 supported_hash_opts;
+
+       /* The Header Parser result_info that matches this flow */
+       struct mvpp2_prs_result_info prs_ri;
+};
+
+#define MVPP2_N_FLOWS  52
+
+#define MVPP2_ENTRIES_PER_FLOW                 (MVPP2_MAX_PORTS + 1)
+#define MVPP2_FLOW_C2_ENTRY(id)                        ((id) * MVPP2_ENTRIES_PER_FLOW)
+#define MVPP2_PORT_FLOW_HASH_ENTRY(port, id)   ((id) * MVPP2_ENTRIES_PER_FLOW + \
+                                               (port) + 1)
 struct mvpp2_cls_flow_entry {
        u32 index;
        u32 data[MVPP2_CLS_FLOWS_TBL_DATA_WORDS];
@@ -33,7 +193,15 @@ struct mvpp2_cls_lookup_entry {
        u32 data;
 };
 
-void mvpp22_init_rss(struct mvpp2_port *port);
+void mvpp22_rss_fill_table(struct mvpp2_port *port, u32 table);
+
+void mvpp22_rss_port_init(struct mvpp2_port *port);
+
+void mvpp22_rss_enable(struct mvpp2_port *port);
+void mvpp22_rss_disable(struct mvpp2_port *port);
+
+int mvpp2_ethtool_rxfh_get(struct mvpp2_port *port, struct ethtool_rxnfc *info);
+int mvpp2_ethtool_rxfh_set(struct mvpp2_port *port, struct ethtool_rxnfc *info);
 
 void mvpp2_cls_init(struct mvpp2 *priv);
 
@@ -41,4 +209,25 @@ void mvpp2_cls_port_config(struct mvpp2_port *port);
 
 void mvpp2_cls_oversize_rxq_set(struct mvpp2_port *port);
 
+int mvpp2_cls_flow_eng_get(struct mvpp2_cls_flow_entry *fe);
+
+u16 mvpp2_flow_get_hek_fields(struct mvpp2_cls_flow_entry *fe);
+
+struct mvpp2_cls_flow *mvpp2_cls_flow_get(int flow);
+
+u32 mvpp2_cls_flow_hits(struct mvpp2 *priv, int index);
+
+void mvpp2_cls_flow_read(struct mvpp2 *priv, int index,
+                        struct mvpp2_cls_flow_entry *fe);
+
+u32 mvpp2_cls_lookup_hits(struct mvpp2 *priv, int index);
+
+void mvpp2_cls_lookup_read(struct mvpp2 *priv, int lkpid, int way,
+                          struct mvpp2_cls_lookup_entry *le);
+
+u32 mvpp2_cls_c2_hit_count(struct mvpp2 *priv, int c2_index);
+
+void mvpp2_cls_c2_read(struct mvpp2 *priv, int index,
+                      struct mvpp2_cls_c2_entry *c2);
+
 #endif
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_debugfs.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_debugfs.c
new file mode 100644 (file)
index 0000000..f9744a6
--- /dev/null
@@ -0,0 +1,703 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Driver for Marvell PPv2 network controller for Armada 375 SoC.
+ *
+ * Copyright (C) 2018 Marvell
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/debugfs.h>
+
+#include "mvpp2.h"
+#include "mvpp2_prs.h"
+#include "mvpp2_cls.h"
+
+struct mvpp2_dbgfs_prs_entry {
+       int tid;
+       struct mvpp2 *priv;
+};
+
+struct mvpp2_dbgfs_flow_entry {
+       int flow;
+       struct mvpp2 *priv;
+};
+
+struct mvpp2_dbgfs_port_flow_entry {
+       struct mvpp2_port *port;
+       struct mvpp2_dbgfs_flow_entry *dbg_fe;
+};
+
+static int mvpp2_dbgfs_flow_flt_hits_show(struct seq_file *s, void *unused)
+{
+       struct mvpp2_dbgfs_flow_entry *entry = s->private;
+       int id = MVPP2_FLOW_C2_ENTRY(entry->flow);
+
+       u32 hits = mvpp2_cls_flow_hits(entry->priv, id);
+
+       seq_printf(s, "%u\n", hits);
+
+       return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(mvpp2_dbgfs_flow_flt_hits);
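
DEFINE_SHOW_ATTRIBUTE() is the seq_file helper from linux/seq_file.h; simplified, it expands to roughly the following (a sketch, not part of the patch):

    static int mvpp2_dbgfs_flow_flt_hits_open(struct inode *inode,
                                              struct file *file)
    {
            return single_open(file, mvpp2_dbgfs_flow_flt_hits_show,
                               inode->i_private);
    }

    static const struct file_operations mvpp2_dbgfs_flow_flt_hits_fops = {
            .owner = THIS_MODULE,
            .open = mvpp2_dbgfs_flow_flt_hits_open,
            .read = seq_read,
            .llseek = seq_lseek,
            .release = single_release,
    };

The hand-rolled fops for flow_type and hash_opt below differ only in supplying a custom .release that frees the per-file entry.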
+
+static int mvpp2_dbgfs_flow_dec_hits_show(struct seq_file *s, void *unused)
+{
+       struct mvpp2_dbgfs_flow_entry *entry = s->private;
+
+       u32 hits = mvpp2_cls_lookup_hits(entry->priv, entry->flow);
+
+       seq_printf(s, "%u\n", hits);
+
+       return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(mvpp2_dbgfs_flow_dec_hits);
+
+static int mvpp2_dbgfs_flow_type_show(struct seq_file *s, void *unused)
+{
+       struct mvpp2_dbgfs_flow_entry *entry = s->private;
+       struct mvpp2_cls_flow *f;
+       const char *flow_name;
+
+       f = mvpp2_cls_flow_get(entry->flow);
+       if (!f)
+               return -EINVAL;
+
+       switch (f->flow_type) {
+       case IPV4_FLOW:
+               flow_name = "ipv4";
+               break;
+       case IPV6_FLOW:
+               flow_name = "ipv6";
+               break;
+       case TCP_V4_FLOW:
+               flow_name = "tcp4";
+               break;
+       case TCP_V6_FLOW:
+               flow_name = "tcp6";
+               break;
+       case UDP_V4_FLOW:
+               flow_name = "udp4";
+               break;
+       case UDP_V6_FLOW:
+               flow_name = "udp6";
+               break;
+       default:
+               flow_name = "other";
+       }
+
+       seq_printf(s, "%s\n", flow_name);
+
+       return 0;
+}
+
+static int mvpp2_dbgfs_flow_type_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, mvpp2_dbgfs_flow_type_show, inode->i_private);
+}
+
+static int mvpp2_dbgfs_flow_type_release(struct inode *inode, struct file *file)
+{
+       struct seq_file *seq = file->private_data;
+       struct mvpp2_dbgfs_flow_entry *flow_entry = seq->private;
+
+       kfree(flow_entry);
+       return single_release(inode, file);
+}
+
+static const struct file_operations mvpp2_dbgfs_flow_type_fops = {
+       .open = mvpp2_dbgfs_flow_type_open,
+       .read = seq_read,
+       .release = mvpp2_dbgfs_flow_type_release,
+};
+
+static int mvpp2_dbgfs_flow_id_show(struct seq_file *s, void *unused)
+{
+       struct mvpp2_dbgfs_flow_entry *entry = s->private;
+       struct mvpp2_cls_flow *f;
+
+       f = mvpp2_cls_flow_get(entry->flow);
+       if (!f)
+               return -EINVAL;
+
+       seq_printf(s, "%d\n", f->flow_id);
+
+       return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(mvpp2_dbgfs_flow_id);
+
+static int mvpp2_dbgfs_port_flow_hash_opt_show(struct seq_file *s, void *unused)
+{
+       struct mvpp2_dbgfs_port_flow_entry *entry = s->private;
+       struct mvpp2_port *port = entry->port;
+       struct mvpp2_cls_flow_entry fe;
+       struct mvpp2_cls_flow *f;
+       int flow_index;
+       u16 hash_opts;
+
+       f = mvpp2_cls_flow_get(entry->dbg_fe->flow);
+       if (!f)
+               return -EINVAL;
+
+       flow_index = MVPP2_PORT_FLOW_HASH_ENTRY(entry->port->id, f->flow_id);
+
+       mvpp2_cls_flow_read(port->priv, flow_index, &fe);
+
+       hash_opts = mvpp2_flow_get_hek_fields(&fe);
+
+       seq_printf(s, "0x%04x\n", hash_opts);
+
+       return 0;
+}
+
+static int mvpp2_dbgfs_port_flow_hash_opt_open(struct inode *inode,
+                                              struct file *file)
+{
+       return single_open(file, mvpp2_dbgfs_port_flow_hash_opt_show,
+                          inode->i_private);
+}
+
+static int mvpp2_dbgfs_port_flow_hash_opt_release(struct inode *inode,
+                                                 struct file *file)
+{
+       struct seq_file *seq = file->private_data;
+       struct mvpp2_dbgfs_port_flow_entry *flow_entry = seq->private;
+
+       kfree(flow_entry);
+       return single_release(inode, file);
+}
+
+static const struct file_operations mvpp2_dbgfs_port_flow_hash_opt_fops = {
+       .open = mvpp2_dbgfs_port_flow_hash_opt_open,
+       .read = seq_read,
+       .release = mvpp2_dbgfs_port_flow_hash_opt_release,
+};
+
+static int mvpp2_dbgfs_port_flow_engine_show(struct seq_file *s, void *unused)
+{
+       struct mvpp2_dbgfs_port_flow_entry *entry = s->private;
+       struct mvpp2_port *port = entry->port;
+       struct mvpp2_cls_flow_entry fe;
+       struct mvpp2_cls_flow *f;
+       int flow_index, engine;
+
+       f = mvpp2_cls_flow_get(entry->dbg_fe->flow);
+       if (!f)
+               return -EINVAL;
+
+       flow_index = MVPP2_PORT_FLOW_HASH_ENTRY(entry->port->id, f->flow_id);
+
+       mvpp2_cls_flow_read(port->priv, flow_index, &fe);
+
+       engine = mvpp2_cls_flow_eng_get(&fe);
+
+       seq_printf(s, "%d\n", engine);
+
+       return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(mvpp2_dbgfs_port_flow_engine);
+
+static int mvpp2_dbgfs_flow_c2_hits_show(struct seq_file *s, void *unused)
+{
+       struct mvpp2_port *port = s->private;
+       u32 hits;
+
+       hits = mvpp2_cls_c2_hit_count(port->priv,
+                                     MVPP22_CLS_C2_RSS_ENTRY(port->id));
+
+       seq_printf(s, "%u\n", hits);
+
+       return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(mvpp2_dbgfs_flow_c2_hits);
+
+static int mvpp2_dbgfs_flow_c2_rxq_show(struct seq_file *s, void *unused)
+{
+       struct mvpp2_port *port = s->private;
+       struct mvpp2_cls_c2_entry c2;
+       u8 qh, ql;
+
+       mvpp2_cls_c2_read(port->priv, MVPP22_CLS_C2_RSS_ENTRY(port->id), &c2);
+
+       qh = (c2.attr[0] >> MVPP22_CLS_C2_ATTR0_QHIGH_OFFS) &
+            MVPP22_CLS_C2_ATTR0_QHIGH_MASK;
+
+       ql = (c2.attr[0] >> MVPP22_CLS_C2_ATTR0_QLOW_OFFS) &
+            MVPP22_CLS_C2_ATTR0_QLOW_MASK;
+
+       seq_printf(s, "%d\n", (qh << 3 | ql));
+
+       return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(mvpp2_dbgfs_flow_c2_rxq);
+
+static int mvpp2_dbgfs_flow_c2_enable_show(struct seq_file *s, void *unused)
+{
+       struct mvpp2_port *port = s->private;
+       struct mvpp2_cls_c2_entry c2;
+       int enabled;
+
+       mvpp2_cls_c2_read(port->priv, MVPP22_CLS_C2_RSS_ENTRY(port->id), &c2);
+
+       enabled = !!(c2.attr[2] & MVPP22_CLS_C2_ATTR2_RSS_EN);
+
+       seq_printf(s, "%d\n", enabled);
+
+       return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(mvpp2_dbgfs_flow_c2_enable);
+
+static int mvpp2_dbgfs_port_vid_show(struct seq_file *s, void *unused)
+{
+       struct mvpp2_port *port = s->private;
+       unsigned char byte[2], enable[2];
+       struct mvpp2 *priv = port->priv;
+       struct mvpp2_prs_entry pe;
+       unsigned long pmap;
+       u16 rvid;
+       int tid;
+
+       for (tid = MVPP2_PRS_VID_PORT_FIRST(port->id);
+            tid <= MVPP2_PRS_VID_PORT_LAST(port->id); tid++) {
+               mvpp2_prs_init_from_hw(priv, &pe, tid);
+
+               pmap = mvpp2_prs_tcam_port_map_get(&pe);
+
+               if (!priv->prs_shadow[tid].valid)
+                       continue;
+
+               if (!test_bit(port->id, &pmap))
+                       continue;
+
+               mvpp2_prs_tcam_data_byte_get(&pe, 2, &byte[0], &enable[0]);
+               mvpp2_prs_tcam_data_byte_get(&pe, 3, &byte[1], &enable[1]);
+
+               rvid = ((byte[0] & 0xf) << 8) + byte[1];
+
+               seq_printf(s, "%u\n", rvid);
+       }
+
+       return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(mvpp2_dbgfs_port_vid);
+
+static int mvpp2_dbgfs_port_parser_show(struct seq_file *s, void *unused)
+{
+       struct mvpp2_port *port = s->private;
+       struct mvpp2 *priv = port->priv;
+       struct mvpp2_prs_entry pe;
+       unsigned long pmap;
+       int i;
+
+       for (i = 0; i < MVPP2_PRS_TCAM_SRAM_SIZE; i++) {
+               mvpp2_prs_init_from_hw(port->priv, &pe, i);
+
+               pmap = mvpp2_prs_tcam_port_map_get(&pe);
+               if (priv->prs_shadow[i].valid && test_bit(port->id, &pmap))
+                       seq_printf(s, "%03d\n", i);
+       }
+
+       return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(mvpp2_dbgfs_port_parser);
+
+static int mvpp2_dbgfs_filter_show(struct seq_file *s, void *unused)
+{
+       struct mvpp2_port *port = s->private;
+       struct mvpp2 *priv = port->priv;
+       struct mvpp2_prs_entry pe;
+       unsigned long pmap;
+       int index, tid;
+
+       for (tid = MVPP2_PE_MAC_RANGE_START;
+            tid <= MVPP2_PE_MAC_RANGE_END; tid++) {
+               unsigned char da[ETH_ALEN], da_mask[ETH_ALEN];
+
+               if (!priv->prs_shadow[tid].valid ||
+                   priv->prs_shadow[tid].lu != MVPP2_PRS_LU_MAC ||
+                   priv->prs_shadow[tid].udf != MVPP2_PRS_UDF_MAC_DEF)
+                       continue;
+
+               mvpp2_prs_init_from_hw(priv, &pe, tid);
+
+               pmap = mvpp2_prs_tcam_port_map_get(&pe);
+
+               /* We only want entries active on this port */
+               if (!test_bit(port->id, &pmap))
+                       continue;
+
+               /* Read mac addr from entry */
+               for (index = 0; index < ETH_ALEN; index++)
+                       mvpp2_prs_tcam_data_byte_get(&pe, index, &da[index],
+                                                    &da_mask[index]);
+
+               seq_printf(s, "%pM\n", da);
+       }
+
+       return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(mvpp2_dbgfs_filter);
+
+static int mvpp2_dbgfs_prs_lu_show(struct seq_file *s, void *unused)
+{
+       struct mvpp2_dbgfs_prs_entry *entry = s->private;
+       struct mvpp2 *priv = entry->priv;
+
+       seq_printf(s, "%x\n", priv->prs_shadow[entry->tid].lu);
+
+       return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(mvpp2_dbgfs_prs_lu);
+
+static int mvpp2_dbgfs_prs_pmap_show(struct seq_file *s, void *unused)
+{
+       struct mvpp2_dbgfs_prs_entry *entry = s->private;
+       struct mvpp2_prs_entry pe;
+       unsigned int pmap;
+
+       mvpp2_prs_init_from_hw(entry->priv, &pe, entry->tid);
+
+       pmap = mvpp2_prs_tcam_port_map_get(&pe);
+       pmap &= MVPP2_PRS_PORT_MASK;
+
+       seq_printf(s, "%02x\n", pmap);
+
+       return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(mvpp2_dbgfs_prs_pmap);
+
+static int mvpp2_dbgfs_prs_ai_show(struct seq_file *s, void *unused)
+{
+       struct mvpp2_dbgfs_prs_entry *entry = s->private;
+       struct mvpp2_prs_entry pe;
+       unsigned char ai, ai_mask;
+
+       mvpp2_prs_init_from_hw(entry->priv, &pe, entry->tid);
+
+       ai = pe.tcam[MVPP2_PRS_TCAM_AI_WORD] & MVPP2_PRS_AI_MASK;
+       ai_mask = (pe.tcam[MVPP2_PRS_TCAM_AI_WORD] >> 16) & MVPP2_PRS_AI_MASK;
+
+       seq_printf(s, "%02x %02x\n", ai, ai_mask);
+
+       return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(mvpp2_dbgfs_prs_ai);
+
+static int mvpp2_dbgfs_prs_hdata_show(struct seq_file *s, void *unused)
+{
+       struct mvpp2_dbgfs_prs_entry *entry = s->private;
+       struct mvpp2_prs_entry pe;
+       unsigned char data[8], mask[8];
+       int i;
+
+       mvpp2_prs_init_from_hw(entry->priv, &pe, entry->tid);
+
+       for (i = 0; i < 8; i++)
+               mvpp2_prs_tcam_data_byte_get(&pe, i, &data[i], &mask[i]);
+
+       seq_printf(s, "%*phN %*phN\n", 8, data, 8, mask);
+
+       return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(mvpp2_dbgfs_prs_hdata);
+
+static int mvpp2_dbgfs_prs_sram_show(struct seq_file *s, void *unused)
+{
+       struct mvpp2_dbgfs_prs_entry *entry = s->private;
+       struct mvpp2_prs_entry pe;
+
+       mvpp2_prs_init_from_hw(entry->priv, &pe, entry->tid);
+
+       seq_printf(s, "%*phN\n", 14, pe.sram);
+
+       return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(mvpp2_dbgfs_prs_sram);
+
+static int mvpp2_dbgfs_prs_hits_show(struct seq_file *s, void *unused)
+{
+       struct mvpp2_dbgfs_prs_entry *entry = s->private;
+       int val;
+
+       val = mvpp2_prs_hits(entry->priv, entry->tid);
+       if (val < 0)
+               return val;
+
+       seq_printf(s, "%d\n", val);
+
+       return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(mvpp2_dbgfs_prs_hits);
+
+static int mvpp2_dbgfs_prs_valid_show(struct seq_file *s, void *unused)
+{
+       struct mvpp2_dbgfs_prs_entry *entry = s->private;
+       struct mvpp2 *priv = entry->priv;
+       int tid = entry->tid;
+
+       seq_printf(s, "%d\n", priv->prs_shadow[tid].valid ? 1 : 0);
+
+       return 0;
+}
+
+static int mvpp2_dbgfs_prs_valid_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, mvpp2_dbgfs_prs_valid_show, inode->i_private);
+}
+
+static int mvpp2_dbgfs_prs_valid_release(struct inode *inode, struct file *file)
+{
+       struct seq_file *seq = file->private_data;
+       struct mvpp2_dbgfs_prs_entry *entry = seq->private;
+
+       kfree(entry);
+       return single_release(inode, file);
+}
+
+static const struct file_operations mvpp2_dbgfs_prs_valid_fops = {
+       .open = mvpp2_dbgfs_prs_valid_open,
+       .read = seq_read,
+       .release = mvpp2_dbgfs_prs_valid_release,
+};
+
+static int mvpp2_dbgfs_flow_port_init(struct dentry *parent,
+                                     struct mvpp2_port *port,
+                                     struct mvpp2_dbgfs_flow_entry *entry)
+{
+       struct mvpp2_dbgfs_port_flow_entry *port_entry;
+       struct dentry *port_dir;
+
+       port_dir = debugfs_create_dir(port->dev->name, parent);
+       if (IS_ERR(port_dir))
+               return PTR_ERR(port_dir);
+
+       /* This will be freed by 'hash_opts' release op */
+       port_entry = kmalloc(sizeof(*port_entry), GFP_KERNEL);
+       if (!port_entry)
+               return -ENOMEM;
+
+       port_entry->port = port;
+       port_entry->dbg_fe = entry;
+
+       debugfs_create_file("hash_opts", 0444, port_dir, port_entry,
+                           &mvpp2_dbgfs_port_flow_hash_opt_fops);
+
+       debugfs_create_file("engine", 0444, port_dir, port_entry,
+                           &mvpp2_dbgfs_port_flow_engine_fops);
+
+       return 0;
+}
+
+static int mvpp2_dbgfs_flow_entry_init(struct dentry *parent,
+                                      struct mvpp2 *priv, int flow)
+{
+       struct mvpp2_dbgfs_flow_entry *entry;
+       struct dentry *flow_entry_dir;
+       char flow_entry_name[10];
+       int i, ret;
+
+       sprintf(flow_entry_name, "%02d", flow);
+
+       flow_entry_dir = debugfs_create_dir(flow_entry_name, parent);
+       if (!flow_entry_dir)
+               return -ENOMEM;
+
+       /* This will be freed by 'type' release op */
+       entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+       if (!entry)
+               return -ENOMEM;
+
+       entry->flow = flow;
+       entry->priv = priv;
+
+       debugfs_create_file("flow_hits", 0444, flow_entry_dir, entry,
+                           &mvpp2_dbgfs_flow_flt_hits_fops);
+
+       debugfs_create_file("dec_hits", 0444, flow_entry_dir, entry,
+                           &mvpp2_dbgfs_flow_dec_hits_fops);
+
+       debugfs_create_file("type", 0444, flow_entry_dir, entry,
+                           &mvpp2_dbgfs_flow_type_fops);
+
+       debugfs_create_file("id", 0444, flow_entry_dir, entry,
+                           &mvpp2_dbgfs_flow_id_fops);
+
+       /* Create entry for each port */
+       for (i = 0; i < priv->port_count; i++) {
+               ret = mvpp2_dbgfs_flow_port_init(flow_entry_dir,
+                                                priv->port_list[i], entry);
+               if (ret)
+                       return ret;
+       }
+       return 0;
+}
+
+static int mvpp2_dbgfs_flow_init(struct dentry *parent, struct mvpp2 *priv)
+{
+       struct dentry *flow_dir;
+       int i, ret;
+
+       flow_dir = debugfs_create_dir("flows", parent);
+       if (!flow_dir)
+               return -ENOMEM;
+
+       for (i = 0; i < MVPP2_N_FLOWS; i++) {
+               ret = mvpp2_dbgfs_flow_entry_init(flow_dir, priv, i);
+               if (ret)
+                       return ret;
+       }
+
+       return 0;
+}
+
+static int mvpp2_dbgfs_prs_entry_init(struct dentry *parent,
+                                     struct mvpp2 *priv, int tid)
+{
+       struct mvpp2_dbgfs_prs_entry *entry;
+       struct dentry *prs_entry_dir;
+       char prs_entry_name[10];
+
+       if (tid >= MVPP2_PRS_TCAM_SRAM_SIZE)
+               return -EINVAL;
+
+       sprintf(prs_entry_name, "%03d", tid);
+
+       prs_entry_dir = debugfs_create_dir(prs_entry_name, parent);
+       if (!prs_entry_dir)
+               return -ENOMEM;
+
+       /* This will be freed by the 'valid' entry's release op */
+       entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+       if (!entry)
+               return -ENOMEM;
+
+       entry->tid = tid;
+       entry->priv = priv;
+
+       /* Create each attr */
+       debugfs_create_file("sram", 0444, prs_entry_dir, entry,
+                           &mvpp2_dbgfs_prs_sram_fops);
+
+       debugfs_create_file("valid", 0644, prs_entry_dir, entry,
+                           &mvpp2_dbgfs_prs_valid_fops);
+
+       debugfs_create_file("lookup_id", 0644, prs_entry_dir, entry,
+                           &mvpp2_dbgfs_prs_lu_fops);
+
+       debugfs_create_file("ai", 0644, prs_entry_dir, entry,
+                           &mvpp2_dbgfs_prs_ai_fops);
+
+       debugfs_create_file("header_data", 0644, prs_entry_dir, entry,
+                           &mvpp2_dbgfs_prs_hdata_fops);
+
+       debugfs_create_file("hits", 0444, prs_entry_dir, entry,
+                           &mvpp2_dbgfs_prs_hits_fops);
+
+       return 0;
+}
+
+static int mvpp2_dbgfs_prs_init(struct dentry *parent, struct mvpp2 *priv)
+{
+       struct dentry *prs_dir;
+       int i, ret;
+
+       prs_dir = debugfs_create_dir("parser", parent);
+       if (!prs_dir)
+               return -ENOMEM;
+
+       for (i = 0; i < MVPP2_PRS_TCAM_SRAM_SIZE; i++) {
+               ret = mvpp2_dbgfs_prs_entry_init(prs_dir, priv, i);
+               if (ret)
+                       return ret;
+       }
+
+       return 0;
+}
+
+static int mvpp2_dbgfs_port_init(struct dentry *parent,
+                                struct mvpp2_port *port)
+{
+       struct dentry *port_dir;
+
+       port_dir = debugfs_create_dir(port->dev->name, parent);
+       if (IS_ERR(port_dir))
+               return PTR_ERR(port_dir);
+
+       debugfs_create_file("parser_entries", 0444, port_dir, port,
+                           &mvpp2_dbgfs_port_parser_fops);
+
+       debugfs_create_file("mac_filter", 0444, port_dir, port,
+                           &mvpp2_dbgfs_filter_fops);
+
+       debugfs_create_file("vid_filter", 0444, port_dir, port,
+                           &mvpp2_dbgfs_port_vid_fops);
+
+       debugfs_create_file("c2_hits", 0444, port_dir, port,
+                           &mvpp2_dbgfs_flow_c2_hits_fops);
+
+       debugfs_create_file("default_rxq", 0444, port_dir, port,
+                           &mvpp2_dbgfs_flow_c2_rxq_fops);
+
+       debugfs_create_file("rss_enable", 0444, port_dir, port,
+                           &mvpp2_dbgfs_flow_c2_enable_fops);
+
+       return 0;
+}
+
+void mvpp2_dbgfs_cleanup(struct mvpp2 *priv)
+{
+       debugfs_remove_recursive(priv->dbgfs_dir);
+}
+
+void mvpp2_dbgfs_init(struct mvpp2 *priv, const char *name)
+{
+       struct dentry *mvpp2_dir, *mvpp2_root;
+       int ret, i;
+
+       mvpp2_root = debugfs_lookup(MVPP2_DRIVER_NAME, NULL);
+       if (!mvpp2_root) {
+               mvpp2_root = debugfs_create_dir(MVPP2_DRIVER_NAME, NULL);
+               if (IS_ERR(mvpp2_root))
+                       return;
+       }
+
+       mvpp2_dir = debugfs_create_dir(name, mvpp2_root);
+       if (IS_ERR(mvpp2_dir))
+               return;
+
+       priv->dbgfs_dir = mvpp2_dir;
+
+       ret = mvpp2_dbgfs_prs_init(mvpp2_dir, priv);
+       if (ret)
+               goto err;
+
+       for (i = 0; i < priv->port_count; i++) {
+               ret = mvpp2_dbgfs_port_init(mvpp2_dir, priv->port_list[i]);
+               if (ret)
+                       goto err;
+       }
+
+       ret = mvpp2_dbgfs_flow_init(mvpp2_dir, priv);
+       if (ret)
+               goto err;
+
+       return;
+err:
+       mvpp2_dbgfs_cleanup(priv);
+}
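
With everything wired up, mvpp2_dbgfs_init() builds a per-controller
hierarchy under debugfs.  On a board with one controller and a single port
named eth0 it looks roughly like this (a sketch; flow numbering elided):

    /sys/kernel/debug/mvpp2/<controller>/
        parser/000 .. 255/   sram, valid, lookup_id, ai, header_data, hits
        eth0/                parser_entries, mac_filter, vid_filter,
                             c2_hits, default_rxq, rss_enable
        flows/00 .. NN/      flow_hits, dec_hits, type, id, plus one
                             eth0/{hash_opts, engine} directory per port

One subtlety: debugfs_lookup() returns the shared "mvpp2" directory with
an extra dentry reference that is never dropped here -- a small leak that
later kernels addressed with debugfs_lookup_and_remove().
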
index 0319ed9ef8b815518a490cdd098018fcda46738a..32d785b616e1e270f2adb47978ce3b3f172f02ac 100644
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Driver for Marvell PPv2 network controller for Armada 375 SoC.
  *
  * Copyright (C) 2014 Marvell
  *
  * Marcin Wojtas <mw@semihalf.com>
- *
- * This file is licensed under the terms of the GNU General Public
- * License version 2. This program is licensed "as is" without any
- * warranty of any kind, whether express or implied.
  */
 
 #include <linux/acpi.h>
@@ -66,7 +63,7 @@ static void mvpp2_mac_config(struct net_device *dev, unsigned int mode,
 #define MVPP2_QDIST_SINGLE_MODE        0
 #define MVPP2_QDIST_MULTI_MODE 1
 
-static int queue_mode = MVPP2_QDIST_SINGLE_MODE;
+static int queue_mode = MVPP2_QDIST_MULTI_MODE;
 
 module_param(queue_mode, int, 0444);
 MODULE_PARM_DESC(queue_mode, "Set queue_mode (single=0, multi=1)");
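
The default flip from single- to multi-queue mode pairs with the RSS
support added below: mvpp22_rss_is_supported() keys off queue_mode, and
mvpp2_probe() (later in this diff) forces the mode back to single on
PPv2.1, which cannot distribute queues.  Because the parameter is
registered with mode 0444 it is read-only at runtime:

    /* Visible at /sys/module/mvpp2/parameters/queue_mode but settable
     * only at load time, e.g. mvpp2.queue_mode=0 on the kernel command
     * line to restore the old single-queue default.
     */
    module_param(queue_mode, int, 0444);
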
@@ -151,9 +148,10 @@ static dma_addr_t mvpp2_txdesc_dma_addr_get(struct mvpp2_port *port,
                                            struct mvpp2_tx_desc *tx_desc)
 {
        if (port->priv->hw_version == MVPP21)
-               return tx_desc->pp21.buf_dma_addr;
+               return le32_to_cpu(tx_desc->pp21.buf_dma_addr);
        else
-               return tx_desc->pp22.buf_dma_addr_ptp & MVPP2_DESC_DMA_MASK;
+               return le64_to_cpu(tx_desc->pp22.buf_dma_addr_ptp) &
+                      MVPP2_DESC_DMA_MASK;
 }
 
 static void mvpp2_txdesc_dma_addr_set(struct mvpp2_port *port,
@@ -166,12 +164,12 @@ static void mvpp2_txdesc_dma_addr_set(struct mvpp2_port *port,
        offset = dma_addr & MVPP2_TX_DESC_ALIGN;
 
        if (port->priv->hw_version == MVPP21) {
-               tx_desc->pp21.buf_dma_addr = addr;
+               tx_desc->pp21.buf_dma_addr = cpu_to_le32(addr);
                tx_desc->pp21.packet_offset = offset;
        } else {
-               u64 val = (u64)addr;
+               __le64 val = cpu_to_le64(addr);
 
-               tx_desc->pp22.buf_dma_addr_ptp &= ~MVPP2_DESC_DMA_MASK;
+               tx_desc->pp22.buf_dma_addr_ptp &= ~cpu_to_le64(MVPP2_DESC_DMA_MASK);
                tx_desc->pp22.buf_dma_addr_ptp |= val;
                tx_desc->pp22.packet_offset = offset;
        }
@@ -181,9 +179,9 @@ static size_t mvpp2_txdesc_size_get(struct mvpp2_port *port,
                                    struct mvpp2_tx_desc *tx_desc)
 {
        if (port->priv->hw_version == MVPP21)
-               return tx_desc->pp21.data_size;
+               return le16_to_cpu(tx_desc->pp21.data_size);
        else
-               return tx_desc->pp22.data_size;
+               return le16_to_cpu(tx_desc->pp22.data_size);
 }
 
 static void mvpp2_txdesc_size_set(struct mvpp2_port *port,
@@ -191,9 +189,9 @@ static void mvpp2_txdesc_size_set(struct mvpp2_port *port,
                                  size_t size)
 {
        if (port->priv->hw_version == MVPP21)
-               tx_desc->pp21.data_size = size;
+               tx_desc->pp21.data_size = cpu_to_le16(size);
        else
-               tx_desc->pp22.data_size = size;
+               tx_desc->pp22.data_size = cpu_to_le16(size);
 }
 
 static void mvpp2_txdesc_txq_set(struct mvpp2_port *port,
@@ -211,9 +209,9 @@ static void mvpp2_txdesc_cmd_set(struct mvpp2_port *port,
                                 unsigned int command)
 {
        if (port->priv->hw_version == MVPP21)
-               tx_desc->pp21.command = command;
+               tx_desc->pp21.command = cpu_to_le32(command);
        else
-               tx_desc->pp22.command = command;
+               tx_desc->pp22.command = cpu_to_le32(command);
 }
 
 static unsigned int mvpp2_txdesc_offset_get(struct mvpp2_port *port,
@@ -229,36 +227,38 @@ static dma_addr_t mvpp2_rxdesc_dma_addr_get(struct mvpp2_port *port,
                                            struct mvpp2_rx_desc *rx_desc)
 {
        if (port->priv->hw_version == MVPP21)
-               return rx_desc->pp21.buf_dma_addr;
+               return le32_to_cpu(rx_desc->pp21.buf_dma_addr);
        else
-               return rx_desc->pp22.buf_dma_addr_key_hash & MVPP2_DESC_DMA_MASK;
+               return le64_to_cpu(rx_desc->pp22.buf_dma_addr_key_hash) &
+                      MVPP2_DESC_DMA_MASK;
 }
 
 static unsigned long mvpp2_rxdesc_cookie_get(struct mvpp2_port *port,
                                             struct mvpp2_rx_desc *rx_desc)
 {
        if (port->priv->hw_version == MVPP21)
-               return rx_desc->pp21.buf_cookie;
+               return le32_to_cpu(rx_desc->pp21.buf_cookie);
        else
-               return rx_desc->pp22.buf_cookie_misc & MVPP2_DESC_DMA_MASK;
+               return le64_to_cpu(rx_desc->pp22.buf_cookie_misc) &
+                      MVPP2_DESC_DMA_MASK;
 }
 
 static size_t mvpp2_rxdesc_size_get(struct mvpp2_port *port,
                                    struct mvpp2_rx_desc *rx_desc)
 {
        if (port->priv->hw_version == MVPP21)
-               return rx_desc->pp21.data_size;
+               return le16_to_cpu(rx_desc->pp21.data_size);
        else
-               return rx_desc->pp22.data_size;
+               return le16_to_cpu(rx_desc->pp22.data_size);
 }
 
 static u32 mvpp2_rxdesc_status_get(struct mvpp2_port *port,
                                   struct mvpp2_rx_desc *rx_desc)
 {
        if (port->priv->hw_version == MVPP21)
-               return rx_desc->pp21.status;
+               return le32_to_cpu(rx_desc->pp21.status);
        else
-               return rx_desc->pp22.status;
+               return le32_to_cpu(rx_desc->pp22.status);
 }
 
 static void mvpp2_txq_inc_get(struct mvpp2_txq_pcpu *txq_pcpu)
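
All of these accessor changes make the descriptor byte order explicit:
PPv2 descriptors sit in DMA memory in little-endian layout, so every field
is now converted with cpu_to_le*()/le*_to_cpu() on access -- a no-op on
little-endian kernels and a correctness fix on big-endian ones.  Note how
the DMA-address setter converts the mask once instead of the field twice;
for a __le64 field the two forms are equivalent:

    /* what the patch does: the field stays in device byte order */
    tx_desc->pp22.buf_dma_addr_ptp &= ~cpu_to_le64(MVPP2_DESC_DMA_MASK);

    /* equivalent, but with two conversions and a temporary */
    tx_desc->pp22.buf_dma_addr_ptp =
            cpu_to_le64(le64_to_cpu(tx_desc->pp22.buf_dma_addr_ptp) &
                        ~MVPP2_DESC_DMA_MASK);
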
@@ -1735,7 +1735,7 @@ static u32 mvpp2_txq_desc_csum(int l3_offs, int l3_proto,
        command |= (ip_hdr_len << MVPP2_TXD_IP_HLEN_SHIFT);
        command |= MVPP2_TXD_IP_CSUM_DISABLE;
 
-       if (l3_proto == swab16(ETH_P_IP)) {
+       if (l3_proto == htons(ETH_P_IP)) {
                command &= ~MVPP2_TXD_IP_CSUM_DISABLE;  /* enable IPv4 csum */
                command &= ~MVPP2_TXD_L3_IP6;           /* enable IPv4 */
        } else {
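
The swab16() -> htons() change is an endianness fix, not a cleanup: the
l3_proto argument is a network-byte-order ethertype, while ETH_P_IP is a
host-order constant, and an unconditional byte swap only happens to match
on little-endian CPUs:

    /* ETH_P_IP == 0x0800 in host order */
    swab16(ETH_P_IP);  /* 0x0008 on every CPU: wrong on big-endian   */
    htons(ETH_P_IP);   /* 0x0008 on LE, 0x0800 on BE: always correct */
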
@@ -3273,6 +3273,11 @@ static void mvpp2_irqs_deinit(struct mvpp2_port *port)
        }
 }
 
+static bool mvpp22_rss_is_supported(void)
+{
+       return queue_mode == MVPP2_QDIST_MULTI_MODE;
+}
+
 static int mvpp2_open(struct net_device *dev)
 {
        struct mvpp2_port *port = netdev_priv(dev);
@@ -3365,9 +3370,6 @@ static int mvpp2_open(struct net_device *dev)
 
        mvpp2_start_dev(port);
 
-       if (priv->hw_version == MVPP22)
-               mvpp22_init_rss(port);
-
        /* Start hardware statistics gathering */
        queue_delayed_work(priv->stats_queue, &port->stats_work,
                           MVPP2_MIB_COUNTERS_STATS_DELAY);
@@ -3626,6 +3628,13 @@ static int mvpp2_set_features(struct net_device *dev,
                }
        }
 
+       if (changed & NETIF_F_RXHASH) {
+               if (features & NETIF_F_RXHASH)
+                       mvpp22_rss_enable(port);
+               else
+                       mvpp22_rss_disable(port);
+       }
+
        return 0;
 }
 
@@ -3813,6 +3822,94 @@ static int mvpp2_ethtool_set_link_ksettings(struct net_device *dev,
        return phylink_ethtool_ksettings_set(port->phylink, cmd);
 }
 
+static int mvpp2_ethtool_get_rxnfc(struct net_device *dev,
+                                  struct ethtool_rxnfc *info, u32 *rules)
+{
+       struct mvpp2_port *port = netdev_priv(dev);
+       int ret = 0;
+
+       if (!mvpp22_rss_is_supported())
+               return -EOPNOTSUPP;
+
+       switch (info->cmd) {
+       case ETHTOOL_GRXFH:
+               ret = mvpp2_ethtool_rxfh_get(port, info);
+               break;
+       case ETHTOOL_GRXRINGS:
+               info->data = port->nrxqs;
+               break;
+       default:
+               return -EOPNOTSUPP;
+       }
+
+       return ret;
+}
+
+static int mvpp2_ethtool_set_rxnfc(struct net_device *dev,
+                                  struct ethtool_rxnfc *info)
+{
+       struct mvpp2_port *port = netdev_priv(dev);
+       int ret = 0;
+
+       if (!mvpp22_rss_is_supported())
+               return -EOPNOTSUPP;
+
+       switch (info->cmd) {
+       case ETHTOOL_SRXFH:
+               ret = mvpp2_ethtool_rxfh_set(port, info);
+               break;
+       default:
+               return -EOPNOTSUPP;
+       }
+       return ret;
+}
+
+static u32 mvpp2_ethtool_get_rxfh_indir_size(struct net_device *dev)
+{
+       return mvpp22_rss_is_supported() ? MVPP22_RSS_TABLE_ENTRIES : 0;
+}
+
+static int mvpp2_ethtool_get_rxfh(struct net_device *dev, u32 *indir, u8 *key,
+                                 u8 *hfunc)
+{
+       struct mvpp2_port *port = netdev_priv(dev);
+
+       if (!mvpp22_rss_is_supported())
+               return -EOPNOTSUPP;
+
+       if (indir)
+               memcpy(indir, port->indir,
+                      ARRAY_SIZE(port->indir) * sizeof(port->indir[0]));
+
+       if (hfunc)
+               *hfunc = ETH_RSS_HASH_CRC32;
+
+       return 0;
+}
+
+static int mvpp2_ethtool_set_rxfh(struct net_device *dev, const u32 *indir,
+                                 const u8 *key, const u8 hfunc)
+{
+       struct mvpp2_port *port = netdev_priv(dev);
+
+       if (!mvpp22_rss_is_supported())
+               return -EOPNOTSUPP;
+
+       if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_CRC32)
+               return -EOPNOTSUPP;
+
+       if (key)
+               return -EOPNOTSUPP;
+
+       if (indir) {
+               memcpy(port->indir, indir,
+                      ARRAY_SIZE(port->indir) * sizeof(port->indir[0]));
+               mvpp22_rss_fill_table(port, port->id);
+       }
+
+       return 0;
+}
+
 /* Device ops */
 
 static const struct net_device_ops mvpp2_netdev_ops = {
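
With these ops in place, the RSS indirection table is reachable through
the standard ethtool RXFH interface (ethtool -x / -X): the driver pins the
hash function to CRC32 and rejects key updates, the hash evidently taking
no user-supplied key on this hardware.  Each table entry is simply an RX
queue index; a plausible default fill, round-robin across the queues,
would look like the sketch below (the helper name is hypothetical --
mvpp22_rss_port_init() itself is not shown in this hunk):

    static void mvpp22_rss_indir_default(struct mvpp2_port *port)
    {
            int i;

            /* spread the MVPP22_RSS_TABLE_ENTRIES slots over nrxqs */
            for (i = 0; i < MVPP22_RSS_TABLE_ENTRIES; i++)
                    port->indir[i] = i % port->nrxqs;
    }
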
@@ -3844,6 +3941,12 @@ static const struct ethtool_ops mvpp2_eth_tool_ops = {
        .set_pauseparam         = mvpp2_ethtool_set_pause_param,
        .get_link_ksettings     = mvpp2_ethtool_get_link_ksettings,
        .set_link_ksettings     = mvpp2_ethtool_set_link_ksettings,
+       .get_rxnfc              = mvpp2_ethtool_get_rxnfc,
+       .set_rxnfc              = mvpp2_ethtool_set_rxnfc,
+       .get_rxfh_indir_size    = mvpp2_ethtool_get_rxfh_indir_size,
+       .get_rxfh               = mvpp2_ethtool_get_rxfh,
+       .set_rxfh               = mvpp2_ethtool_set_rxfh,
+
 };
 
 /* Used for PPv2.1, or PPv2.2 with the old Device Tree binding that
@@ -3985,8 +4088,8 @@ static int mvpp2_port_init(struct mvpp2_port *port)
            MVPP2_MAX_PORTS * priv->max_port_rxqs)
                return -EINVAL;
 
-       if (port->nrxqs % 4 || (port->nrxqs > priv->max_port_rxqs) ||
-           (port->ntxqs > MVPP2_MAX_TXQ))
+       if (port->nrxqs % MVPP2_DEFAULT_RXQ ||
+           port->nrxqs > priv->max_port_rxqs || port->ntxqs > MVPP2_MAX_TXQ)
                return -EINVAL;
 
        /* Disable port */
@@ -4075,6 +4178,9 @@ static int mvpp2_port_init(struct mvpp2_port *port)
        mvpp2_cls_oversize_rxq_set(port);
        mvpp2_cls_port_config(port);
 
+       if (mvpp22_rss_is_supported())
+               mvpp22_rss_port_init(port);
+
        /* Provide an initial Rx packet size */
        port->pkt_size = MVPP2_RX_PKT_SIZE(port->dev->mtu);
 
@@ -4681,6 +4787,9 @@ static int mvpp2_port_probe(struct platform_device *pdev,
        dev->hw_features |= features | NETIF_F_RXCSUM | NETIF_F_GRO |
                            NETIF_F_HW_VLAN_CTAG_FILTER;
 
+       if (mvpp22_rss_is_supported())
+               dev->hw_features |= NETIF_F_RXHASH;
+
        if (port->pool_long->id == MVPP2_BM_JUMBO && port->id != 0) {
                dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
                dev->hw_features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
@@ -5011,6 +5120,12 @@ static int mvpp2_probe(struct platform_device *pdev)
                        (unsigned long)of_device_get_match_data(&pdev->dev);
        }
 
+       /* Multi-queue mode isn't supported on PPv2.1; fall back to single
+        * mode
+        */
+       if (priv->hw_version == MVPP21)
+               queue_mode = MVPP2_QDIST_SINGLE_MODE;
+
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        base = devm_ioremap_resource(&pdev->dev, res);
        if (IS_ERR(base))
@@ -5174,6 +5289,8 @@ static int mvpp2_probe(struct platform_device *pdev)
                goto err_port_probe;
        }
 
+       mvpp2_dbgfs_init(priv, pdev->name);
+
        platform_set_drvdata(pdev, priv);
        return 0;
 
@@ -5207,6 +5324,8 @@ static int mvpp2_remove(struct platform_device *pdev)
        struct fwnode_handle *port_fwnode;
        int i = 0;
 
+       mvpp2_dbgfs_cleanup(priv);
+
        flush_workqueue(priv->stats_queue);
        destroy_workqueue(priv->stats_queue);
 
index 6bb69f086794ffdc16af81418aeeadc9766f9cf3..392fd895f27826e81153f230603fa37b8e921fcc 100644
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Header Parser helpers for Marvell PPv2 Network Controller
  *
  * Copyright (C) 2014 Marvell
  *
  * Marcin Wojtas <mw@semihalf.com>
- *
- * This file is licensed under the terms of the GNU General Public
- * License version 2. This program is licensed "as is" without any
- * warranty of any kind, whether express or implied.
  */
 
 #include <linux/kernel.h>
@@ -30,24 +27,24 @@ static int mvpp2_prs_hw_write(struct mvpp2 *priv, struct mvpp2_prs_entry *pe)
                return -EINVAL;
 
        /* Clear entry invalidation bit */
-       pe->tcam.word[MVPP2_PRS_TCAM_INV_WORD] &= ~MVPP2_PRS_TCAM_INV_MASK;
+       pe->tcam[MVPP2_PRS_TCAM_INV_WORD] &= ~MVPP2_PRS_TCAM_INV_MASK;
 
        /* Write tcam index - indirect access */
        mvpp2_write(priv, MVPP2_PRS_TCAM_IDX_REG, pe->index);
        for (i = 0; i < MVPP2_PRS_TCAM_WORDS; i++)
-               mvpp2_write(priv, MVPP2_PRS_TCAM_DATA_REG(i), pe->tcam.word[i]);
+               mvpp2_write(priv, MVPP2_PRS_TCAM_DATA_REG(i), pe->tcam[i]);
 
        /* Write sram index - indirect access */
        mvpp2_write(priv, MVPP2_PRS_SRAM_IDX_REG, pe->index);
        for (i = 0; i < MVPP2_PRS_SRAM_WORDS; i++)
-               mvpp2_write(priv, MVPP2_PRS_SRAM_DATA_REG(i), pe->sram.word[i]);
+               mvpp2_write(priv, MVPP2_PRS_SRAM_DATA_REG(i), pe->sram[i]);
 
        return 0;
 }
 
 /* Initialize tcam entry from hw */
-static int mvpp2_prs_init_from_hw(struct mvpp2 *priv,
-                                 struct mvpp2_prs_entry *pe, int tid)
+int mvpp2_prs_init_from_hw(struct mvpp2 *priv, struct mvpp2_prs_entry *pe,
+                          int tid)
 {
        int i;
 
@@ -60,18 +57,18 @@ static int mvpp2_prs_init_from_hw(struct mvpp2 *priv,
        /* Write tcam index - indirect access */
        mvpp2_write(priv, MVPP2_PRS_TCAM_IDX_REG, pe->index);
 
-       pe->tcam.word[MVPP2_PRS_TCAM_INV_WORD] = mvpp2_read(priv,
+       pe->tcam[MVPP2_PRS_TCAM_INV_WORD] = mvpp2_read(priv,
                              MVPP2_PRS_TCAM_DATA_REG(MVPP2_PRS_TCAM_INV_WORD));
-       if (pe->tcam.word[MVPP2_PRS_TCAM_INV_WORD] & MVPP2_PRS_TCAM_INV_MASK)
+       if (pe->tcam[MVPP2_PRS_TCAM_INV_WORD] & MVPP2_PRS_TCAM_INV_MASK)
                return MVPP2_PRS_TCAM_ENTRY_INVALID;
 
        for (i = 0; i < MVPP2_PRS_TCAM_WORDS; i++)
-               pe->tcam.word[i] = mvpp2_read(priv, MVPP2_PRS_TCAM_DATA_REG(i));
+               pe->tcam[i] = mvpp2_read(priv, MVPP2_PRS_TCAM_DATA_REG(i));
 
        /* Write sram index - indirect access */
        mvpp2_write(priv, MVPP2_PRS_SRAM_IDX_REG, pe->index);
        for (i = 0; i < MVPP2_PRS_SRAM_WORDS; i++)
-               pe->sram.word[i] = mvpp2_read(priv, MVPP2_PRS_SRAM_DATA_REG(i));
+               pe->sram[i] = mvpp2_read(priv, MVPP2_PRS_SRAM_DATA_REG(i));
 
        return 0;
 }
@@ -103,42 +100,35 @@ static void mvpp2_prs_shadow_ri_set(struct mvpp2 *priv, int index,
 /* Update lookup field in tcam sw entry */
 static void mvpp2_prs_tcam_lu_set(struct mvpp2_prs_entry *pe, unsigned int lu)
 {
-       int enable_off = MVPP2_PRS_TCAM_EN_OFFS(MVPP2_PRS_TCAM_LU_BYTE);
-
-       pe->tcam.byte[MVPP2_PRS_TCAM_LU_BYTE] = lu;
-       pe->tcam.byte[enable_off] = MVPP2_PRS_LU_MASK;
+       pe->tcam[MVPP2_PRS_TCAM_LU_WORD] &= ~MVPP2_PRS_TCAM_LU(MVPP2_PRS_LU_MASK);
+       pe->tcam[MVPP2_PRS_TCAM_LU_WORD] &= ~MVPP2_PRS_TCAM_LU_EN(MVPP2_PRS_LU_MASK);
+       pe->tcam[MVPP2_PRS_TCAM_LU_WORD] |= MVPP2_PRS_TCAM_LU(lu & MVPP2_PRS_LU_MASK);
+       pe->tcam[MVPP2_PRS_TCAM_LU_WORD] |= MVPP2_PRS_TCAM_LU_EN(MVPP2_PRS_LU_MASK);
 }
 
 /* Update mask for single port in tcam sw entry */
 static void mvpp2_prs_tcam_port_set(struct mvpp2_prs_entry *pe,
                                    unsigned int port, bool add)
 {
-       int enable_off = MVPP2_PRS_TCAM_EN_OFFS(MVPP2_PRS_TCAM_PORT_BYTE);
-
        if (add)
-               pe->tcam.byte[enable_off] &= ~(1 << port);
+               pe->tcam[MVPP2_PRS_TCAM_PORT_WORD] &= ~MVPP2_PRS_TCAM_PORT_EN(BIT(port));
        else
-               pe->tcam.byte[enable_off] |= 1 << port;
+               pe->tcam[MVPP2_PRS_TCAM_PORT_WORD] |= MVPP2_PRS_TCAM_PORT_EN(BIT(port));
 }
 
 /* Update port map in tcam sw entry */
 static void mvpp2_prs_tcam_port_map_set(struct mvpp2_prs_entry *pe,
                                        unsigned int ports)
 {
-       unsigned char port_mask = MVPP2_PRS_PORT_MASK;
-       int enable_off = MVPP2_PRS_TCAM_EN_OFFS(MVPP2_PRS_TCAM_PORT_BYTE);
-
-       pe->tcam.byte[MVPP2_PRS_TCAM_PORT_BYTE] = 0;
-       pe->tcam.byte[enable_off] &= ~port_mask;
-       pe->tcam.byte[enable_off] |= ~ports & MVPP2_PRS_PORT_MASK;
+       pe->tcam[MVPP2_PRS_TCAM_PORT_WORD] &= ~MVPP2_PRS_TCAM_PORT(MVPP2_PRS_PORT_MASK);
+       pe->tcam[MVPP2_PRS_TCAM_PORT_WORD] &= ~MVPP2_PRS_TCAM_PORT_EN(MVPP2_PRS_PORT_MASK);
+       pe->tcam[MVPP2_PRS_TCAM_PORT_WORD] |= MVPP2_PRS_TCAM_PORT_EN(~ports & MVPP2_PRS_PORT_MASK);
 }
 
 /* Obtain port map from tcam sw entry */
-static unsigned int mvpp2_prs_tcam_port_map_get(struct mvpp2_prs_entry *pe)
+unsigned int mvpp2_prs_tcam_port_map_get(struct mvpp2_prs_entry *pe)
 {
-       int enable_off = MVPP2_PRS_TCAM_EN_OFFS(MVPP2_PRS_TCAM_PORT_BYTE);
-
-       return ~(pe->tcam.byte[enable_off]) & MVPP2_PRS_PORT_MASK;
+       return (~pe->tcam[MVPP2_PRS_TCAM_PORT_WORD] >> 24) & MVPP2_PRS_PORT_MASK;
 }
 
 /* Set byte of data and its enable bits in tcam sw entry */
@@ -146,55 +136,58 @@ static void mvpp2_prs_tcam_data_byte_set(struct mvpp2_prs_entry *pe,
                                         unsigned int offs, unsigned char byte,
                                         unsigned char enable)
 {
-       pe->tcam.byte[MVPP2_PRS_TCAM_DATA_BYTE(offs)] = byte;
-       pe->tcam.byte[MVPP2_PRS_TCAM_DATA_BYTE_EN(offs)] = enable;
+       int pos = MVPP2_PRS_BYTE_IN_WORD(offs) * BITS_PER_BYTE;
+
+       pe->tcam[MVPP2_PRS_BYTE_TO_WORD(offs)] &= ~(0xff << pos);
+       pe->tcam[MVPP2_PRS_BYTE_TO_WORD(offs)] &= ~(MVPP2_PRS_TCAM_EN(0xff) << pos);
+       pe->tcam[MVPP2_PRS_BYTE_TO_WORD(offs)] |= byte << pos;
+       pe->tcam[MVPP2_PRS_BYTE_TO_WORD(offs)] |= MVPP2_PRS_TCAM_EN(enable << pos);
 }
 
 /* Get byte of data and its enable bits from tcam sw entry */
-static void mvpp2_prs_tcam_data_byte_get(struct mvpp2_prs_entry *pe,
-                                        unsigned int offs, unsigned char *byte,
-                                        unsigned char *enable)
+void mvpp2_prs_tcam_data_byte_get(struct mvpp2_prs_entry *pe,
+                                 unsigned int offs, unsigned char *byte,
+                                 unsigned char *enable)
 {
-       *byte = pe->tcam.byte[MVPP2_PRS_TCAM_DATA_BYTE(offs)];
-       *enable = pe->tcam.byte[MVPP2_PRS_TCAM_DATA_BYTE_EN(offs)];
+       int pos = MVPP2_PRS_BYTE_IN_WORD(offs) * BITS_PER_BYTE;
+
+       *byte = (pe->tcam[MVPP2_PRS_BYTE_TO_WORD(offs)] >> pos) & 0xff;
+       *enable = (pe->tcam[MVPP2_PRS_BYTE_TO_WORD(offs)] >> (pos + 16)) & 0xff;
 }
 
 /* Compare tcam data bytes with a pattern */
 static bool mvpp2_prs_tcam_data_cmp(struct mvpp2_prs_entry *pe, int offs,
                                    u16 data)
 {
-       int off = MVPP2_PRS_TCAM_DATA_BYTE(offs);
        u16 tcam_data;
 
-       tcam_data = (pe->tcam.byte[off + 1] << 8) | pe->tcam.byte[off];
-       if (tcam_data != data)
-               return false;
-       return true;
+       tcam_data = pe->tcam[MVPP2_PRS_BYTE_TO_WORD(offs)] & 0xffff;
+       return tcam_data == data;
 }
 
 /* Update ai bits in tcam sw entry */
 static void mvpp2_prs_tcam_ai_update(struct mvpp2_prs_entry *pe,
                                     unsigned int bits, unsigned int enable)
 {
-       int i, ai_idx = MVPP2_PRS_TCAM_AI_BYTE;
+       int i;
 
        for (i = 0; i < MVPP2_PRS_AI_BITS; i++) {
                if (!(enable & BIT(i)))
                        continue;
 
                if (bits & BIT(i))
-                       pe->tcam.byte[ai_idx] |= 1 << i;
+                       pe->tcam[MVPP2_PRS_TCAM_AI_WORD] |= BIT(i);
                else
-                       pe->tcam.byte[ai_idx] &= ~(1 << i);
+                       pe->tcam[MVPP2_PRS_TCAM_AI_WORD] &= ~BIT(i);
        }
 
-       pe->tcam.byte[MVPP2_PRS_TCAM_EN_OFFS(ai_idx)] |= enable;
+       pe->tcam[MVPP2_PRS_TCAM_AI_WORD] |= MVPP2_PRS_TCAM_AI_EN(enable);
 }
 
 /* Get ai bits from tcam sw entry */
 static int mvpp2_prs_tcam_ai_get(struct mvpp2_prs_entry *pe)
 {
-       return pe->tcam.byte[MVPP2_PRS_TCAM_AI_BYTE];
+       return pe->tcam[MVPP2_PRS_TCAM_AI_WORD] & MVPP2_PRS_AI_MASK;
 }
 
 /* Set ethertype in tcam sw entry */
@@ -215,16 +208,16 @@ static void mvpp2_prs_match_vid(struct mvpp2_prs_entry *pe, int offset,
 
 /* Set bits in sram sw entry */
 static void mvpp2_prs_sram_bits_set(struct mvpp2_prs_entry *pe, int bit_num,
-                                   int val)
+                                   u32 val)
 {
-       pe->sram.byte[MVPP2_BIT_TO_BYTE(bit_num)] |= (val << (bit_num % 8));
+       pe->sram[MVPP2_BIT_TO_WORD(bit_num)] |= (val << (MVPP2_BIT_IN_WORD(bit_num)));
 }
 
 /* Clear bits in sram sw entry */
 static void mvpp2_prs_sram_bits_clear(struct mvpp2_prs_entry *pe, int bit_num,
-                                     int val)
+                                     u32 val)
 {
-       pe->sram.byte[MVPP2_BIT_TO_BYTE(bit_num)] &= ~(val << (bit_num % 8));
+       pe->sram[MVPP2_BIT_TO_WORD(bit_num)] &= ~(val << (MVPP2_BIT_IN_WORD(bit_num)));
 }
 
 /* Update ri bits in sram sw entry */
@@ -234,15 +227,16 @@ static void mvpp2_prs_sram_ri_update(struct mvpp2_prs_entry *pe,
        unsigned int i;
 
        for (i = 0; i < MVPP2_PRS_SRAM_RI_CTRL_BITS; i++) {
-               int ri_off = MVPP2_PRS_SRAM_RI_OFFS;
-
                if (!(mask & BIT(i)))
                        continue;
 
                if (bits & BIT(i))
-                       mvpp2_prs_sram_bits_set(pe, ri_off + i, 1);
+                       mvpp2_prs_sram_bits_set(pe, MVPP2_PRS_SRAM_RI_OFFS + i,
+                                               1);
                else
-                       mvpp2_prs_sram_bits_clear(pe, ri_off + i, 1);
+                       mvpp2_prs_sram_bits_clear(pe,
+                                                 MVPP2_PRS_SRAM_RI_OFFS + i,
+                                                 1);
 
                mvpp2_prs_sram_bits_set(pe, MVPP2_PRS_SRAM_RI_CTRL_OFFS + i, 1);
        }
@@ -251,7 +245,7 @@ static void mvpp2_prs_sram_ri_update(struct mvpp2_prs_entry *pe,
 /* Obtain ri bits from sram sw entry */
 static int mvpp2_prs_sram_ri_get(struct mvpp2_prs_entry *pe)
 {
-       return pe->sram.word[MVPP2_PRS_SRAM_RI_WORD];
+       return pe->sram[MVPP2_PRS_SRAM_RI_WORD];
 }
 
 /* Update ai bits in sram sw entry */
@@ -259,16 +253,18 @@ static void mvpp2_prs_sram_ai_update(struct mvpp2_prs_entry *pe,
                                     unsigned int bits, unsigned int mask)
 {
        unsigned int i;
-       int ai_off = MVPP2_PRS_SRAM_AI_OFFS;
 
        for (i = 0; i < MVPP2_PRS_SRAM_AI_CTRL_BITS; i++) {
                if (!(mask & BIT(i)))
                        continue;
 
                if (bits & BIT(i))
-                       mvpp2_prs_sram_bits_set(pe, ai_off + i, 1);
+                       mvpp2_prs_sram_bits_set(pe, MVPP2_PRS_SRAM_AI_OFFS + i,
+                                               1);
                else
-                       mvpp2_prs_sram_bits_clear(pe, ai_off + i, 1);
+                       mvpp2_prs_sram_bits_clear(pe,
+                                                 MVPP2_PRS_SRAM_AI_OFFS + i,
+                                                 1);
 
                mvpp2_prs_sram_bits_set(pe, MVPP2_PRS_SRAM_AI_CTRL_OFFS + i, 1);
        }
@@ -278,12 +274,12 @@ static void mvpp2_prs_sram_ai_update(struct mvpp2_prs_entry *pe,
 static int mvpp2_prs_sram_ai_get(struct mvpp2_prs_entry *pe)
 {
        u8 bits;
-       int ai_off = MVPP2_BIT_TO_BYTE(MVPP2_PRS_SRAM_AI_OFFS);
-       int ai_en_off = ai_off + 1;
-       int ai_shift = MVPP2_PRS_SRAM_AI_OFFS % 8;
+       /* ai is stored in bits 90..97, so it spreads across two u32 words */
+       int ai_off = MVPP2_BIT_TO_WORD(MVPP2_PRS_SRAM_AI_OFFS);
+       int ai_shift = MVPP2_BIT_IN_WORD(MVPP2_PRS_SRAM_AI_OFFS);
 
-       bits = (pe->sram.byte[ai_off] >> ai_shift) |
-              (pe->sram.byte[ai_en_off] << (8 - ai_shift));
+       bits = (pe->sram[ai_off] >> ai_shift) |
+              (pe->sram[ai_off + 1] << (32 - ai_shift));
 
        return bits;
 }
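
The reworked mvpp2_prs_sram_ai_get() is where the "spreads across two u32
words" comment pays off.  Checking the arithmetic:

    /* MVPP2_PRS_SRAM_AI_OFFS == 90, so
     *   ai_off   = 90 / 32 = 2   and   ai_shift = 90 % 32 = 26:
     * AI bits 90..95 come from sram[2] bits 31:26 (the >> 26), bits
     * 96..97 from sram[3] bits 1:0 (the << 6), and the u8 truncation
     * keeps exactly those 8 bits.
     */
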
@@ -316,8 +312,7 @@ static void mvpp2_prs_sram_shift_set(struct mvpp2_prs_entry *pe, int shift,
        }
 
        /* Set value */
-       pe->sram.byte[MVPP2_BIT_TO_BYTE(MVPP2_PRS_SRAM_SHIFT_OFFS)] =
-                                                          (unsigned char)shift;
+       pe->sram[MVPP2_BIT_TO_WORD(MVPP2_PRS_SRAM_SHIFT_OFFS)] = shift & MVPP2_PRS_SRAM_SHIFT_MASK;
 
        /* Reset and set operation */
        mvpp2_prs_sram_bits_clear(pe, MVPP2_PRS_SRAM_OP_SEL_SHIFT_OFFS,
@@ -346,13 +341,8 @@ static void mvpp2_prs_sram_offset_set(struct mvpp2_prs_entry *pe,
        /* Set value */
        mvpp2_prs_sram_bits_clear(pe, MVPP2_PRS_SRAM_UDF_OFFS,
                                  MVPP2_PRS_SRAM_UDF_MASK);
-       mvpp2_prs_sram_bits_set(pe, MVPP2_PRS_SRAM_UDF_OFFS, offset);
-       pe->sram.byte[MVPP2_BIT_TO_BYTE(MVPP2_PRS_SRAM_UDF_OFFS +
-                                       MVPP2_PRS_SRAM_UDF_BITS)] &=
-             ~(MVPP2_PRS_SRAM_UDF_MASK >> (8 - (MVPP2_PRS_SRAM_UDF_OFFS % 8)));
-       pe->sram.byte[MVPP2_BIT_TO_BYTE(MVPP2_PRS_SRAM_UDF_OFFS +
-                                       MVPP2_PRS_SRAM_UDF_BITS)] |=
-                               (offset >> (8 - (MVPP2_PRS_SRAM_UDF_OFFS % 8)));
+       mvpp2_prs_sram_bits_set(pe, MVPP2_PRS_SRAM_UDF_OFFS,
+                               offset & MVPP2_PRS_SRAM_UDF_MASK);
 
        /* Set offset type */
        mvpp2_prs_sram_bits_clear(pe, MVPP2_PRS_SRAM_UDF_TYPE_OFFS,
@@ -362,16 +352,8 @@ static void mvpp2_prs_sram_offset_set(struct mvpp2_prs_entry *pe,
        /* Set offset operation */
        mvpp2_prs_sram_bits_clear(pe, MVPP2_PRS_SRAM_OP_SEL_UDF_OFFS,
                                  MVPP2_PRS_SRAM_OP_SEL_UDF_MASK);
-       mvpp2_prs_sram_bits_set(pe, MVPP2_PRS_SRAM_OP_SEL_UDF_OFFS, op);
-
-       pe->sram.byte[MVPP2_BIT_TO_BYTE(MVPP2_PRS_SRAM_OP_SEL_UDF_OFFS +
-                                       MVPP2_PRS_SRAM_OP_SEL_UDF_BITS)] &=
-                                            ~(MVPP2_PRS_SRAM_OP_SEL_UDF_MASK >>
-                                   (8 - (MVPP2_PRS_SRAM_OP_SEL_UDF_OFFS % 8)));
-
-       pe->sram.byte[MVPP2_BIT_TO_BYTE(MVPP2_PRS_SRAM_OP_SEL_UDF_OFFS +
-                                       MVPP2_PRS_SRAM_OP_SEL_UDF_BITS)] |=
-                            (op >> (8 - (MVPP2_PRS_SRAM_OP_SEL_UDF_OFFS % 8)));
+       mvpp2_prs_sram_bits_set(pe, MVPP2_PRS_SRAM_OP_SEL_UDF_OFFS,
+                               op & MVPP2_PRS_SRAM_OP_SEL_UDF_MASK);
 
        /* Set base offset as current */
        mvpp2_prs_sram_bits_clear(pe, MVPP2_PRS_SRAM_OP_SEL_BASE_OFFS, 1);
@@ -662,7 +644,7 @@ static int mvpp2_prs_vlan_find(struct mvpp2 *priv, unsigned short tpid, int ai)
                        continue;
 
                mvpp2_prs_init_from_hw(priv, &pe, tid);
-               match = mvpp2_prs_tcam_data_cmp(&pe, 0, swab16(tpid));
+               match = mvpp2_prs_tcam_data_cmp(&pe, 0, tpid);
                if (!match)
                        continue;
 
@@ -790,8 +772,8 @@ static int mvpp2_prs_double_vlan_find(struct mvpp2 *priv, unsigned short tpid1,
 
                mvpp2_prs_init_from_hw(priv, &pe, tid);
 
-               match = mvpp2_prs_tcam_data_cmp(&pe, 0, swab16(tpid1)) &&
-                       mvpp2_prs_tcam_data_cmp(&pe, 4, swab16(tpid2));
+               match = mvpp2_prs_tcam_data_cmp(&pe, 0, tpid1) &&
+                       mvpp2_prs_tcam_data_cmp(&pe, 4, tpid2);
 
                if (!match)
                        continue;
@@ -932,8 +914,8 @@ static int mvpp2_prs_ip4_proto(struct mvpp2 *priv, unsigned short proto,
 
        pe.index = tid;
        /* Clear ri before updating */
-       pe.sram.word[MVPP2_PRS_SRAM_RI_WORD] = 0x0;
-       pe.sram.word[MVPP2_PRS_SRAM_RI_CTRL_WORD] = 0x0;
+       pe.sram[MVPP2_PRS_SRAM_RI_WORD] = 0x0;
+       pe.sram[MVPP2_PRS_SRAM_RI_CTRL_WORD] = 0x0;
        mvpp2_prs_sram_ri_update(&pe, ri, ri_mask);
 
        mvpp2_prs_sram_ri_update(&pe, ri | MVPP2_PRS_RI_IP_FRAG_TRUE,
@@ -1433,17 +1415,13 @@ static int mvpp2_prs_etype_init(struct mvpp2 *priv)
 
        pe.index = tid;
 
-       /* Clear tcam data before updating */
-       pe.tcam.byte[MVPP2_PRS_TCAM_DATA_BYTE(MVPP2_ETH_TYPE_LEN)] = 0x0;
-       pe.tcam.byte[MVPP2_PRS_TCAM_DATA_BYTE_EN(MVPP2_ETH_TYPE_LEN)] = 0x0;
-
        mvpp2_prs_tcam_data_byte_set(&pe, MVPP2_ETH_TYPE_LEN,
                                     MVPP2_PRS_IPV4_HEAD,
                                     MVPP2_PRS_IPV4_HEAD_MASK);
 
        /* Clear ri before updating */
-       pe.sram.word[MVPP2_PRS_SRAM_RI_WORD] = 0x0;
-       pe.sram.word[MVPP2_PRS_SRAM_RI_CTRL_WORD] = 0x0;
+       pe.sram[MVPP2_PRS_SRAM_RI_WORD] = 0x0;
+       pe.sram[MVPP2_PRS_SRAM_RI_CTRL_WORD] = 0x0;
        mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L3_IP4_OPT,
                                 MVPP2_PRS_RI_L3_PROTO_MASK);
 
@@ -1644,8 +1622,8 @@ static int mvpp2_prs_pppoe_init(struct mvpp2 *priv)
                                     MVPP2_PRS_IPV4_IHL_MASK);
 
        /* Clear ri before updating */
-       pe.sram.word[MVPP2_PRS_SRAM_RI_WORD] = 0x0;
-       pe.sram.word[MVPP2_PRS_SRAM_RI_CTRL_WORD] = 0x0;
+       pe.sram[MVPP2_PRS_SRAM_RI_WORD] = 0x0;
+       pe.sram[MVPP2_PRS_SRAM_RI_CTRL_WORD] = 0x0;
        mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L3_IP4,
                                 MVPP2_PRS_RI_L3_PROTO_MASK);
 
@@ -2428,6 +2406,41 @@ int mvpp2_prs_tag_mode_set(struct mvpp2 *priv, int port, int type)
        return 0;
 }
 
+int mvpp2_prs_add_flow(struct mvpp2 *priv, int flow, u32 ri, u32 ri_mask)
+{
+       struct mvpp2_prs_entry pe;
+       u8 *ri_byte, *ri_byte_mask;
+       int tid, i;
+
+       memset(&pe, 0, sizeof(pe));
+
+       tid = mvpp2_prs_tcam_first_free(priv,
+                                       MVPP2_PE_LAST_FREE_TID,
+                                       MVPP2_PE_FIRST_FREE_TID);
+       if (tid < 0)
+               return tid;
+
+       pe.index = tid;
+
+       ri_byte = (u8 *)&ri;
+       ri_byte_mask = (u8 *)&ri_mask;
+
+       mvpp2_prs_sram_ai_update(&pe, flow, MVPP2_PRS_FLOW_ID_MASK);
+       mvpp2_prs_sram_bits_set(&pe, MVPP2_PRS_SRAM_LU_DONE_BIT, 1);
+
+       for (i = 0; i < 4; i++) {
+               mvpp2_prs_tcam_data_byte_set(&pe, i, ri_byte[i],
+                                            ri_byte_mask[i]);
+       }
+
+       mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_FLOWS);
+       mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_FLOWS);
+       mvpp2_prs_tcam_port_map_set(&pe, MVPP2_PRS_PORT_MASK);
+       mvpp2_prs_hw_write(priv, &pe);
+
+       return 0;
+}
+
 /* Set prs flow for the port */
 int mvpp2_prs_def_flow(struct mvpp2_port *port)
 {
@@ -2465,3 +2478,19 @@ int mvpp2_prs_def_flow(struct mvpp2_port *port)
 
        return 0;
 }
+
+int mvpp2_prs_hits(struct mvpp2 *priv, int index)
+{
+       u32 val;
+
+       if (index >= MVPP2_PRS_TCAM_SRAM_SIZE)
+               return -EINVAL;
+
+       mvpp2_write(priv, MVPP2_PRS_TCAM_HIT_IDX_REG, index);
+
+       val = mvpp2_read(priv, MVPP2_PRS_TCAM_HIT_CNT_REG);
+
+       val &= MVPP2_PRS_TCAM_HIT_CNT_MASK;
+
+       return val;
+}
index 22fbbc4c8b2805ad7545978864bae24ae862b423..e22f6c85d380346312147daf531bf6c3626e9589 100644
@@ -1,22 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Header Parser definitions for Marvell PPv2 Network Controller
  *
  * Copyright (C) 2014 Marvell
  *
  * Marcin Wojtas <mw@semihalf.com>
- *
- * This file is licensed under the terms of the GNU General Public
- * License version 2. This program is licensed "as is" without any
- * warranty of any kind, whether express or implied.
  */
+#ifndef _MVPP2_PRS_H_
+#define _MVPP2_PRS_H_
+
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
+#include <linux/platform_device.h>
 
 #include "mvpp2.h"
 
-#ifndef _MVPP2_PRS_H_
-#define _MVPP2_PRS_H_
-
 /* Parser constants */
 #define MVPP2_PRS_TCAM_SRAM_SIZE       256
 #define MVPP2_PRS_TCAM_WORDS           6
  * The fields are represented by MVPP2_PRS_TCAM_DATA_REG(5)->(0).
  */
 #define MVPP2_PRS_AI_BITS                      8
+#define MVPP2_PRS_AI_MASK                      0xff
 #define MVPP2_PRS_PORT_MASK                    0xff
 #define MVPP2_PRS_LU_MASK                      0xf
-#define MVPP2_PRS_TCAM_DATA_BYTE(offs)         \
-                                   (((offs) - ((offs) % 2)) * 2 + ((offs) % 2))
-#define MVPP2_PRS_TCAM_DATA_BYTE_EN(offs)      \
-                                             (((offs) * 2) - ((offs) % 2)  + 2)
-#define MVPP2_PRS_TCAM_AI_BYTE                 16
-#define MVPP2_PRS_TCAM_PORT_BYTE               17
-#define MVPP2_PRS_TCAM_LU_BYTE                 20
-#define MVPP2_PRS_TCAM_EN_OFFS(offs)           ((offs) + 2)
-#define MVPP2_PRS_TCAM_INV_WORD                        5
+
+/* TCAM entries in registers are accessed using 16 data bits + 16 enable bits */
+#define MVPP2_PRS_BYTE_TO_WORD(byte)   ((byte) / 2)
+#define MVPP2_PRS_BYTE_IN_WORD(byte)   ((byte) % 2)
+
+#define MVPP2_PRS_TCAM_EN(data)                ((data) << 16)
+#define MVPP2_PRS_TCAM_AI_WORD         4
+#define MVPP2_PRS_TCAM_AI(ai)          (ai)
+#define MVPP2_PRS_TCAM_AI_EN(ai)       MVPP2_PRS_TCAM_EN(MVPP2_PRS_TCAM_AI(ai))
+#define MVPP2_PRS_TCAM_PORT_WORD       4
+#define MVPP2_PRS_TCAM_PORT(p)         ((p) << 8)
+#define MVPP2_PRS_TCAM_PORT_EN(p)      MVPP2_PRS_TCAM_EN(MVPP2_PRS_TCAM_PORT(p))
+#define MVPP2_PRS_TCAM_LU_WORD         5
+#define MVPP2_PRS_TCAM_LU(lu)          (lu)
+#define MVPP2_PRS_TCAM_LU_EN(lu)       MVPP2_PRS_TCAM_EN(MVPP2_PRS_TCAM_LU(lu))
+#define MVPP2_PRS_TCAM_INV_WORD                5
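
The new word-based macros encode the register format described in the
comment above: each 32-bit TCAM word packs 16 data bits in its low half
and the matching 16 enable bits in its high half.  Tracing one byte
through the macros:

    /* header byte 5:
     *   MVPP2_PRS_BYTE_TO_WORD(5) == 2      -> stored in tcam[2]
     *   MVPP2_PRS_BYTE_IN_WORD(5) * 8 == 8  -> data in bits 15:8
     *   MVPP2_PRS_TCAM_EN() shifts by 16    -> enable bits in 31:24
     */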
 
 #define MVPP2_PRS_VID_TCAM_BYTE         2
 
 #define MVPP2_PRS_SRAM_RI_CTRL_BITS            32
 #define MVPP2_PRS_SRAM_SHIFT_OFFS              64
 #define MVPP2_PRS_SRAM_SHIFT_SIGN_BIT          72
+#define MVPP2_PRS_SRAM_SHIFT_MASK              0xff
 #define MVPP2_PRS_SRAM_UDF_OFFS                        73
 #define MVPP2_PRS_SRAM_UDF_BITS                        8
 #define MVPP2_PRS_SRAM_UDF_MASK                        0xff
 #define MVPP2_PRS_RI_UDF7_IP6_LITE             BIT(29)
 #define MVPP2_PRS_RI_DROP_MASK                 0x80000000
 
+#define MVPP2_PRS_IP_MASK                      (MVPP2_PRS_RI_L3_PROTO_MASK | \
+                                               MVPP2_PRS_RI_IP_FRAG_MASK | \
+                                               MVPP2_PRS_RI_L4_PROTO_MASK)
+
 /* Sram additional info bits assignment */
 #define MVPP2_PRS_IPV4_DIP_AI_BIT              BIT(0)
 #define MVPP2_PRS_IPV6_NO_EXT_AI_BIT           BIT(0)
@@ -255,20 +266,15 @@ enum mvpp2_prs_lookup {
        MVPP2_PRS_LU_LAST,
 };
 
-union mvpp2_prs_tcam_entry {
-       u32 word[MVPP2_PRS_TCAM_WORDS];
-       u8  byte[MVPP2_PRS_TCAM_WORDS * 4];
-};
-
-union mvpp2_prs_sram_entry {
-       u32 word[MVPP2_PRS_SRAM_WORDS];
-       u8  byte[MVPP2_PRS_SRAM_WORDS * 4];
-};
-
 struct mvpp2_prs_entry {
        u32 index;
-       union mvpp2_prs_tcam_entry tcam;
-       union mvpp2_prs_sram_entry sram;
+       u32 tcam[MVPP2_PRS_TCAM_WORDS];
+       u32 sram[MVPP2_PRS_SRAM_WORDS];
+};
+
+struct mvpp2_prs_result_info {
+       u32 ri;
+       u32 ri_mask;
 };
 
 struct mvpp2_prs_shadow {
@@ -288,10 +294,21 @@ struct mvpp2_prs_shadow {
 
 int mvpp2_prs_default_init(struct platform_device *pdev, struct mvpp2 *priv);
 
+int mvpp2_prs_init_from_hw(struct mvpp2 *priv, struct mvpp2_prs_entry *pe,
+                          int tid);
+
+unsigned int mvpp2_prs_tcam_port_map_get(struct mvpp2_prs_entry *pe);
+
+void mvpp2_prs_tcam_data_byte_get(struct mvpp2_prs_entry *pe,
+                                 unsigned int offs, unsigned char *byte,
+                                 unsigned char *enable);
+
 int mvpp2_prs_mac_da_accept(struct mvpp2_port *port, const u8 *da, bool add);
 
 int mvpp2_prs_tag_mode_set(struct mvpp2 *priv, int port, int type);
 
+int mvpp2_prs_add_flow(struct mvpp2 *priv, int flow, u32 ri, u32 ri_mask);
+
 int mvpp2_prs_def_flow(struct mvpp2_port *port);
 
 void mvpp2_prs_vid_enable_filtering(struct mvpp2_port *port);
@@ -311,4 +328,6 @@ void mvpp2_prs_mac_del_all(struct mvpp2_port *port);
 
 int mvpp2_prs_update_mac_da(struct net_device *dev, const u8 *da);
 
+int mvpp2_prs_hits(struct mvpp2 *priv, int index);
+
 #endif
index d8ebf0a05e0c4fc99387338db9962bbab3f7472e..6e6abdc399deb3c1dc4662faa082e170e2a212f6 100644
@@ -605,10 +605,10 @@ static int mtk_init_fq_dma(struct mtk_eth *eth)
        dma_addr_t dma_addr;
        int i;
 
-       eth->scratch_ring = dma_alloc_coherent(eth->dev,
-                                              cnt * sizeof(struct mtk_tx_dma),
-                                              &eth->phy_scratch_ring,
-                                              GFP_ATOMIC | __GFP_ZERO);
+       eth->scratch_ring = dma_zalloc_coherent(eth->dev,
+                                               cnt * sizeof(struct mtk_tx_dma),
+                                               &eth->phy_scratch_ring,
+                                               GFP_ATOMIC);
        if (unlikely(!eth->scratch_ring))
                return -ENOMEM;
 
@@ -623,7 +623,6 @@ static int mtk_init_fq_dma(struct mtk_eth *eth)
        if (unlikely(dma_mapping_error(eth->dev, dma_addr)))
                return -ENOMEM;
 
-       memset(eth->scratch_ring, 0x0, sizeof(struct mtk_tx_dma) * cnt);
        phy_ring_tail = eth->phy_scratch_ring +
                        (sizeof(struct mtk_tx_dma) * (cnt - 1));
 
@@ -1221,14 +1220,11 @@ static int mtk_tx_alloc(struct mtk_eth *eth)
        if (!ring->buf)
                goto no_tx_mem;
 
-       ring->dma = dma_alloc_coherent(eth->dev,
-                                         MTK_DMA_SIZE * sz,
-                                         &ring->phys,
-                                         GFP_ATOMIC | __GFP_ZERO);
+       ring->dma = dma_zalloc_coherent(eth->dev, MTK_DMA_SIZE * sz,
+                                       &ring->phys, GFP_ATOMIC);
        if (!ring->dma)
                goto no_tx_mem;
 
-       memset(ring->dma, 0, MTK_DMA_SIZE * sz);
        for (i = 0; i < MTK_DMA_SIZE; i++) {
                int next = (i + 1) % MTK_DMA_SIZE;
                u32 next_ptr = ring->phys + next * sz;
@@ -1321,10 +1317,9 @@ static int mtk_rx_alloc(struct mtk_eth *eth, int ring_no, int rx_flag)
                        return -ENOMEM;
        }
 
-       ring->dma = dma_alloc_coherent(eth->dev,
-                                      rx_dma_size * sizeof(*ring->dma),
-                                      &ring->phys,
-                                      GFP_ATOMIC | __GFP_ZERO);
+       ring->dma = dma_zalloc_coherent(eth->dev,
+                                       rx_dma_size * sizeof(*ring->dma),
+                                       &ring->phys, GFP_ATOMIC);
        if (!ring->dma)
                return -ENOMEM;
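
These conversions (three sites in this file) fold the zeroing into the
allocation.  At the time of this merge dma_zalloc_coherent() was a thin
inline wrapper, roughly:

    static inline void *dma_zalloc_coherent(struct device *dev, size_t size,
                                            dma_addr_t *dma_handle,
                                            gfp_t flag)
    {
            return dma_alloc_coherent(dev, size, dma_handle,
                                      flag | __GFP_ZERO);
    }

Later kernels made dma_alloc_coherent() zero memory unconditionally and
dropped the wrapper again; either way the transformation here preserves
behaviour.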
 
@@ -2463,42 +2458,6 @@ free_netdev:
        return err;
 }
 
-static int mtk_get_chip_id(struct mtk_eth *eth, u32 *chip_id)
-{
-       u32 val[2], id[4];
-
-       regmap_read(eth->ethsys, ETHSYS_CHIPID0_3, &val[0]);
-       regmap_read(eth->ethsys, ETHSYS_CHIPID4_7, &val[1]);
-
-       id[3] = ((val[0] >> 16) & 0xff) - '0';
-       id[2] = ((val[0] >> 24) & 0xff) - '0';
-       id[1] = (val[1] & 0xff) - '0';
-       id[0] = ((val[1] >> 8) & 0xff) - '0';
-
-       *chip_id = (id[3] * 1000) + (id[2] * 100) +
-                  (id[1] * 10) + id[0];
-
-       if (!(*chip_id)) {
-               dev_err(eth->dev, "failed to get chip id\n");
-               return -ENODEV;
-       }
-
-       dev_info(eth->dev, "chip id = %d\n", *chip_id);
-
-       return 0;
-}
-
-static bool mtk_is_hwlro_supported(struct mtk_eth *eth)
-{
-       switch (eth->chip_id) {
-       case MT7622_ETH:
-       case MT7623_ETH:
-               return true;
-       }
-
-       return false;
-}
-
 static int mtk_probe(struct platform_device *pdev)
 {
        struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -2577,11 +2536,7 @@ static int mtk_probe(struct platform_device *pdev)
        if (err)
                return err;
 
-       err = mtk_get_chip_id(eth, &eth->chip_id);
-       if (err)
-               return err;
-
-       eth->hwlro = mtk_is_hwlro_supported(eth);
+       eth->hwlro = MTK_HAS_CAPS(eth->soc->caps, MTK_HWLRO);
 
        for_each_child_of_node(pdev->dev.of_node, mac_np) {
                if (!of_device_is_compatible(mac_np,
@@ -2670,19 +2625,19 @@ static int mtk_remove(struct platform_device *pdev)
 }
 
 static const struct mtk_soc_data mt2701_data = {
-       .caps = MTK_GMAC1_TRGMII,
+       .caps = MTK_GMAC1_TRGMII | MTK_HWLRO,
        .required_clks = MT7623_CLKS_BITMAP,
        .required_pctl = true,
 };
 
 static const struct mtk_soc_data mt7622_data = {
-       .caps = MTK_DUAL_GMAC_SHARED_SGMII | MTK_GMAC1_ESW,
+       .caps = MTK_DUAL_GMAC_SHARED_SGMII | MTK_GMAC1_ESW | MTK_HWLRO,
        .required_clks = MT7622_CLKS_BITMAP,
        .required_pctl = false,
 };
 
 static const struct mtk_soc_data mt7623_data = {
-       .caps = MTK_GMAC1_TRGMII,
+       .caps = MTK_GMAC1_TRGMII | MTK_HWLRO,
        .required_clks = MT7623_CLKS_BITMAP,
        .required_pctl = true,
 };
index 672b8c353c47d92357722d728d7f0aa922c310b6..46819297fc3ed4f15934eb9bd8173aa2eec80a5b 100644
@@ -566,6 +566,7 @@ struct mtk_rx_ring {
 #define MTK_GMAC2_SGMII                        (BIT(10) | MTK_SGMII)
 #define MTK_DUAL_GMAC_SHARED_SGMII     (BIT(11) | MTK_GMAC1_SGMII | \
                                         MTK_GMAC2_SGMII)
+#define MTK_HWLRO                      BIT(12)
 #define MTK_HAS_CAPS(caps, _x)         (((caps) & (_x)) == (_x))
 
 /* struct mtk_eth_data -       This is the structure holding all differences
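
Replacing the runtime chip-id probe with a per-SoC capability bit relies
on MTK_HAS_CAPS() requiring the full bit pattern -- it compares with ==
rather than testing any single bit:

    /* ((caps) & (_x)) == (_x): every bit of _x must be set.  A composite
     * cap such as MTK_DUAL_GMAC_SHARED_SGMII (BIT(11) plus the per-GMAC
     * SGMII caps) only matches when all of its bits are present; a
     * single-bit cap like MTK_HWLRO simply tests its own bit:
     */
    eth->hwlro = MTK_HAS_CAPS(eth->soc->caps, MTK_HWLRO);
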
@@ -635,7 +636,6 @@ struct mtk_eth {
        struct regmap                   *ethsys;
        struct regmap                   *sgmiisys;
        struct regmap                   *pctl;
-       u32                             chip_id;
        bool                            hwlro;
        refcount_t                      dma_refcnt;
        struct mtk_tx_ring              tx_ring;
index 16b10d01fcf4bc826fc5d8c7d522cb32c38ab8d9..3f400770fcd8fb8251ed28877d757d75dad52f53 100644 (file)
@@ -3,7 +3,7 @@ obj-$(CONFIG_MLX4_CORE)         += mlx4_core.o
 
 mlx4_core-y := alloc.o catas.o cmd.o cq.o eq.o fw.o fw_qos.o icm.o intf.o \
                main.o mcg.o mr.o pd.o port.o profile.o qp.o reset.o sense.o \
-               srq.o resource_tracker.o
+               srq.o resource_tracker.o crdump.o
 
 obj-$(CONFIG_MLX4_EN)               += mlx4_en.o
 
index e2b6b0cac1acf9dad8168587743ccc5e871a7c92..c81d15bf259c83a8baf11c80d9857cd8759e3b8d 100644 (file)
@@ -178,10 +178,12 @@ void mlx4_enter_error_state(struct mlx4_dev_persistent *persist)
 
        dev = persist->dev;
        mlx4_err(dev, "device is going to be reset\n");
-       if (mlx4_is_slave(dev))
+       if (mlx4_is_slave(dev)) {
                err = mlx4_reset_slave(dev);
-       else
+       } else {
+               mlx4_crdump_collect(dev);
                err = mlx4_reset_master(dev);
+       }
 
        if (!err) {
                mlx4_err(dev, "device was reset successfully\n");
@@ -212,7 +214,7 @@ static void mlx4_handle_error_state(struct mlx4_dev_persistent *persist)
        mutex_lock(&persist->interface_state_mutex);
        if (persist->interface_state & MLX4_INTERFACE_STATE_UP &&
            !(persist->interface_state & MLX4_INTERFACE_STATE_DELETION)) {
-               err = mlx4_restart_one(persist->pdev);
+               err = mlx4_restart_one(persist->pdev, false, NULL);
                mlx4_info(persist->dev, "mlx4_restart_one was ended, ret=%d\n",
                          err);
        }
diff --git a/drivers/net/ethernet/mellanox/mlx4/crdump.c b/drivers/net/ethernet/mellanox/mlx4/crdump.c
new file mode 100644 (file)
index 0000000..88316c7
--- /dev/null
@@ -0,0 +1,239 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "mlx4.h"
+
+#define BAD_ACCESS                     0xBADACCE5
+#define HEALTH_BUFFER_SIZE             0x40
+#define CR_ENABLE_BIT                  swab32(BIT(6))
+#define CR_ENABLE_BIT_OFFSET           0xF3F04
+#define MAX_NUM_OF_DUMPS_TO_STORE      (8)
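+
+/* CR space is exposed big-endian, hence the swab32() on CR_ENABLE_BIT
+ * and on the values written in the helpers below.  BAD_ACCESS is a
+ * hex-speak sentinel (presumably the pattern seen on a failed CR space
+ * read), and MAX_NUM_OF_DUMPS_TO_STORE caps how many snapshots devlink
+ * keeps per region.
+ */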
+
+static const char *region_cr_space_str = "cr-space";
+static const char *region_fw_health_str = "fw-health";
+
+/* Saves whether the CR space enable bit was set before the crdump, so
+ * that it can be restored once the dump completes.
+ */
+static bool crdump_enable_bit_set;
+
+static void crdump_enable_crspace_access(struct mlx4_dev *dev,
+                                        u8 __iomem *cr_space)
+{
+       /* Get current enable bit value */
+       crdump_enable_bit_set =
+               readl(cr_space + CR_ENABLE_BIT_OFFSET) & CR_ENABLE_BIT;
+
+       /* Enable FW CR filter (set bit6 to 0) */
+       if (crdump_enable_bit_set)
+               writel(readl(cr_space + CR_ENABLE_BIT_OFFSET) & ~CR_ENABLE_BIT,
+                      cr_space + CR_ENABLE_BIT_OFFSET);
+
+       /* Enable block volatile crspace accesses */
+       writel(swab32(1), cr_space + dev->caps.health_buffer_addrs +
+              HEALTH_BUFFER_SIZE);
+}
+
+static void crdump_disable_crspace_access(struct mlx4_dev *dev,
+                                         u8 __iomem *cr_space)
+{
+       /* Disable block volatile crspace accesses */
+       writel(0, cr_space + dev->caps.health_buffer_addrs +
+              HEALTH_BUFFER_SIZE);
+
+       /* Restore FW CR filter value (set bit6 to original value) */
+       if (crdump_enable_bit_set)
+               writel(readl(cr_space + CR_ENABLE_BIT_OFFSET) | CR_ENABLE_BIT,
+                      cr_space + CR_ENABLE_BIT_OFFSET);
+}
+
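+/* Copy the whole BAR0 CR space into a kvmalloc'ed buffer and hand it
+ * to devlink as a snapshot.  On success devlink owns the buffer and
+ * releases it through the kvfree destructor; on failure it is freed
+ * here.
+ */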
+static void mlx4_crdump_collect_crspace(struct mlx4_dev *dev,
+                                       u8 __iomem *cr_space,
+                                       u32 id)
+{
+       struct mlx4_fw_crdump *crdump = &dev->persist->crdump;
+       struct pci_dev *pdev = dev->persist->pdev;
+       unsigned long cr_res_size;
+       u8 *crspace_data;
+       int offset;
+       int err;
+
+       if (!crdump->region_crspace) {
+               mlx4_err(dev, "crdump: cr-space region is NULL\n");
+               return;
+       }
+
+       /* Try to collect CR space */
+       cr_res_size = pci_resource_len(pdev, 0);
+       crspace_data = kvmalloc(cr_res_size, GFP_KERNEL);
+       if (crspace_data) {
+               for (offset = 0; offset < cr_res_size; offset += 4)
+                       *(u32 *)(crspace_data + offset) =
+                                       readl(cr_space + offset);
+
+               err = devlink_region_snapshot_create(crdump->region_crspace,
+                                                    cr_res_size, crspace_data,
+                                                    id, &kvfree);
+               if (err) {
+                       kvfree(crspace_data);
+                       mlx4_warn(dev, "crdump: devlink create %s snapshot id %d err %d\n",
+                                 region_cr_space_str, id, err);
+               } else {
+                       mlx4_info(dev, "crdump: added snapshot %d to devlink region %s\n",
+                                 id, region_cr_space_str);
+               }
+       } else {
+               mlx4_err(dev, "crdump: Failed to allocate crspace buffer\n");
+       }
+}
+
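+/* Snapshot the 0x40-byte FW health buffer, read from CR space at the
+ * address reported by QUERY_DEV_CAP (dev->caps.health_buffer_addrs).
+ */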
+static void mlx4_crdump_collect_fw_health(struct mlx4_dev *dev,
+                                         u8 __iomem *cr_space,
+                                         u32 id)
+{
+       struct mlx4_fw_crdump *crdump = &dev->persist->crdump;
+       u8 *health_data;
+       int offset;
+       int err;
+
+       if (!crdump->region_fw_health) {
+               mlx4_err(dev, "crdump: fw-health region is NULL\n");
+               return;
+       }
+
+       /* Try to collect health buffer */
+       health_data = kvmalloc(HEALTH_BUFFER_SIZE, GFP_KERNEL);
+       if (health_data) {
+               u8 __iomem *health_buf_start =
+                               cr_space + dev->caps.health_buffer_addrs;
+
+               for (offset = 0; offset < HEALTH_BUFFER_SIZE; offset += 4)
+                       *(u32 *)(health_data + offset) =
+                                       readl(health_buf_start + offset);
+
+               err = devlink_region_snapshot_create(crdump->region_fw_health,
+                                                    HEALTH_BUFFER_SIZE,
+                                                    health_data,
+                                                    id, &kvfree);
+               if (err) {
+                       kvfree(health_data);
+                       mlx4_warn(dev, "crdump: devlink create %s snapshot id %d err %d\n",
+                                 region_fw_health_str, id, err);
+               } else {
+                       mlx4_info(dev, "crdump: added snapshot %d to devlink region %s\n",
+                                 id, region_fw_health_str);
+               }
+       } else {
+               mlx4_err(dev, "crdump: Failed to allocate health buffer\n");
+       }
+}
+
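+/* Called from mlx4_enter_error_state() before the master is reset:
+ * map BAR0, lift the FW CR space filter, take one snapshot id shared
+ * by both regions, collect the dumps and restore the filter.  (Note
+ * that devlink_region_shapshot_id_get() is the helper's in-tree
+ * spelling at this point.)
+ */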
+int mlx4_crdump_collect(struct mlx4_dev *dev)
+{
+       struct devlink *devlink = priv_to_devlink(mlx4_priv(dev));
+       struct mlx4_fw_crdump *crdump = &dev->persist->crdump;
+       struct pci_dev *pdev = dev->persist->pdev;
+       unsigned long cr_res_size;
+       u8 __iomem *cr_space;
+       u32 id;
+
+       if (!dev->caps.health_buffer_addrs) {
+               mlx4_info(dev, "crdump: FW doesn't support health buffer access, skipping\n");
+               return 0;
+       }
+
+       if (!crdump->snapshot_enable) {
+               mlx4_info(dev, "crdump: devlink snapshot disabled, skipping\n");
+               return 0;
+       }
+
+       cr_res_size = pci_resource_len(pdev, 0);
+
+       cr_space = ioremap(pci_resource_start(pdev, 0), cr_res_size);
+       if (!cr_space) {
+               mlx4_err(dev, "crdump: Failed to map pci cr region\n");
+               return -ENODEV;
+       }
+
+       crdump_enable_crspace_access(dev, cr_space);
+
+       /* Get the available snapshot ID for the dumps */
+       id = devlink_region_shapshot_id_get(devlink);
+
+       /* Try to capture dumps */
+       mlx4_crdump_collect_crspace(dev, cr_space, id);
+       mlx4_crdump_collect_fw_health(dev, cr_space, id);
+
+       crdump_disable_crspace_access(dev, cr_space);
+
+       iounmap(cr_space);
+       return 0;
+}
+
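+/* Register the cr-space and fw-health devlink regions at probe time.
+ * Snapshots are off by default and are enabled through the generic
+ * region_snapshot_enable devlink parameter; a taken dump can then be
+ * read from userspace with something like (illustrative invocation,
+ * device name is hypothetical):
+ *
+ *   devlink region dump pci/0000:03:00.0/cr-space snapshot <id>
+ */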
+int mlx4_crdump_init(struct mlx4_dev *dev)
+{
+       struct devlink *devlink = priv_to_devlink(mlx4_priv(dev));
+       struct mlx4_fw_crdump *crdump = &dev->persist->crdump;
+       struct pci_dev *pdev = dev->persist->pdev;
+
+       crdump->snapshot_enable = false;
+
+       /* Create cr-space region */
+       crdump->region_crspace =
+               devlink_region_create(devlink,
+                                     region_cr_space_str,
+                                     MAX_NUM_OF_DUMPS_TO_STORE,
+                                     pci_resource_len(pdev, 0));
+       if (IS_ERR(crdump->region_crspace))
+               mlx4_warn(dev, "crdump: create devlink region %s err %ld\n",
+                         region_cr_space_str,
+                         PTR_ERR(crdump->region_crspace));
+
+       /* Create fw-health region */
+       crdump->region_fw_health =
+               devlink_region_create(devlink,
+                                     region_fw_health_str,
+                                     MAX_NUM_OF_DUMPS_TO_STORE,
+                                     HEALTH_BUFFER_SIZE);
+       if (IS_ERR(crdump->region_fw_health))
+               mlx4_warn(dev, "crdump: create devlink region %s err %ld\n",
+                         region_fw_health_str,
+                         PTR_ERR(crdump->region_fw_health));
+
+       return 0;
+}
+
+void mlx4_crdump_end(struct mlx4_dev *dev)
+{
+       struct mlx4_fw_crdump *crdump = &dev->persist->crdump;
+
+       devlink_region_destroy(crdump->region_fw_health);
+       devlink_region_destroy(crdump->region_crspace);
+}
index 65eb06e017e401237842503bc3aabad3780c1a2e..6785661d1a72627d7cc6895359e0ece284577d96 100644 (file)
@@ -2926,7 +2926,6 @@ static int mlx4_xdp(struct net_device *dev, struct netdev_bpf *xdp)
                return mlx4_xdp_set(dev, xdp->prog);
        case XDP_QUERY_PROG:
                xdp->prog_id = mlx4_xdp_query(dev);
-               xdp->prog_attached = !!xdp->prog_id;
                return 0;
        default:
                return -EINVAL;
index 0227786308af5d70bdfbb19da3fb8d5760d0651f..1857ee0f0871d48285a6d3711f7c3e9a1e08a05f 100644 (file)
@@ -688,15 +688,16 @@ static void build_inline_wqe(struct mlx4_en_tx_desc *tx_desc,
 }
 
 u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb,
-                        void *accel_priv, select_queue_fallback_t fallback)
+                        struct net_device *sb_dev,
+                        select_queue_fallback_t fallback)
 {
        struct mlx4_en_priv *priv = netdev_priv(dev);
        u16 rings_p_up = priv->num_tx_rings_p_up;
 
        if (netdev_get_num_tc(dev))
-               return fallback(dev, skb);
+               return fallback(dev, skb, NULL);
 
-       return fallback(dev, skb) % rings_p_up;
+       return fallback(dev, skb, NULL) % rings_p_up;
 }
 
 static void mlx4_bf_copy(void __iomem *dst, const void *src,
index 46dcbfbe4c5eb0133ce49c2d222ebd7a2c8e7d44..babcfd9c0571fc6ffac47bd222304b62c65824c1 100644 (file)
@@ -825,7 +825,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 #define QUERY_DEV_CAP_QP_RATE_LIMIT_NUM_OFFSET 0xcc
 #define QUERY_DEV_CAP_QP_RATE_LIMIT_MAX_OFFSET 0xd0
 #define QUERY_DEV_CAP_QP_RATE_LIMIT_MIN_OFFSET 0xd2
-
+#define QUERY_DEV_CAP_HEALTH_BUFFER_ADDRESS_OFFSET     0xe4
 
        dev_cap->flags2 = 0;
        mailbox = mlx4_alloc_cmd_mailbox(dev);
@@ -1082,6 +1082,9 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
                dev_cap->rl_caps.min_unit = size >> 14;
        }
 
+       MLX4_GET(dev_cap->health_buffer_addrs, outbox,
+                QUERY_DEV_CAP_HEALTH_BUFFER_ADDRESS_OFFSET);
+
        MLX4_GET(field32, outbox, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET);
        if (field32 & (1 << 16))
                dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_UPDATE_QP;
index cd6399c76bfdb51887a1eba84150b642ef03cc30..650ae08c71def539ed50db6103a6c036170d1eae 100644 (file)
@@ -128,6 +128,7 @@ struct mlx4_dev_cap {
        u32 dmfs_high_rate_qpn_base;
        u32 dmfs_high_rate_qpn_range;
        struct mlx4_rate_limit_caps rl_caps;
+       u32 health_buffer_addrs;
        struct mlx4_port_cap port_cap[MLX4_MAX_PORTS + 1];
        bool wol_port[MLX4_MAX_PORTS + 1];
 };
index 872014702fc1b0def72197f9e1b505849a5c72da..d2d59444f5626c1d6e08fa7a8f5494b1554e0259 100644 (file)
@@ -159,9 +159,10 @@ static bool use_prio;
 module_param_named(use_prio, use_prio, bool, 0444);
 MODULE_PARM_DESC(use_prio, "Enable steering by VLAN priority on ETH ports (deprecated)");
 
-int log_mtts_per_seg = ilog2(MLX4_MTT_ENTRY_PER_SEG);
+int log_mtts_per_seg = ilog2(1);
 module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444);
-MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment (1-7)");
+MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment "
+                "(0-7) (default: 0)");
 
 static int port_type_array[2] = {MLX4_PORT_TYPE_NONE, MLX4_PORT_TYPE_NONE};
 static int arr_argc = 2;
@@ -177,6 +178,131 @@ struct mlx4_port_config {
 
 static atomic_t pf_loading = ATOMIC_INIT(0);
 
+static int mlx4_devlink_ierr_reset_get(struct devlink *devlink, u32 id,
+                                      struct devlink_param_gset_ctx *ctx)
+{
+       ctx->val.vbool = !!mlx4_internal_err_reset;
+       return 0;
+}
+
+static int mlx4_devlink_ierr_reset_set(struct devlink *devlink, u32 id,
+                                      struct devlink_param_gset_ctx *ctx)
+{
+       mlx4_internal_err_reset = ctx->val.vbool;
+       return 0;
+}
+
+static int mlx4_devlink_crdump_snapshot_get(struct devlink *devlink, u32 id,
+                                           struct devlink_param_gset_ctx *ctx)
+{
+       struct mlx4_priv *priv = devlink_priv(devlink);
+       struct mlx4_dev *dev = &priv->dev;
+
+       ctx->val.vbool = dev->persist->crdump.snapshot_enable;
+       return 0;
+}
+
+static int mlx4_devlink_crdump_snapshot_set(struct devlink *devlink, u32 id,
+                                           struct devlink_param_gset_ctx *ctx)
+{
+       struct mlx4_priv *priv = devlink_priv(devlink);
+       struct mlx4_dev *dev = &priv->dev;
+
+       dev->persist->crdump.snapshot_enable = ctx->val.vbool;
+       return 0;
+}
+
+static int
+mlx4_devlink_max_macs_validate(struct devlink *devlink, u32 id,
+                              union devlink_param_value val,
+                              struct netlink_ext_ack *extack)
+{
+       u32 value = val.vu32;
+
+       if (value < 1 || value > 128)
+               return -ERANGE;
+
+       if (!is_power_of_2(value)) {
+               NL_SET_ERR_MSG_MOD(extack, "max_macs must be a power of 2");
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+enum mlx4_devlink_param_id {
+       MLX4_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX,
+       MLX4_DEVLINK_PARAM_ID_ENABLE_64B_CQE_EQE,
+       MLX4_DEVLINK_PARAM_ID_ENABLE_4K_UAR,
+};
+
+static const struct devlink_param mlx4_devlink_params[] = {
+       DEVLINK_PARAM_GENERIC(INT_ERR_RESET,
+                             BIT(DEVLINK_PARAM_CMODE_RUNTIME) |
+                             BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
+                             mlx4_devlink_ierr_reset_get,
+                             mlx4_devlink_ierr_reset_set, NULL),
+       DEVLINK_PARAM_GENERIC(MAX_MACS,
+                             BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
+                             NULL, NULL, mlx4_devlink_max_macs_validate),
+       DEVLINK_PARAM_GENERIC(REGION_SNAPSHOT,
+                             BIT(DEVLINK_PARAM_CMODE_RUNTIME) |
+                             BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
+                             mlx4_devlink_crdump_snapshot_get,
+                             mlx4_devlink_crdump_snapshot_set, NULL),
+       DEVLINK_PARAM_DRIVER(MLX4_DEVLINK_PARAM_ID_ENABLE_64B_CQE_EQE,
+                            "enable_64b_cqe_eqe", DEVLINK_PARAM_TYPE_BOOL,
+                            BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
+                            NULL, NULL, NULL),
+       DEVLINK_PARAM_DRIVER(MLX4_DEVLINK_PARAM_ID_ENABLE_4K_UAR,
+                            "enable_4k_uar", DEVLINK_PARAM_TYPE_BOOL,
+                            BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
+                            NULL, NULL, NULL),
+};
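+
+/* These parameters are driven from userspace, e.g. (illustrative
+ * invocation, device name is hypothetical):
+ *
+ *   devlink dev param set pci/0000:03:00.0 name max_macs \
+ *           value 64 cmode driverinit
+ *   devlink dev reload pci/0000:03:00.0
+ *
+ * driverinit values only take effect on the next devlink reload; see
+ * mlx4_devlink_reload() below.
+ */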
+
+static void mlx4_devlink_set_init_value(struct devlink *devlink, u32 param_id,
+                                       union devlink_param_value init_val)
+{
+       struct mlx4_priv *priv = devlink_priv(devlink);
+       struct mlx4_dev *dev = &priv->dev;
+       int err;
+
+       err = devlink_param_driverinit_value_set(devlink, param_id, init_val);
+       if (err)
+               mlx4_warn(dev,
+                         "devlink set parameter %u value failed (err = %d)",
+                         param_id, err);
+}
+
+static void mlx4_devlink_set_params_init_values(struct devlink *devlink)
+{
+       union devlink_param_value value;
+
+       value.vbool = !!mlx4_internal_err_reset;
+       mlx4_devlink_set_init_value(devlink,
+                                   DEVLINK_PARAM_GENERIC_ID_INT_ERR_RESET,
+                                   value);
+
+       value.vu32 = 1UL << log_num_mac;
+       mlx4_devlink_set_init_value(devlink,
+                                   DEVLINK_PARAM_GENERIC_ID_MAX_MACS, value);
+
+       value.vbool = enable_64b_cqe_eqe;
+       mlx4_devlink_set_init_value(devlink,
+                                   MLX4_DEVLINK_PARAM_ID_ENABLE_64B_CQE_EQE,
+                                   value);
+
+       value.vbool = enable_4k_uar;
+       mlx4_devlink_set_init_value(devlink,
+                                   MLX4_DEVLINK_PARAM_ID_ENABLE_4K_UAR,
+                                   value);
+
+       value.vbool = false;
+       mlx4_devlink_set_init_value(devlink,
+                                   DEVLINK_PARAM_GENERIC_ID_REGION_SNAPSHOT,
+                                   value);
+}
+
 static inline void mlx4_set_num_reserved_uars(struct mlx4_dev *dev,
                                              struct mlx4_dev_cap *dev_cap)
 {
@@ -428,6 +554,7 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
        dev->caps.max_rss_tbl_sz     = dev_cap->max_rss_tbl_sz;
        dev->caps.wol_port[1]          = dev_cap->wol_port[1];
        dev->caps.wol_port[2]          = dev_cap->wol_port[2];
+       dev->caps.health_buffer_addrs  = dev_cap->health_buffer_addrs;
 
        /* Save uar page shift */
        if (!mlx4_is_slave(dev)) {
@@ -3711,10 +3838,14 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data,
                }
        }
 
-       err = mlx4_catas_init(&priv->dev);
+       err = mlx4_crdump_init(&priv->dev);
        if (err)
                goto err_release_regions;
 
+       err = mlx4_catas_init(&priv->dev);
+       if (err)
+               goto err_crdump;
+
        err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv, 0);
        if (err)
                goto err_catas;
@@ -3724,6 +3855,9 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data,
 err_catas:
        mlx4_catas_end(&priv->dev);
 
+err_crdump:
+       mlx4_crdump_end(&priv->dev);
+
 err_release_regions:
        pci_release_regions(pdev);
 
@@ -3757,8 +3891,68 @@ static int mlx4_devlink_port_type_set(struct devlink_port *devlink_port,
        return __set_port_type(info, mlx4_port_type);
 }
 
+static void mlx4_devlink_param_load_driverinit_values(struct devlink *devlink)
+{
+       struct mlx4_priv *priv = devlink_priv(devlink);
+       struct mlx4_dev *dev = &priv->dev;
+       struct mlx4_fw_crdump *crdump = &dev->persist->crdump;
+       union devlink_param_value saved_value;
+       int err;
+
+       err = devlink_param_driverinit_value_get(devlink,
+                                                DEVLINK_PARAM_GENERIC_ID_INT_ERR_RESET,
+                                                &saved_value);
+       if (!err && mlx4_internal_err_reset != saved_value.vbool) {
+               mlx4_internal_err_reset = saved_value.vbool;
+               /* Notify on value changed on runtime configuration mode */
+               devlink_param_value_changed(devlink,
+                                           DEVLINK_PARAM_GENERIC_ID_INT_ERR_RESET);
+       }
+       err = devlink_param_driverinit_value_get(devlink,
+                                                DEVLINK_PARAM_GENERIC_ID_MAX_MACS,
+                                                &saved_value);
+       if (!err)
+               log_num_mac = order_base_2(saved_value.vu32);
+       err = devlink_param_driverinit_value_get(devlink,
+                                                MLX4_DEVLINK_PARAM_ID_ENABLE_64B_CQE_EQE,
+                                                &saved_value);
+       if (!err)
+               enable_64b_cqe_eqe = saved_value.vbool;
+       err = devlink_param_driverinit_value_get(devlink,
+                                                MLX4_DEVLINK_PARAM_ID_ENABLE_4K_UAR,
+                                                &saved_value);
+       if (!err)
+               enable_4k_uar = saved_value.vbool;
+       err = devlink_param_driverinit_value_get(devlink,
+                                                DEVLINK_PARAM_GENERIC_ID_REGION_SNAPSHOT,
+                                                &saved_value);
+       if (!err && crdump->snapshot_enable != saved_value.vbool) {
+               crdump->snapshot_enable = saved_value.vbool;
+               devlink_param_value_changed(devlink,
+                                           DEVLINK_PARAM_GENERIC_ID_REGION_SNAPSHOT);
+       }
+}
+
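+/* devlink reload: tear the device down, apply any staged driverinit
+ * parameter values and bring it back up.  When invoked on the PF, all
+ * operating VFs are reset as a side effect.
+ */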
+static int mlx4_devlink_reload(struct devlink *devlink,
+                              struct netlink_ext_ack *extack)
+{
+       struct mlx4_priv *priv = devlink_priv(devlink);
+       struct mlx4_dev *dev = &priv->dev;
+       struct mlx4_dev_persistent *persist = dev->persist;
+       int err;
+
+       if (persist->num_vfs)
+               mlx4_warn(persist->dev, "Reload performed on PF; all operating Virtual Functions will be reset\n");
+       err = mlx4_restart_one(persist->pdev, true, devlink);
+       if (err)
+               mlx4_err(persist->dev, "mlx4_restart_one failed, ret=%d\n", err);
+
+       return err;
+}
+
 static const struct devlink_ops mlx4_devlink_ops = {
        .port_type_set  = mlx4_devlink_port_type_set,
+       .reload         = mlx4_devlink_reload,
 };
 
 static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
@@ -3792,14 +3986,21 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
        ret = devlink_register(devlink, &pdev->dev);
        if (ret)
                goto err_persist_free;
-
-       ret =  __mlx4_init_one(pdev, id->driver_data, priv);
+       ret = devlink_params_register(devlink, mlx4_devlink_params,
+                                     ARRAY_SIZE(mlx4_devlink_params));
        if (ret)
                goto err_devlink_unregister;
+       mlx4_devlink_set_params_init_values(devlink);
+       ret =  __mlx4_init_one(pdev, id->driver_data, priv);
+       if (ret)
+               goto err_params_unregister;
 
        pci_save_state(pdev);
        return 0;
 
+err_params_unregister:
+       devlink_params_unregister(devlink, mlx4_devlink_params,
+                                 ARRAY_SIZE(mlx4_devlink_params));
 err_devlink_unregister:
        devlink_unregister(devlink);
 err_persist_free:
@@ -3929,6 +4130,7 @@ static void mlx4_remove_one(struct pci_dev *pdev)
        else
                mlx4_info(dev, "%s: interface is down\n", __func__);
        mlx4_catas_end(dev);
+       mlx4_crdump_end(dev);
        if (dev->flags & MLX4_FLAG_SRIOV && !active_vfs) {
                mlx4_warn(dev, "Disabling SR-IOV\n");
                pci_disable_sriov(pdev);
@@ -3936,6 +4138,8 @@ static void mlx4_remove_one(struct pci_dev *pdev)
 
        pci_release_regions(pdev);
        mlx4_pci_disable_device(dev);
+       devlink_params_unregister(devlink, mlx4_devlink_params,
+                                 ARRAY_SIZE(mlx4_devlink_params));
        devlink_unregister(devlink);
        kfree(dev->persist);
        devlink_free(devlink);
@@ -3960,7 +4164,7 @@ static int restore_current_port_types(struct mlx4_dev *dev,
        return err;
 }
 
-int mlx4_restart_one(struct pci_dev *pdev)
+int mlx4_restart_one(struct pci_dev *pdev, bool reload, struct devlink *devlink)
 {
        struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
        struct mlx4_dev  *dev  = persist->dev;
@@ -3973,6 +4177,8 @@ int mlx4_restart_one(struct pci_dev *pdev)
        memcpy(nvfs, dev->persist->nvfs, sizeof(dev->persist->nvfs));
 
        mlx4_unload_one(pdev);
+       if (reload)
+               mlx4_devlink_param_load_driverinit_values(devlink);
        err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv, 1);
        if (err) {
                mlx4_err(dev, "%s: ERROR: mlx4_load_one failed, pci_name=%s, err=%d\n",
@@ -4205,7 +4411,7 @@ static int __init mlx4_verify_params(void)
        if (use_prio != 0)
                pr_warn("mlx4_core: use_prio - obsolete module param, ignored\n");
 
-       if ((log_mtts_per_seg < 1) || (log_mtts_per_seg > 7)) {
+       if ((log_mtts_per_seg < 0) || (log_mtts_per_seg > 7)) {
                pr_warn("mlx4_core: bad log_mtts_per_seg: %d\n",
                        log_mtts_per_seg);
                return -1;
index cb9e923e83996499c2d98c8023b5ebc069bb0731..ebcd2778eeb3e1f22524064ff2db7762e1f852ba 100644 (file)
@@ -84,7 +84,6 @@ enum {
        MLX4_MIN_MGM_LOG_ENTRY_SIZE = 7,
        MLX4_MAX_MGM_LOG_ENTRY_SIZE = 12,
        MLX4_MAX_QP_PER_MGM = 4 * ((1 << MLX4_MAX_MGM_LOG_ENTRY_SIZE) / 16 - 2),
-       MLX4_MTT_ENTRY_PER_SEG  = 8,
 };
 
 enum {
@@ -1042,7 +1041,10 @@ void mlx4_start_catas_poll(struct mlx4_dev *dev);
 void mlx4_stop_catas_poll(struct mlx4_dev *dev);
 int mlx4_catas_init(struct mlx4_dev *dev);
 void mlx4_catas_end(struct mlx4_dev *dev);
-int mlx4_restart_one(struct pci_dev *pdev);
+int mlx4_crdump_init(struct mlx4_dev *dev);
+void mlx4_crdump_end(struct mlx4_dev *dev);
+int mlx4_restart_one(struct pci_dev *pdev, bool reload,
+                    struct devlink *devlink);
 int mlx4_register_device(struct mlx4_dev *dev);
 void mlx4_unregister_device(struct mlx4_dev *dev);
 void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_dev_event type,
@@ -1227,6 +1229,8 @@ void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type);
 void mlx4_enter_error_state(struct mlx4_dev_persistent *persist);
 int mlx4_comm_internal_err(u32 slave_read);
 
+int mlx4_crdump_collect(struct mlx4_dev *dev);
+
 int mlx4_SENSE_PORT(struct mlx4_dev *dev, int port,
                    enum mlx4_port_type *type);
 void mlx4_do_sense_ports(struct mlx4_dev *dev,
index ace6545f82e6b343d26acd6d0bb4c55cd6ae4809..c3228b89df463597de1cb546754ea1b8aa4d876d 100644 (file)
@@ -699,7 +699,8 @@ void mlx4_en_arm_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq);
 
 void mlx4_en_tx_irq(struct mlx4_cq *mcq);
 u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb,
-                        void *accel_priv, select_queue_fallback_t fallback);
+                        struct net_device *sb_dev,
+                        select_queue_fallback_t fallback);
 netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev);
 netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
                               struct mlx4_en_rx_alloc *frame,
index bae8b22edbb77db02b59754ec0722cb94ffdc396..ba361c5fbda3dea1c181cca6051d2a01ceccc4ff 100644 (file)
@@ -105,7 +105,8 @@ u64 mlx4_make_profile(struct mlx4_dev *dev,
        request->num_mtt =
                roundup_pow_of_two(max_t(unsigned, request->num_mtt,
                                         min(1UL << (31 - log_mtts_per_seg),
-                                            si.totalram >> (log_mtts_per_seg - 1))));
+                                            (si.totalram << 1) >> log_mtts_per_seg)));
 
        profile[MLX4_RES_QP].size     = dev_cap->qpc_entry_sz;
        profile[MLX4_RES_RDMARC].size = dev_cap->rdmarc_entry_sz;
index 2545296a0c087888701048dfd36b957cb91c6993..7a84dd07ced2524178eeffb1b45dec7f1b98a604 100644 (file)
@@ -3,7 +3,7 @@
 #
 
 config MLX5_CORE
-       tristate "Mellanox Technologies ConnectX-4 and Connect-IB core driver"
+       tristate "Mellanox 5th generation network adapters (ConnectX series) core driver"
        depends on MAY_USE_DEVLINK
        depends on PCI
        imply PTP_1588_CLOCK
@@ -27,7 +27,7 @@ config MLX5_FPGA
           sandbox-specific client drivers.
 
 config MLX5_CORE_EN
-       bool "Mellanox Technologies ConnectX-4 Ethernet support"
+       bool "Mellanox 5th generation network adapters (ConnectX series) Ethernet support"
        depends on NETDEVICES && ETHERNET && INET && PCI && MLX5_CORE
        depends on IPV6=y || IPV6=n || MLX5_CORE=m
        select PAGE_POOL
@@ -69,7 +69,7 @@ config MLX5_CORE_EN_DCB
          If unsure, set to Y
 
 config MLX5_CORE_IPOIB
-       bool "Mellanox Technologies ConnectX-4 IPoIB offloads support"
+       bool "Mellanox 5th generation network adapters (connectX series) IPoIB offloads support"
        depends on MLX5_CORE_EN
        default n
        ---help---
index 9efbf193ad5a6ac26ec2599cbd7ba93ab8ec951b..f20fda1ced4f57d4ca0cd04f2a5264071012bb7f 100644 (file)
@@ -6,7 +6,7 @@ mlx5_core-y :=  main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
                health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \
                mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \
                fs_counters.o rl.o lag.o dev.o wq.o lib/gid.o lib/clock.o \
-               diag/fs_tracepoint.o
+               diag/fs_tracepoint.o diag/fw_tracer.o
 
 mlx5_core-$(CONFIG_MLX5_ACCEL) += accel/ipsec.o accel/tls.o
 
@@ -14,8 +14,8 @@ mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o \
                fpga/ipsec.o fpga/tls.o
 
 mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
-               en_tx.o en_rx.o en_dim.o en_txrx.o en_stats.o vxlan.o \
-               en_arfs.o en_fs_ethtool.o en_selftest.o en/port.o
+               en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \
+               en_arfs.o en_fs_ethtool.o en_selftest.o en/port.o lib/vxlan.o
 
 mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/accel.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/accel.h
new file mode 100644 (file)
index 0000000..c132604
--- /dev/null
@@ -0,0 +1,37 @@
+#ifndef __MLX5E_ACCEL_H__
+#define __MLX5E_ACCEL_H__
+
+#ifdef CONFIG_MLX5_ACCEL
+
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include "en.h"
+
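+/* Packets processed by the offload hardware (presumably the Innova
+ * FPGA sandbox) carry a small metadata header advertised through the
+ * dedicated MLX5E_METADATA_ETHER_TYPE ethertype; the helpers below
+ * detect it and strip it before the skb continues up the stack.
+ */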
+static inline bool is_metadata_hdr_valid(struct sk_buff *skb)
+{
+       __be16 *ethtype;
+
+       if (unlikely(skb->len < ETH_HLEN + MLX5E_METADATA_ETHER_LEN))
+               return false;
+       ethtype = (__be16 *)(skb->data + ETH_ALEN * 2);
+       if (*ethtype != cpu_to_be16(MLX5E_METADATA_ETHER_TYPE))
+               return false;
+       return true;
+}
+
+static inline void remove_metadata_hdr(struct sk_buff *skb)
+{
+       struct ethhdr *old_eth;
+       struct ethhdr *new_eth;
+
+       /* Remove the metadata from the buffer */
+       old_eth = (struct ethhdr *)skb->data;
+       new_eth = (struct ethhdr *)(skb->data + MLX5E_METADATA_ETHER_LEN);
+       memmove(new_eth, old_eth, 2 * ETH_ALEN);
+       /* Ethertype is already in its new place */
+       skb_pull_inline(skb, MLX5E_METADATA_ETHER_LEN);
+}
+
+#endif /* CONFIG_MLX5_ACCEL */
+
+#endif /* __MLX5E_ACCEL_H__ */
index 77ac19f38cbe87c2268762cf027d08f8b2451872..da7bd26368f9bd4d19da7ff7b98ef3048860684c 100644 (file)
 #include "mlx5_core.h"
 #include "fpga/tls.h"
 
-int mlx5_accel_tls_add_tx_flow(struct mlx5_core_dev *mdev, void *flow,
-                              struct tls_crypto_info *crypto_info,
-                              u32 start_offload_tcp_sn, u32 *p_swid)
+int mlx5_accel_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
+                           struct tls_crypto_info *crypto_info,
+                           u32 start_offload_tcp_sn, u32 *p_swid,
+                           bool direction_sx)
 {
-       return mlx5_fpga_tls_add_tx_flow(mdev, flow, crypto_info,
-                                        start_offload_tcp_sn, p_swid);
+       return mlx5_fpga_tls_add_flow(mdev, flow, crypto_info,
+                                     start_offload_tcp_sn, p_swid,
+                                     direction_sx);
 }
 
-void mlx5_accel_tls_del_tx_flow(struct mlx5_core_dev *mdev, u32 swid)
+void mlx5_accel_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid,
+                            bool direction_sx)
 {
-       mlx5_fpga_tls_del_tx_flow(mdev, swid, GFP_KERNEL);
+       mlx5_fpga_tls_del_flow(mdev, swid, GFP_KERNEL, direction_sx);
+}
+
+int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq,
+                            u64 rcd_sn)
+{
+       return mlx5_fpga_tls_resync_rx(mdev, handle, seq, rcd_sn);
 }
 
 bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev)
index 6f9c9f446ecc8c129079d9fda018dbcedb9b84f3..def4093ebfae86434af1f81e35bd8cd0c6d167e2 100644 (file)
@@ -60,10 +60,14 @@ struct mlx5_ifc_tls_flow_bits {
        u8         reserved_at_2[0x1e];
 };
 
-int mlx5_accel_tls_add_tx_flow(struct mlx5_core_dev *mdev, void *flow,
-                              struct tls_crypto_info *crypto_info,
-                              u32 start_offload_tcp_sn, u32 *p_swid);
-void mlx5_accel_tls_del_tx_flow(struct mlx5_core_dev *mdev, u32 swid);
+int mlx5_accel_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
+                           struct tls_crypto_info *crypto_info,
+                           u32 start_offload_tcp_sn, u32 *p_swid,
+                           bool direction_sx);
+void mlx5_accel_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid,
+                            bool direction_sx);
+int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq,
+                            u64 rcd_sn);
 bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev);
 u32 mlx5_accel_tls_device_caps(struct mlx5_core_dev *mdev);
 int mlx5_accel_tls_init(struct mlx5_core_dev *mdev);
@@ -72,10 +76,14 @@ void mlx5_accel_tls_cleanup(struct mlx5_core_dev *mdev);
 #else
 
 static inline int
-mlx5_accel_tls_add_tx_flow(struct mlx5_core_dev *mdev, void *flow,
-                          struct tls_crypto_info *crypto_info,
-                          u32 start_offload_tcp_sn, u32 *p_swid) { return 0; }
-static inline void mlx5_accel_tls_del_tx_flow(struct mlx5_core_dev *mdev, u32 swid) { }
+mlx5_accel_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
+                       struct tls_crypto_info *crypto_info,
+                       u32 start_offload_tcp_sn, u32 *p_swid,
+                       bool direction_sx) { return -ENOTSUPP; }
+static inline void mlx5_accel_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid,
+                                          bool direction_sx) { }
+static inline int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle,
+                                          u32 seq, u64 rcd_sn) { return 0; }
 static inline bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev) { return false; }
 static inline u32 mlx5_accel_tls_device_caps(struct mlx5_core_dev *mdev) { return 0; }
 static inline int mlx5_accel_tls_init(struct mlx5_core_dev *mdev) { return 0; }
index 384c1fa490811ee651919c139b9cd9e724d4ff81..f498c7730c5b105cb23f41611ec84f7992dba830 100644 (file)
@@ -278,6 +278,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
        case MLX5_CMD_OP_DESTROY_PSV:
        case MLX5_CMD_OP_DESTROY_SRQ:
        case MLX5_CMD_OP_DESTROY_XRC_SRQ:
+       case MLX5_CMD_OP_DESTROY_XRQ:
        case MLX5_CMD_OP_DESTROY_DCT:
        case MLX5_CMD_OP_DEALLOC_Q_COUNTER:
        case MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT:
@@ -310,6 +311,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
        case MLX5_CMD_OP_DEALLOC_ENCAP_HEADER:
        case MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT:
        case MLX5_CMD_OP_FPGA_DESTROY_QP:
+       case MLX5_CMD_OP_DESTROY_GENERAL_OBJECT:
                return MLX5_CMD_STAT_OK;
 
        case MLX5_CMD_OP_QUERY_HCA_CAP:
@@ -346,6 +348,9 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
        case MLX5_CMD_OP_CREATE_XRC_SRQ:
        case MLX5_CMD_OP_QUERY_XRC_SRQ:
        case MLX5_CMD_OP_ARM_XRC_SRQ:
+       case MLX5_CMD_OP_CREATE_XRQ:
+       case MLX5_CMD_OP_QUERY_XRQ:
+       case MLX5_CMD_OP_ARM_XRQ:
        case MLX5_CMD_OP_CREATE_DCT:
        case MLX5_CMD_OP_DRAIN_DCT:
        case MLX5_CMD_OP_QUERY_DCT:
@@ -427,6 +432,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
        case MLX5_CMD_OP_FPGA_MODIFY_QP:
        case MLX5_CMD_OP_FPGA_QUERY_QP:
        case MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS:
+       case MLX5_CMD_OP_CREATE_GENERAL_OBJECT:
                *status = MLX5_DRIVER_STATUS_ABORTED;
                *synd = MLX5_DRIVER_SYND;
                return -EIO;
@@ -452,6 +458,7 @@ const char *mlx5_command_str(int command)
        MLX5_COMMAND_STR_CASE(SET_HCA_CAP);
        MLX5_COMMAND_STR_CASE(QUERY_ISSI);
        MLX5_COMMAND_STR_CASE(SET_ISSI);
+       MLX5_COMMAND_STR_CASE(SET_DRIVER_VERSION);
        MLX5_COMMAND_STR_CASE(CREATE_MKEY);
        MLX5_COMMAND_STR_CASE(QUERY_MKEY);
        MLX5_COMMAND_STR_CASE(DESTROY_MKEY);
@@ -599,6 +606,12 @@ const char *mlx5_command_str(int command)
        MLX5_COMMAND_STR_CASE(FPGA_QUERY_QP);
        MLX5_COMMAND_STR_CASE(FPGA_QUERY_QP_COUNTERS);
        MLX5_COMMAND_STR_CASE(FPGA_DESTROY_QP);
+       MLX5_COMMAND_STR_CASE(CREATE_XRQ);
+       MLX5_COMMAND_STR_CASE(DESTROY_XRQ);
+       MLX5_COMMAND_STR_CASE(QUERY_XRQ);
+       MLX5_COMMAND_STR_CASE(ARM_XRQ);
+       MLX5_COMMAND_STR_CASE(CREATE_GENERAL_OBJECT);
+       MLX5_COMMAND_STR_CASE(DESTROY_GENERAL_OBJECT);
        default: return "unknown command opcode";
        }
 }
@@ -677,7 +690,7 @@ struct mlx5_ifc_mbox_out_bits {
 
 struct mlx5_ifc_mbox_in_bits {
        u8         opcode[0x10];
-       u8         reserved_at_10[0x10];
+       u8         uid[0x10];
 
        u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
@@ -697,6 +710,7 @@ static int mlx5_cmd_check(struct mlx5_core_dev *dev, void *in, void *out)
        u8  status;
        u16 opcode;
        u16 op_mod;
+       u16 uid;
 
        mlx5_cmd_mbox_status(out, &status, &syndrome);
        if (!status)
@@ -704,8 +718,15 @@ static int mlx5_cmd_check(struct mlx5_core_dev *dev, void *in, void *out)
 
        opcode = MLX5_GET(mbox_in, in, opcode);
        op_mod = MLX5_GET(mbox_in, in, op_mod);
+       uid    = MLX5_GET(mbox_in, in, uid);
 
-       mlx5_core_err(dev,
+       if (!uid && opcode != MLX5_CMD_OP_DESTROY_MKEY)
+               mlx5_core_err_rl(dev,
+                       "%s(0x%x) op_mod(0x%x) failed, status %s(0x%x), syndrome (0x%x)\n",
+                       mlx5_command_str(opcode), opcode, op_mod,
+                       cmd_status_str(status), status, syndrome);
+       else
+               mlx5_core_dbg(dev,
                      "%s(0x%x) op_mod(0x%x) failed, status %s(0x%x), syndrome (0x%x)\n",
                      mlx5_command_str(opcode),
                      opcode, op_mod,
@@ -1022,7 +1043,10 @@ static ssize_t dbg_write(struct file *filp, const char __user *buf,
        if (!dbg->in_msg || !dbg->out_msg)
                return -ENOMEM;
 
-       if (copy_from_user(lbuf, buf, sizeof(lbuf)))
+       if (count < sizeof(lbuf) - 1)
+               return -EINVAL;
+
+       if (copy_from_user(lbuf, buf, sizeof(lbuf) - 1))
                return -EFAULT;
 
        lbuf[sizeof(lbuf) - 1] = 0;
@@ -1226,21 +1250,12 @@ static ssize_t data_read(struct file *filp, char __user *buf, size_t count,
 {
        struct mlx5_core_dev *dev = filp->private_data;
        struct mlx5_cmd_debug *dbg = &dev->cmd.dbg;
-       int copy;
-
-       if (*pos)
-               return 0;
 
        if (!dbg->out_msg)
                return -ENOMEM;
 
-       copy = min_t(int, count, dbg->outlen);
-       if (copy_to_user(buf, dbg->out_msg, copy))
-               return -EFAULT;
-
-       *pos += copy;
-
-       return copy;
+       return simple_read_from_buffer(buf, count, pos, dbg->out_msg,
+                                      dbg->outlen);
 }
 
 static const struct file_operations dfops = {
@@ -1258,19 +1273,11 @@ static ssize_t outlen_read(struct file *filp, char __user *buf, size_t count,
        char outlen[8];
        int err;
 
-       if (*pos)
-               return 0;
-
        err = snprintf(outlen, sizeof(outlen), "%d", dbg->outlen);
        if (err < 0)
                return err;
 
-       if (copy_to_user(buf, &outlen, err))
-               return -EFAULT;
-
-       *pos += err;
-
-       return err;
+       return simple_read_from_buffer(buf, count, pos, outlen, err);
 }
 
 static ssize_t outlen_write(struct file *filp, const char __user *buf,
index 413080a312a7fdba0d159559032d09eeea173460..90fabd612b6cd84f1420afa151cc6c3b0103acfb 100644 (file)
@@ -150,22 +150,13 @@ static ssize_t average_read(struct file *filp, char __user *buf, size_t count,
        int ret;
        char tbuf[22];
 
-       if (*pos)
-               return 0;
-
        stats = filp->private_data;
        spin_lock_irq(&stats->lock);
        if (stats->n)
                field = div64_u64(stats->sum, stats->n);
        spin_unlock_irq(&stats->lock);
        ret = snprintf(tbuf, sizeof(tbuf), "%llu\n", field);
-       if (ret > 0) {
-               if (copy_to_user(buf, tbuf, ret))
-                       return -EFAULT;
-       }
-
-       *pos += ret;
-       return ret;
+       return simple_read_from_buffer(buf, count, pos, tbuf, ret);
 }
 
 static ssize_t average_write(struct file *filp, const char __user *buf,
@@ -442,9 +433,6 @@ static ssize_t dbg_read(struct file *filp, char __user *buf, size_t count,
        u64 field;
        int ret;
 
-       if (*pos)
-               return 0;
-
        desc = filp->private_data;
        d = (void *)(desc - desc->i) - sizeof(*d);
        switch (d->type) {
@@ -470,13 +458,7 @@ static ssize_t dbg_read(struct file *filp, char __user *buf, size_t count,
        else
                ret = snprintf(tbuf, sizeof(tbuf), "0x%llx\n", field);
 
-       if (ret > 0) {
-               if (copy_to_user(buf, tbuf, ret))
-                       return -EFAULT;
-       }
-
-       *pos += ret;
-       return ret;
+       return simple_read_from_buffer(buf, count, pos, tbuf, ret);
 }
 
 static const struct file_operations fops = {
index 09f178a3fcabb4082229a4f4985fc47295482d7f..0240aee9189eb90b551be15642344428f9485d83 100644 (file)
@@ -138,6 +138,8 @@ TRACE_EVENT(mlx5_fs_del_fg,
        {MLX5_FLOW_CONTEXT_ACTION_MOD_HDR,       "MOD_HDR"},\
        {MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH,     "VLAN_PUSH"},\
        {MLX5_FLOW_CONTEXT_ACTION_VLAN_POP,      "VLAN_POP"},\
+       {MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2,   "VLAN_PUSH_2"},\
+       {MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2,    "VLAN_POP_2"},\
        {MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO, "NEXT_PRIO"}
 
 TRACE_EVENT(mlx5_fs_set_fte,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
new file mode 100644 (file)
index 0000000..d4ec93b
--- /dev/null
@@ -0,0 +1,947 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#define CREATE_TRACE_POINTS
+#include "fw_tracer.h"
+#include "fw_tracer_tracepoint.h"
+
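+/* FW tracer: the device DMA-writes trace events into a host buffer
+ * described by an mkey, and the driver turns them back into readable
+ * messages using format strings pulled from the on-device string
+ * databases.  Ownership is arbitrated through the MTRC_CAP trace_owner
+ * field so only one function drives the tracer at a time.
+ */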
+static int mlx5_query_mtrc_caps(struct mlx5_fw_tracer *tracer)
+{
+       u32 *string_db_base_address_out = tracer->str_db.base_address_out;
+       u32 *string_db_size_out = tracer->str_db.size_out;
+       struct mlx5_core_dev *dev = tracer->dev;
+       u32 out[MLX5_ST_SZ_DW(mtrc_cap)] = {0};
+       u32 in[MLX5_ST_SZ_DW(mtrc_cap)] = {0};
+       void *mtrc_cap_sp;
+       int err, i;
+
+       err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
+                                  MLX5_REG_MTRC_CAP, 0, 0);
+       if (err) {
+               mlx5_core_warn(dev, "FWTracer: Error reading tracer caps %d\n",
+                              err);
+               return err;
+       }
+
+       if (!MLX5_GET(mtrc_cap, out, trace_to_memory)) {
+               mlx5_core_dbg(dev, "FWTracer: Device does not support logging traces to memory\n");
+               return -ENOTSUPP;
+       }
+
+       tracer->trc_ver = MLX5_GET(mtrc_cap, out, trc_ver);
+       tracer->str_db.first_string_trace =
+                       MLX5_GET(mtrc_cap, out, first_string_trace);
+       tracer->str_db.num_string_trace =
+                       MLX5_GET(mtrc_cap, out, num_string_trace);
+       tracer->str_db.num_string_db = MLX5_GET(mtrc_cap, out, num_string_db);
+       tracer->owner = !!MLX5_GET(mtrc_cap, out, trace_owner);
+
+       for (i = 0; i < tracer->str_db.num_string_db; i++) {
+               mtrc_cap_sp = MLX5_ADDR_OF(mtrc_cap, out, string_db_param[i]);
+               string_db_base_address_out[i] = MLX5_GET(mtrc_string_db_param,
+                                                        mtrc_cap_sp,
+                                                        string_db_base_address);
+               string_db_size_out[i] = MLX5_GET(mtrc_string_db_param,
+                                                mtrc_cap_sp, string_db_size);
+       }
+
+       return err;
+}
+
+static int mlx5_set_mtrc_caps_trace_owner(struct mlx5_fw_tracer *tracer,
+                                         u32 *out, u32 out_size,
+                                         u8 trace_owner)
+{
+       struct mlx5_core_dev *dev = tracer->dev;
+       u32 in[MLX5_ST_SZ_DW(mtrc_cap)] = {0};
+
+       MLX5_SET(mtrc_cap, in, trace_owner, trace_owner);
+
+       return mlx5_core_access_reg(dev, in, sizeof(in), out, out_size,
+                                   MLX5_REG_MTRC_CAP, 0, 1);
+}
+
+static int mlx5_fw_tracer_ownership_acquire(struct mlx5_fw_tracer *tracer)
+{
+       struct mlx5_core_dev *dev = tracer->dev;
+       u32 out[MLX5_ST_SZ_DW(mtrc_cap)] = {0};
+       int err;
+
+       err = mlx5_set_mtrc_caps_trace_owner(tracer, out, sizeof(out),
+                                            MLX5_FW_TRACER_ACQUIRE_OWNERSHIP);
+       if (err) {
+               mlx5_core_warn(dev, "FWTracer: Acquire tracer ownership failed %d\n",
+                              err);
+               return err;
+       }
+
+       tracer->owner = !!MLX5_GET(mtrc_cap, out, trace_owner);
+
+       if (!tracer->owner)
+               return -EBUSY;
+
+       return 0;
+}
+
+static void mlx5_fw_tracer_ownership_release(struct mlx5_fw_tracer *tracer)
+{
+       u32 out[MLX5_ST_SZ_DW(mtrc_cap)] = {0};
+
+       mlx5_set_mtrc_caps_trace_owner(tracer, out, sizeof(out),
+                                      MLX5_FW_TRACER_RELEASE_OWNERSHIP);
+       tracer->owner = false;
+}
+
+static int mlx5_fw_tracer_create_log_buf(struct mlx5_fw_tracer *tracer)
+{
+       struct mlx5_core_dev *dev = tracer->dev;
+       struct device *ddev = &dev->pdev->dev;
+       dma_addr_t dma;
+       void *buff;
+       gfp_t gfp;
+       int err;
+
+       tracer->buff.size = TRACE_BUFFER_SIZE_BYTE;
+
+       gfp = GFP_KERNEL | __GFP_ZERO;
+       buff = (void *)__get_free_pages(gfp,
+                                       get_order(tracer->buff.size));
+       if (!buff) {
+               err = -ENOMEM;
+               mlx5_core_warn(dev, "FWTracer: Failed to allocate pages, %d\n", err);
+               return err;
+       }
+       tracer->buff.log_buf = buff;
+
+       dma = dma_map_single(ddev, buff, tracer->buff.size, DMA_FROM_DEVICE);
+       if (dma_mapping_error(ddev, dma)) {
+               mlx5_core_warn(dev, "FWTracer: Unable to map DMA: %d\n",
+                              dma_mapping_error(ddev, dma));
+               err = -ENOMEM;
+               goto free_pages;
+       }
+       tracer->buff.dma = dma;
+
+       return 0;
+
+free_pages:
+       free_pages((unsigned long)tracer->buff.log_buf, get_order(tracer->buff.size));
+
+       return err;
+}
+
+static void mlx5_fw_tracer_destroy_log_buf(struct mlx5_fw_tracer *tracer)
+{
+       struct mlx5_core_dev *dev = tracer->dev;
+       struct device *ddev = &dev->pdev->dev;
+
+       if (!tracer->buff.log_buf)
+               return;
+
+       dma_unmap_single(ddev, tracer->buff.dma, tracer->buff.size, DMA_FROM_DEVICE);
+       free_pages((unsigned long)tracer->buff.log_buf, get_order(tracer->buff.size));
+}
+
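+/* Build an MTT-based mkey over the trace buffer so FW can DMA trace
+ * events into it.  MTT entries are packed two per octword, hence the
+ * DIV_ROUND_UP(TRACER_BUFFER_PAGE_NUM, 2) octword sizes.
+ */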
+static int mlx5_fw_tracer_create_mkey(struct mlx5_fw_tracer *tracer)
+{
+       struct mlx5_core_dev *dev = tracer->dev;
+       int err, inlen, i;
+       __be64 *mtt;
+       void *mkc;
+       u32 *in;
+
+       inlen = MLX5_ST_SZ_BYTES(create_mkey_in) +
+                       sizeof(*mtt) * round_up(TRACER_BUFFER_PAGE_NUM, 2);
+
+       in = kvzalloc(inlen, GFP_KERNEL);
+       if (!in)
+               return -ENOMEM;
+
+       MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
+                DIV_ROUND_UP(TRACER_BUFFER_PAGE_NUM, 2));
+       mtt = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
+       for (i = 0 ; i < TRACER_BUFFER_PAGE_NUM ; i++)
+               mtt[i] = cpu_to_be64(tracer->buff.dma + i * PAGE_SIZE);
+
+       mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+       MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
+       MLX5_SET(mkc, mkc, lr, 1);
+       MLX5_SET(mkc, mkc, lw, 1);
+       MLX5_SET(mkc, mkc, pd, tracer->buff.pdn);
+       MLX5_SET(mkc, mkc, bsf_octword_size, 0);
+       MLX5_SET(mkc, mkc, qpn, 0xffffff);
+       MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
+       MLX5_SET(mkc, mkc, translations_octword_size,
+                DIV_ROUND_UP(TRACER_BUFFER_PAGE_NUM, 2));
+       MLX5_SET64(mkc, mkc, start_addr, tracer->buff.dma);
+       MLX5_SET64(mkc, mkc, len, tracer->buff.size);
+       err = mlx5_core_create_mkey(dev, &tracer->buff.mkey, in, inlen);
+       if (err)
+               mlx5_core_warn(dev, "FWTracer: Failed to create mkey, %d\n", err);
+
+       kvfree(in);
+
+       return err;
+}
+
+static void mlx5_fw_tracer_free_strings_db(struct mlx5_fw_tracer *tracer)
+{
+       u32 num_string_db = tracer->str_db.num_string_db;
+       int i;
+
+       for (i = 0; i < num_string_db; i++) {
+               kfree(tracer->str_db.buffer[i]);
+               tracer->str_db.buffer[i] = NULL;
+       }
+}
+
+static int mlx5_fw_tracer_allocate_strings_db(struct mlx5_fw_tracer *tracer)
+{
+       u32 *string_db_size_out = tracer->str_db.size_out;
+       u32 num_string_db = tracer->str_db.num_string_db;
+       int i;
+
+       for (i = 0; i < num_string_db; i++) {
+               tracer->str_db.buffer[i] = kzalloc(string_db_size_out[i], GFP_KERNEL);
+               if (!tracer->str_db.buffer[i])
+                       goto free_strings_db;
+       }
+
+       return 0;
+
+free_strings_db:
+       mlx5_fw_tracer_free_strings_db(tracer);
+       return -ENOMEM;
+}
+
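+/* Worker that pulls the format-string databases out of the device via
+ * the MTRC_STDB access register: whole STRINGS_DB_READ_SIZE_BYTES
+ * chunks first, then the 64-byte-aligned leftovers.
+ */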
+static void mlx5_tracer_read_strings_db(struct work_struct *work)
+{
+       struct mlx5_fw_tracer *tracer = container_of(work, struct mlx5_fw_tracer,
+                                                    read_fw_strings_work);
+       u32 num_of_reads, num_string_db = tracer->str_db.num_string_db;
+       struct mlx5_core_dev *dev = tracer->dev;
+       u32 in[MLX5_ST_SZ_DW(mtrc_cap)] = {0};
+       u32 leftovers, offset;
+       int err = 0, i, j;
+       u32 *out, outlen;
+       void *out_value;
+
+       outlen = MLX5_ST_SZ_BYTES(mtrc_stdb) + STRINGS_DB_READ_SIZE_BYTES;
+       out = kzalloc(outlen, GFP_KERNEL);
+       if (!out) {
+               err = -ENOMEM;
+               goto out;
+       }
+
+       for (i = 0; i < num_string_db; i++) {
+               offset = 0;
+               MLX5_SET(mtrc_stdb, in, string_db_index, i);
+               num_of_reads = tracer->str_db.size_out[i] /
+                               STRINGS_DB_READ_SIZE_BYTES;
+               leftovers = (tracer->str_db.size_out[i] %
+                               STRINGS_DB_READ_SIZE_BYTES) /
+                                       STRINGS_DB_LEFTOVER_SIZE_BYTES;
+
+               MLX5_SET(mtrc_stdb, in, read_size, STRINGS_DB_READ_SIZE_BYTES);
+               for (j = 0; j < num_of_reads; j++) {
+                       MLX5_SET(mtrc_stdb, in, start_offset, offset);
+
+                       err = mlx5_core_access_reg(dev, in, sizeof(in), out,
+                                                  outlen, MLX5_REG_MTRC_STDB,
+                                                  0, 1);
+                       if (err) {
+                               mlx5_core_dbg(dev, "FWTracer: Failed to read strings DB %d\n",
+                                             err);
+                               goto out_free;
+                       }
+
+                       out_value = MLX5_ADDR_OF(mtrc_stdb, out, string_db_data);
+                       memcpy(tracer->str_db.buffer[i] + offset, out_value,
+                              STRINGS_DB_READ_SIZE_BYTES);
+                       offset += STRINGS_DB_READ_SIZE_BYTES;
+               }
+
+               /* The strings database is aligned to 64 bytes, so read the leftovers */
+               MLX5_SET(mtrc_stdb, in, read_size,
+                        STRINGS_DB_LEFTOVER_SIZE_BYTES);
+               for (j = 0; j < leftovers; j++) {
+                       MLX5_SET(mtrc_stdb, in, start_offset, offset);
+
+                       err = mlx5_core_access_reg(dev, in, sizeof(in), out,
+                                                  outlen, MLX5_REG_MTRC_STDB,
+                                                  0, 1);
+                       if (err) {
+                               mlx5_core_dbg(dev, "FWTracer: Failed to read strings DB %d\n",
+                                             err);
+                               goto out_free;
+                       }
+
+                       out_value = MLX5_ADDR_OF(mtrc_stdb, out, string_db_data);
+                       memcpy(tracer->str_db.buffer[i] + offset, out_value,
+                              STRINGS_DB_LEFTOVER_SIZE_BYTES);
+                       offset += STRINGS_DB_LEFTOVER_SIZE_BYTES;
+               }
+       }
+
+       tracer->str_db.loaded = true;
+
+out_free:
+       kfree(out);
+out:
+       return;
+}
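+
+/* A worked example of the chunked read above (illustrative sizes): a
+ * 4672-byte section yields 4672 / 256 = 18 full reads plus
+ * (4672 % 256) / 64 = 1 leftover read, so 18 * 256 + 1 * 64 = 4672 bytes
+ * are copied and the section is consumed exactly. This relies on the
+ * 64-byte alignment of the strings database.
+ */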
+
+static void mlx5_fw_tracer_arm(struct mlx5_core_dev *dev)
+{
+       u32 out[MLX5_ST_SZ_DW(mtrc_ctrl)] = {0};
+       u32 in[MLX5_ST_SZ_DW(mtrc_ctrl)] = {0};
+       int err;
+
+       MLX5_SET(mtrc_ctrl, in, arm_event, 1);
+
+       err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
+                                  MLX5_REG_MTRC_CTRL, 0, 1);
+       if (err)
+               mlx5_core_warn(dev, "FWTracer: Failed to arm tracer event %d\n", err);
+}
+
+static const char *VAL_PARM            = "%llx";
+static const char *REPLACE_64_VAL_PARM = "%x%x";
+static const char *PARAM_CHAR          = "%";
+
+static int mlx5_tracer_message_hash(u32 message_id)
+{
+       return jhash_1word(message_id, 0) & (MESSAGE_HASH_SIZE - 1);
+}
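+
+/* MESSAGE_HASH_SIZE is a power of two (BIT(6) == 64), so masking with
+ * (MESSAGE_HASH_SIZE - 1) above picks one of 64 buckets without a modulo;
+ * a hash value of 0x9e3779b9, for example, maps to bucket 0x39.
+ */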
+
+static struct tracer_string_format *mlx5_tracer_message_insert(struct mlx5_fw_tracer *tracer,
+                                                              struct tracer_event *tracer_event)
+{
+       struct hlist_head *head =
+               &tracer->hash[mlx5_tracer_message_hash(tracer_event->string_event.tmsn)];
+       struct tracer_string_format *cur_string;
+
+       cur_string = kzalloc(sizeof(*cur_string), GFP_KERNEL);
+       if (!cur_string)
+               return NULL;
+
+       hlist_add_head(&cur_string->hlist, head);
+
+       return cur_string;
+}
+
+static struct tracer_string_format *mlx5_tracer_get_string(struct mlx5_fw_tracer *tracer,
+                                                          struct tracer_event *tracer_event)
+{
+       struct tracer_string_format *cur_string;
+       u32 str_ptr, offset;
+       int i;
+
+       str_ptr = tracer_event->string_event.string_param;
+
+       for (i = 0; i < tracer->str_db.num_string_db; i++) {
+               if (str_ptr > tracer->str_db.base_address_out[i] &&
+                   str_ptr < tracer->str_db.base_address_out[i] +
+                   tracer->str_db.size_out[i]) {
+                       offset = str_ptr - tracer->str_db.base_address_out[i];
+                       /* add it to the hash */
+                       cur_string = mlx5_tracer_message_insert(tracer, tracer_event);
+                       if (!cur_string)
+                               return NULL;
+                       cur_string->string = (char *)(tracer->str_db.buffer[i] +
+                                                       offset);
+                       return cur_string;
+               }
+       }
+
+       return NULL;
+}
+
+static void mlx5_tracer_clean_message(struct tracer_string_format *str_frmt)
+{
+       hlist_del(&str_frmt->hlist);
+       kfree(str_frmt);
+}
+
+static int mlx5_tracer_get_num_of_params(char *str)
+{
+       char *substr, *pstr = str;
+       int num_of_params = 0;
+
+       /* replace %llx with %x%x */
+       substr = strstr(pstr, VAL_PARM);
+       while (substr) {
+               memcpy(substr, REPLACE_64_VAL_PARM, 4);
+               pstr = substr;
+               substr = strstr(pstr, VAL_PARM);
+       }
+
+       /* count all the % characters */
+       substr = strstr(str, PARAM_CHAR);
+       while (substr) {
+               num_of_params += 1;
+               str = substr + 1;
+               substr = strstr(str, PARAM_CHAR);
+       }
+
+       return num_of_params;
+}
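+
+/* Worked example for the two passes above (an illustrative sketch, not a
+ * real firmware format string):
+ *
+ *	char fmt[] = "cmd %d failed, opcode 0x%llx";
+ *	int n = mlx5_tracer_get_num_of_params(fmt);
+ *
+ * fmt is rewritten in place to "cmd %d failed, opcode 0x%x%x" (the 4-byte
+ * "%x%x" overwrites the 4-byte "%llx") and n == 3, since each 64-bit
+ * value arrives from firmware as two 32-bit dwords.
+ */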
+
+static struct tracer_string_format *mlx5_tracer_message_find(struct hlist_head *head,
+                                                            u8 event_id, u32 tmsn)
+{
+       struct tracer_string_format *message;
+
+       hlist_for_each_entry(message, head, hlist)
+               if (message->event_id == event_id && message->tmsn == tmsn)
+                       return message;
+
+       return NULL;
+}
+
+static struct tracer_string_format *mlx5_tracer_message_get(struct mlx5_fw_tracer *tracer,
+                                                           struct tracer_event *tracer_event)
+{
+       struct hlist_head *head =
+               &tracer->hash[mlx5_tracer_message_hash(tracer_event->string_event.tmsn)];
+
+       return mlx5_tracer_message_find(head, tracer_event->event_id, tracer_event->string_event.tmsn);
+}
+
+static void poll_trace(struct mlx5_fw_tracer *tracer,
+                      struct tracer_event *tracer_event, u64 *trace)
+{
+       u32 timestamp_low, timestamp_mid, timestamp_high, urts;
+
+       tracer_event->event_id = MLX5_GET(tracer_event, trace, event_id);
+       tracer_event->lost_event = MLX5_GET(tracer_event, trace, lost);
+
+       switch (tracer_event->event_id) {
+       case TRACER_EVENT_TYPE_TIMESTAMP:
+               tracer_event->type = TRACER_EVENT_TYPE_TIMESTAMP;
+               urts = MLX5_GET(tracer_timestamp_event, trace, urts);
+               if (tracer->trc_ver == 0)
+                       tracer_event->timestamp_event.unreliable = !!(urts >> 2);
+               else
+                       tracer_event->timestamp_event.unreliable = !!(urts & 1);
+
+               timestamp_low = MLX5_GET(tracer_timestamp_event,
+                                        trace, timestamp7_0);
+               timestamp_mid = MLX5_GET(tracer_timestamp_event,
+                                        trace, timestamp39_8);
+               timestamp_high = MLX5_GET(tracer_timestamp_event,
+                                         trace, timestamp52_40);
+
+               tracer_event->timestamp_event.timestamp =
+                               ((u64)timestamp_high << 40) |
+                               ((u64)timestamp_mid << 8) |
+                               (u64)timestamp_low;
+               break;
+       default:
+               if (tracer_event->event_id >= tracer->str_db.first_string_trace ||
+                   tracer_event->event_id <= tracer->str_db.first_string_trace +
+                                             tracer->str_db.num_string_trace) {
+                       tracer_event->type = TRACER_EVENT_TYPE_STRING;
+                       tracer_event->string_event.timestamp =
+                               MLX5_GET(tracer_string_event, trace, timestamp);
+                       tracer_event->string_event.string_param =
+                               MLX5_GET(tracer_string_event, trace, string_param);
+                       tracer_event->string_event.tmsn =
+                               MLX5_GET(tracer_string_event, trace, tmsn);
+                       tracer_event->string_event.tdsn =
+                               MLX5_GET(tracer_string_event, trace, tdsn);
+               } else {
+                       tracer_event->type = TRACER_EVENT_TYPE_UNRECOGNIZED;
+               }
+               break;
+       }
+}
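+
+/* Timestamp assembly sketch for poll_trace() above: the 53-bit timestamp
+ * is split across three event fields; with timestamp52_40 = 0x1,
+ * timestamp39_8 = 0x2 and timestamp7_0 = 0x3, the reassembled value is
+ * (0x1ULL << 40) | (0x2ULL << 8) | 0x3 = 0x10000000203.
+ */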
+
+static u64 get_block_timestamp(struct mlx5_fw_tracer *tracer, u64 *ts_event)
+{
+       struct tracer_event tracer_event;
+       u8 event_id;
+
+       event_id = MLX5_GET(tracer_event, ts_event, event_id);
+
+       if (event_id == TRACER_EVENT_TYPE_TIMESTAMP)
+               poll_trace(tracer, &tracer_event, ts_event);
+       else
+               tracer_event.timestamp_event.timestamp = 0;
+
+       return tracer_event.timestamp_event.timestamp;
+}
+
+static void mlx5_fw_tracer_clean_print_hash(struct mlx5_fw_tracer *tracer)
+{
+       struct tracer_string_format *str_frmt;
+       struct hlist_node *n;
+       int i;
+
+       for (i = 0; i < MESSAGE_HASH_SIZE; i++) {
+               hlist_for_each_entry_safe(str_frmt, n, &tracer->hash[i], hlist)
+                       mlx5_tracer_clean_message(str_frmt);
+       }
+}
+
+static void mlx5_fw_tracer_clean_ready_list(struct mlx5_fw_tracer *tracer)
+{
+       struct tracer_string_format *str_frmt, *tmp_str;
+
+       list_for_each_entry_safe(str_frmt, tmp_str, &tracer->ready_strings_list,
+                                list)
+               list_del(&str_frmt->list);
+}
+
+static void mlx5_tracer_print_trace(struct tracer_string_format *str_frmt,
+                                   struct mlx5_core_dev *dev,
+                                   u64 trace_timestamp)
+{
+       char    tmp[512];
+
+       snprintf(tmp, sizeof(tmp), str_frmt->string,
+                str_frmt->params[0],
+                str_frmt->params[1],
+                str_frmt->params[2],
+                str_frmt->params[3],
+                str_frmt->params[4],
+                str_frmt->params[5],
+                str_frmt->params[6]);
+
+       trace_mlx5_fw(dev->tracer, trace_timestamp, str_frmt->lost,
+                     str_frmt->event_id, tmp);
+
+       /* remove it from hash */
+       mlx5_tracer_clean_message(str_frmt);
+}
+
+static int mlx5_tracer_handle_string_trace(struct mlx5_fw_tracer *tracer,
+                                          struct tracer_event *tracer_event)
+{
+       struct tracer_string_format *cur_string;
+
+       if (tracer_event->string_event.tdsn == 0) {
+               cur_string = mlx5_tracer_get_string(tracer, tracer_event);
+               if (!cur_string)
+                       return -1;
+
+               cur_string->num_of_params = mlx5_tracer_get_num_of_params(cur_string->string);
+               cur_string->last_param_num = 0;
+               cur_string->event_id = tracer_event->event_id;
+               cur_string->tmsn = tracer_event->string_event.tmsn;
+               cur_string->timestamp = tracer_event->string_event.timestamp;
+               cur_string->lost = tracer_event->lost_event;
+               if (cur_string->num_of_params == 0) /* trace with no params */
+                       list_add_tail(&cur_string->list, &tracer->ready_strings_list);
+       } else {
+               cur_string = mlx5_tracer_message_get(tracer, tracer_event);
+               if (!cur_string) {
+                       pr_debug("%s Got string event for unknown string tdsm: %d\n",
+                                __func__, tracer_event->string_event.tmsn);
+                       return -1;
+               }
+               cur_string->last_param_num += 1;
+               if (cur_string->last_param_num > TRACER_MAX_PARAMS) {
+                       pr_debug("%s Number of params exceeds the max (%d)\n",
+                                __func__, TRACER_MAX_PARAMS);
+                       list_add_tail(&cur_string->list, &tracer->ready_strings_list);
+                       return 0;
+               }
+               /* keep the new parameter */
+               cur_string->params[cur_string->last_param_num - 1] =
+                       tracer_event->string_event.string_param;
+               if (cur_string->last_param_num == cur_string->num_of_params)
+                       list_add_tail(&cur_string->list, &tracer->ready_strings_list);
+       }
+
+       return 0;
+}
+
+static void mlx5_tracer_handle_timestamp_trace(struct mlx5_fw_tracer *tracer,
+                                              struct tracer_event *tracer_event)
+{
+       struct tracer_timestamp_event timestamp_event =
+                                               tracer_event->timestamp_event;
+       struct tracer_string_format *str_frmt, *tmp_str;
+       struct mlx5_core_dev *dev = tracer->dev;
+       u64 trace_timestamp;
+
+       list_for_each_entry_safe(str_frmt, tmp_str, &tracer->ready_strings_list, list) {
+               list_del(&str_frmt->list);
+               if (str_frmt->timestamp < (timestamp_event.timestamp & MASK_6_0))
+                       trace_timestamp = (timestamp_event.timestamp & MASK_52_7) |
+                                         (str_frmt->timestamp & MASK_6_0);
+               else
+                       trace_timestamp = ((timestamp_event.timestamp & MASK_52_7) - 1) |
+                                         (str_frmt->timestamp & MASK_6_0);
+
+               mlx5_tracer_print_trace(str_frmt, dev, trace_timestamp);
+       }
+}
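+
+/* Timestamp-merge sketch for the loop above: string events carry only the
+ * low 7 timestamp bits (MASK_6_0), so the upper bits (MASK_52_7) are
+ * borrowed from the closest timestamp event. If the string's low bits are
+ * below the timestamp's own low bits, both fall in the same 128-tick
+ * window and are OR'ed together; otherwise the string predates the window
+ * rollover, which the "- 1" branch is intended to compensate for by
+ * stepping the upper part back.
+ */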
+
+static int mlx5_tracer_handle_trace(struct mlx5_fw_tracer *tracer,
+                                   struct tracer_event *tracer_event)
+{
+       if (tracer_event->type == TRACER_EVENT_TYPE_STRING) {
+               mlx5_tracer_handle_string_trace(tracer, tracer_event);
+       } else if (tracer_event->type == TRACER_EVENT_TYPE_TIMESTAMP) {
+               if (!tracer_event->timestamp_event.unreliable)
+                       mlx5_tracer_handle_timestamp_trace(tracer, tracer_event);
+       } else {
+               pr_debug("%s Got unrecognised type %d for parsing, exiting..\n",
+                        __func__, tracer_event->type);
+       }
+       return 0;
+}
+
+static void mlx5_fw_tracer_handle_traces(struct work_struct *work)
+{
+       struct mlx5_fw_tracer *tracer =
+                       container_of(work, struct mlx5_fw_tracer, handle_traces_work);
+       u64 block_timestamp, last_block_timestamp, tmp_trace_block[TRACES_PER_BLOCK];
+       u32 block_count, start_offset, prev_start_offset, prev_consumer_index;
+       u32 trace_event_size = MLX5_ST_SZ_BYTES(tracer_event);
+       struct mlx5_core_dev *dev = tracer->dev;
+       struct tracer_event tracer_event;
+       int i;
+
+       mlx5_core_dbg(dev, "FWTracer: Handle Trace event, owner=(%d)\n", tracer->owner);
+       if (!tracer->owner)
+               return;
+
+       block_count = tracer->buff.size / TRACER_BLOCK_SIZE_BYTE;
+       start_offset = tracer->buff.consumer_index * TRACER_BLOCK_SIZE_BYTE;
+
+       /* Copy the block to a local buffer to avoid HW overwrite while it is being processed */
+       memcpy(tmp_trace_block, tracer->buff.log_buf + start_offset,
+              TRACER_BLOCK_SIZE_BYTE);
+
+       block_timestamp =
+               get_block_timestamp(tracer, &tmp_trace_block[TRACES_PER_BLOCK - 1]);
+
+       while (block_timestamp > tracer->last_timestamp) {
+               /* Check for block overwrite if it's not the first block */
+               if (!tracer->last_timestamp) {
+                       u64 *ts_event;
+                       /* To avoid block overwrite by the HW in case of buffer
+                        * wraparound, the timestamp of the previous block
+                        * should be compared to the last timestamp handled
+                        * by the driver.
+                        */
+                       prev_consumer_index =
+                               (tracer->buff.consumer_index - 1) & (block_count - 1);
+                       prev_start_offset = prev_consumer_index * TRACER_BLOCK_SIZE_BYTE;
+
+                       ts_event = tracer->buff.log_buf + prev_start_offset +
+                                  (TRACES_PER_BLOCK - 1) * trace_event_size;
+                       last_block_timestamp = get_block_timestamp(tracer, ts_event);
+                       /* If the previous timestamp differs from the last
+                        * stored timestamp, there is a good chance that the
+                        * current buffer was overwritten and therefore should
+                        * not be parsed.
+                        */
+                       if (tracer->last_timestamp != last_block_timestamp) {
+                               mlx5_core_warn(dev, "FWTracer: Events were lost\n");
+                               tracer->last_timestamp = block_timestamp;
+                               tracer->buff.consumer_index =
+                                       (tracer->buff.consumer_index + 1) & (block_count - 1);
+                               break;
+                       }
+               }
+
+               /* Parse events */
+               for (i = 0; i < TRACES_PER_BLOCK; i++) {
+                       poll_trace(tracer, &tracer_event, &tmp_trace_block[i]);
+                       mlx5_tracer_handle_trace(tracer, &tracer_event);
+               }
+
+               tracer->buff.consumer_index =
+                       (tracer->buff.consumer_index + 1) & (block_count - 1);
+
+               tracer->last_timestamp = block_timestamp;
+               start_offset = tracer->buff.consumer_index * TRACER_BLOCK_SIZE_BYTE;
+               memcpy(tmp_trace_block, tracer->buff.log_buf + start_offset,
+                      TRACER_BLOCK_SIZE_BYTE);
+               block_timestamp = get_block_timestamp(tracer,
+                                                     &tmp_trace_block[TRACES_PER_BLOCK - 1]);
+       }
+
+       mlx5_fw_tracer_arm(dev);
+}
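+
+/* Ring-index sketch for the block walk above: the log buffer holds
+ * block_count = buff.size / TRACER_BLOCK_SIZE_BYTE blocks, a power of
+ * two, so "(consumer_index + 1) & (block_count - 1)" is a cheap modulo;
+ * e.g. a 256KB buffer gives 1024 blocks and index 1023 wraps to 0.
+ */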
+
+static int mlx5_fw_tracer_set_mtrc_conf(struct mlx5_fw_tracer *tracer)
+{
+       struct mlx5_core_dev *dev = tracer->dev;
+       u32 out[MLX5_ST_SZ_DW(mtrc_conf)] = {0};
+       u32 in[MLX5_ST_SZ_DW(mtrc_conf)] = {0};
+       int err;
+
+       MLX5_SET(mtrc_conf, in, trace_mode, TRACE_TO_MEMORY);
+       MLX5_SET(mtrc_conf, in, log_trace_buffer_size,
+                ilog2(TRACER_BUFFER_PAGE_NUM));
+       MLX5_SET(mtrc_conf, in, trace_mkey, tracer->buff.mkey.key);
+
+       err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
+                                  MLX5_REG_MTRC_CONF, 0, 1);
+       if (err)
+               mlx5_core_warn(dev, "FWTracer: Failed to set tracer configurations %d\n", err);
+
+       return err;
+}
+
+static int mlx5_fw_tracer_set_mtrc_ctrl(struct mlx5_fw_tracer *tracer, u8 status, u8 arm)
+{
+       struct mlx5_core_dev *dev = tracer->dev;
+       u32 out[MLX5_ST_SZ_DW(mtrc_ctrl)] = {0};
+       u32 in[MLX5_ST_SZ_DW(mtrc_ctrl)] = {0};
+       int err;
+
+       MLX5_SET(mtrc_ctrl, in, modify_field_select, TRACE_STATUS);
+       MLX5_SET(mtrc_ctrl, in, trace_status, status);
+       MLX5_SET(mtrc_ctrl, in, arm_event, arm);
+
+       err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
+                                  MLX5_REG_MTRC_CTRL, 0, 1);
+
+       if (!err && status)
+               tracer->last_timestamp = 0;
+
+       return err;
+}
+
+static int mlx5_fw_tracer_start(struct mlx5_fw_tracer *tracer)
+{
+       struct mlx5_core_dev *dev = tracer->dev;
+       int err;
+
+       err = mlx5_fw_tracer_ownership_acquire(tracer);
+       if (err) {
+               mlx5_core_dbg(dev, "FWTracer: Ownership was not granted %d\n", err);
+               /* Don't fail since ownership can be acquired on a later FW event */
+               return 0;
+       }
+
+       err = mlx5_fw_tracer_set_mtrc_conf(tracer);
+       if (err) {
+               mlx5_core_warn(dev, "FWTracer: Failed to set tracer configuration %d\n", err);
+               goto release_ownership;
+       }
+
+       /* enable tracer & trace events */
+       err = mlx5_fw_tracer_set_mtrc_ctrl(tracer, 1, 1);
+       if (err) {
+               mlx5_core_warn(dev, "FWTracer: Failed to enable tracer %d\n", err);
+               goto release_ownership;
+       }
+
+       mlx5_core_dbg(dev, "FWTracer: Ownership granted and active\n");
+       return 0;
+
+release_ownership:
+       mlx5_fw_tracer_ownership_release(tracer);
+       return err;
+}
+
+static void mlx5_fw_tracer_ownership_change(struct work_struct *work)
+{
+       struct mlx5_fw_tracer *tracer =
+               container_of(work, struct mlx5_fw_tracer, ownership_change_work);
+
+       mlx5_core_dbg(tracer->dev, "FWTracer: ownership changed, current=(%d)\n", tracer->owner);
+       if (tracer->owner) {
+               tracer->owner = false;
+               tracer->buff.consumer_index = 0;
+               return;
+       }
+
+       mlx5_fw_tracer_start(tracer);
+}
+
+/* Create software resources (buffers, etc.) */
+struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev)
+{
+       struct mlx5_fw_tracer *tracer = NULL;
+       int err;
+
+       if (!MLX5_CAP_MCAM_REG(dev, tracer_registers)) {
+               mlx5_core_dbg(dev, "FWTracer: Tracer capability not present\n");
+               return NULL;
+       }
+
+       tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
+       if (!tracer)
+               return ERR_PTR(-ENOMEM);
+
+       tracer->work_queue = create_singlethread_workqueue("mlx5_fw_tracer");
+       if (!tracer->work_queue) {
+               err = -ENOMEM;
+               goto free_tracer;
+       }
+
+       tracer->dev = dev;
+
+       INIT_LIST_HEAD(&tracer->ready_strings_list);
+       INIT_WORK(&tracer->ownership_change_work, mlx5_fw_tracer_ownership_change);
+       INIT_WORK(&tracer->read_fw_strings_work, mlx5_tracer_read_strings_db);
+       INIT_WORK(&tracer->handle_traces_work, mlx5_fw_tracer_handle_traces);
+
+       err = mlx5_query_mtrc_caps(tracer);
+       if (err) {
+               mlx5_core_dbg(dev, "FWTracer: Failed to query capabilities %d\n", err);
+               goto destroy_workqueue;
+       }
+
+       err = mlx5_fw_tracer_create_log_buf(tracer);
+       if (err) {
+               mlx5_core_warn(dev, "FWTracer: Create log buffer failed %d\n", err);
+               goto destroy_workqueue;
+       }
+
+       err = mlx5_fw_tracer_allocate_strings_db(tracer);
+       if (err) {
+               mlx5_core_warn(dev, "FWTracer: Allocate strings database failed %d\n", err);
+               goto free_log_buf;
+       }
+
+       mlx5_core_dbg(dev, "FWTracer: Tracer created\n");
+
+       return tracer;
+
+free_log_buf:
+       mlx5_fw_tracer_destroy_log_buf(tracer);
+destroy_workqueue:
+       tracer->dev = NULL;
+       destroy_workqueue(tracer->work_queue);
+free_tracer:
+       kfree(tracer);
+       return ERR_PTR(err);
+}
+
+/* Create HW resources and start the tracer.
+ * Must be called before the async EQ is created.
+ */
+int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer)
+{
+       struct mlx5_core_dev *dev;
+       int err;
+
+       if (IS_ERR_OR_NULL(tracer))
+               return 0;
+
+       dev = tracer->dev;
+
+       if (!tracer->str_db.loaded)
+               queue_work(tracer->work_queue, &tracer->read_fw_strings_work);
+
+       err = mlx5_core_alloc_pd(dev, &tracer->buff.pdn);
+       if (err) {
+               mlx5_core_warn(dev, "FWTracer: Failed to allocate PD %d\n", err);
+               return err;
+       }
+
+       err = mlx5_fw_tracer_create_mkey(tracer);
+       if (err) {
+               mlx5_core_warn(dev, "FWTracer: Failed to create mkey %d\n", err);
+               goto err_dealloc_pd;
+       }
+
+       mlx5_fw_tracer_start(tracer);
+
+       return 0;
+
+err_dealloc_pd:
+       mlx5_core_dealloc_pd(dev, tracer->buff.pdn);
+       return err;
+}
+
+/* Stop the tracer and clean up HW resources.
+ * Must be called after the async EQ is destroyed.
+ */
+void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer)
+{
+       if (IS_ERR_OR_NULL(tracer))
+               return;
+
+       mlx5_core_dbg(tracer->dev, "FWTracer: Cleanup, is owner ? (%d)\n",
+                     tracer->owner);
+
+       cancel_work_sync(&tracer->ownership_change_work);
+       cancel_work_sync(&tracer->handle_traces_work);
+
+       if (tracer->owner)
+               mlx5_fw_tracer_ownership_release(tracer);
+
+       mlx5_core_destroy_mkey(tracer->dev, &tracer->buff.mkey);
+       mlx5_core_dealloc_pd(tracer->dev, tracer->buff.pdn);
+}
+
+/* Free software resources (buffers, etc.) */
+void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer)
+{
+       if (IS_ERR_OR_NULL(tracer))
+               return;
+
+       mlx5_core_dbg(tracer->dev, "FWTracer: Destroy\n");
+
+       cancel_work_sync(&tracer->read_fw_strings_work);
+       mlx5_fw_tracer_clean_ready_list(tracer);
+       mlx5_fw_tracer_clean_print_hash(tracer);
+       mlx5_fw_tracer_free_strings_db(tracer);
+       mlx5_fw_tracer_destroy_log_buf(tracer);
+       flush_workqueue(tracer->work_queue);
+       destroy_workqueue(tracer->work_queue);
+       kfree(tracer);
+}
+
+void mlx5_fw_tracer_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe)
+{
+       struct mlx5_fw_tracer *tracer = dev->tracer;
+
+       if (!tracer)
+               return;
+
+       switch (eqe->sub_type) {
+       case MLX5_TRACER_SUBTYPE_OWNERSHIP_CHANGE:
+               if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state))
+                       queue_work(tracer->work_queue, &tracer->ownership_change_work);
+               break;
+       case MLX5_TRACER_SUBTYPE_TRACES_AVAILABLE:
+               if (likely(tracer->str_db.loaded))
+                       queue_work(tracer->work_queue, &tracer->handle_traces_work);
+               break;
+       default:
+               mlx5_core_dbg(dev, "FWTracer: Event with unrecognized subtype: sub_type %d\n",
+                             eqe->sub_type);
+       }
+}
+
+EXPORT_TRACEPOINT_SYMBOL(mlx5_fw);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
new file mode 100644 (file)
index 0000000..0347f2d
--- /dev/null
@@ -0,0 +1,175 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __LIB_TRACER_H__
+#define __LIB_TRACER_H__
+
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+
+#define STRINGS_DB_SECTIONS_NUM 8
+#define STRINGS_DB_READ_SIZE_BYTES 256
+#define STRINGS_DB_LEFTOVER_SIZE_BYTES 64
+#define TRACER_BUFFER_PAGE_NUM 64
+#define TRACER_BUFFER_CHUNK 4096
+#define TRACE_BUFFER_SIZE_BYTE (TRACER_BUFFER_PAGE_NUM * TRACER_BUFFER_CHUNK)
+
+#define TRACER_BLOCK_SIZE_BYTE 256
+#define TRACES_PER_BLOCK 32
+
+#define TRACER_MAX_PARAMS 7
+#define MESSAGE_HASH_BITS 6
+#define MESSAGE_HASH_SIZE BIT(MESSAGE_HASH_BITS)
+
+#define MASK_52_7 (0x1FFFFFFFFFFF80)
+#define MASK_6_0  (0x7F)
+
+struct mlx5_fw_tracer {
+       struct mlx5_core_dev *dev;
+       bool owner;
+       u8   trc_ver;
+       struct workqueue_struct *work_queue;
+       struct work_struct ownership_change_work;
+       struct work_struct read_fw_strings_work;
+
+       /* Strings DB */
+       struct {
+               u8 first_string_trace;
+               u8 num_string_trace;
+               u32 num_string_db;
+               u32 base_address_out[STRINGS_DB_SECTIONS_NUM];
+               u32 size_out[STRINGS_DB_SECTIONS_NUM];
+               void *buffer[STRINGS_DB_SECTIONS_NUM];
+               bool loaded;
+       } str_db;
+
+       /* Log Buffer */
+       struct {
+               u32 pdn;
+               void *log_buf;
+               dma_addr_t dma;
+               u32 size;
+               struct mlx5_core_mkey mkey;
+               u32 consumer_index;
+       } buff;
+
+       u64 last_timestamp;
+       struct work_struct handle_traces_work;
+       struct hlist_head hash[MESSAGE_HASH_SIZE];
+       struct list_head ready_strings_list;
+};
+
+struct tracer_string_format {
+       char *string;
+       int params[TRACER_MAX_PARAMS];
+       int num_of_params;
+       int last_param_num;
+       u8 event_id;
+       u32 tmsn;
+       struct hlist_node hlist;
+       struct list_head list;
+       u32 timestamp;
+       bool lost;
+};
+
+enum mlx5_fw_tracer_ownership_state {
+       MLX5_FW_TRACER_RELEASE_OWNERSHIP,
+       MLX5_FW_TRACER_ACQUIRE_OWNERSHIP,
+};
+
+enum tracer_ctrl_fields_select {
+       TRACE_STATUS = 1 << 0,
+};
+
+enum tracer_event_type {
+       TRACER_EVENT_TYPE_STRING,
+       TRACER_EVENT_TYPE_TIMESTAMP = 0xFF,
+       TRACER_EVENT_TYPE_UNRECOGNIZED,
+};
+
+enum tracing_mode {
+       TRACE_TO_MEMORY = 1 << 0,
+};
+
+struct tracer_timestamp_event {
+       u64        timestamp;
+       u8         unreliable;
+};
+
+struct tracer_string_event {
+       u32        timestamp;
+       u32        tmsn;
+       u32        tdsn;
+       u32        string_param;
+};
+
+struct tracer_event {
+       bool      lost_event;
+       u32       type;
+       u8        event_id;
+       union {
+               struct tracer_string_event string_event;
+               struct tracer_timestamp_event timestamp_event;
+       };
+};
+
+struct mlx5_ifc_tracer_event_bits {
+       u8         lost[0x1];
+       u8         timestamp[0x7];
+       u8         event_id[0x8];
+       u8         event_data[0x30];
+};
+
+struct mlx5_ifc_tracer_string_event_bits {
+       u8         lost[0x1];
+       u8         timestamp[0x7];
+       u8         event_id[0x8];
+       u8         tmsn[0xd];
+       u8         tdsn[0x3];
+       u8         string_param[0x20];
+};
+
+struct mlx5_ifc_tracer_timestamp_event_bits {
+       u8         timestamp7_0[0x8];
+       u8         event_id[0x8];
+       u8         urts[0x3];
+       u8         timestamp52_40[0xd];
+       u8         timestamp39_8[0x20];
+};
+
+struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev);
+int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer);
+void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer);
+void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer);
+void mlx5_fw_tracer_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe);
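+
+/* Expected call order for the API above (a sketch assembled from the
+ * comments in fw_tracer.c; error handling elided):
+ *
+ *	tracer = mlx5_fw_tracer_create(dev);	// SW resources
+ *	err = mlx5_fw_tracer_init(tracer);	// HW resources, before async EQ
+ *	...
+ *	mlx5_fw_tracer_event(dev, eqe);		// from the async EQ handler
+ *	...
+ *	mlx5_fw_tracer_cleanup(tracer);		// after async EQ is destroyed
+ *	mlx5_fw_tracer_destroy(tracer);		// SW resources
+ */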
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer_tracepoint.h
new file mode 100644 (file)
index 0000000..83f90e9
--- /dev/null
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#if !defined(__LIB_TRACER_TRACEPOINT_H__) || defined(TRACE_HEADER_MULTI_READ)
+#define __LIB_TRACER_TRACEPOINT_H__
+
+#include <linux/tracepoint.h>
+#include "fw_tracer.h"
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mlx5
+
+/* Tracepoint for FWTracer messages: */
+TRACE_EVENT(mlx5_fw,
+       TP_PROTO(const struct mlx5_fw_tracer *tracer, u64 trace_timestamp,
+                bool lost, u8 event_id, const char *msg),
+
+       TP_ARGS(tracer, trace_timestamp, lost, event_id, msg),
+
+       TP_STRUCT__entry(
+               __string(dev_name, dev_name(&tracer->dev->pdev->dev))
+               __field(u64, trace_timestamp)
+               __field(bool, lost)
+               __field(u8, event_id)
+               __string(msg, msg)
+       ),
+
+       TP_fast_assign(
+               __assign_str(dev_name, dev_name(&tracer->dev->pdev->dev));
+               __entry->trace_timestamp = trace_timestamp;
+               __entry->lost = lost;
+               __entry->event_id = event_id;
+               __assign_str(msg, msg);
+       ),
+
+       TP_printk("%s [0x%llx] %d [0x%x] %s",
+                 __get_str(dev_name),
+                 __entry->trace_timestamp,
+                 __entry->lost, __entry->event_id,
+                 __get_str(msg))
+);
+
+#endif
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH ./diag
+#define TRACE_INCLUDE_FILE fw_tracer_tracepoint
+#include <trace/define_trace.h>
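+
+/* The tracepoint above lands under TRACE_SYSTEM "mlx5". Assuming a
+ * debugfs-backed tracefs mount, it can typically be enabled with
+ *   echo 1 > /sys/kernel/debug/tracing/events/mlx5/mlx5_fw/enable
+ * (the exact mount point varies; shown for illustration only).
+ */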
index eb9eb7aa953ae5560db4746569a02f177bcddf13..c7ed3d20fd54b7924f8622e7606f80e68d21e2ab 100644 (file)
@@ -137,7 +137,6 @@ struct page_pool;
 #define MLX5E_MAX_NUM_CHANNELS         (MLX5E_INDIR_RQT_SIZE >> 1)
 #define MLX5E_MAX_NUM_SQS              (MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC)
 #define MLX5E_TX_CQ_POLL_BUDGET        128
-#define MLX5E_UPDATE_STATS_INTERVAL    200 /* msecs */
 #define MLX5E_SQ_RECOVER_MIN_INTERVAL  500 /* msecs */
 
 #define MLX5E_UMR_WQE_INLINE_SZ \
@@ -148,10 +147,6 @@ struct page_pool;
        (DIV_ROUND_UP(MLX5E_UMR_WQE_INLINE_SZ, MLX5_SEND_WQE_BB))
 #define MLX5E_ICOSQ_MAX_WQEBBS MLX5E_UMR_WQEBBS
 
-#define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
-#define MLX5E_XDP_TX_DS_COUNT \
-       ((sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) + 1 /* SG DS */)
-
 #define MLX5E_NUM_MAIN_GROUPS 9
 
 #define MLX5E_MSG_LEVEL                        NETIF_MSG_LINK
@@ -349,6 +344,7 @@ enum {
        MLX5E_SQ_STATE_IPSEC,
        MLX5E_SQ_STATE_AM,
        MLX5E_SQ_STATE_TLS,
+       MLX5E_SQ_STATE_REDIRECT,
 };
 
 struct mlx5e_sq_wqe_info {
@@ -369,16 +365,14 @@ struct mlx5e_txqsq {
 
        struct mlx5e_cq            cq;
 
-       /* write@xmit, read@completion */
-       struct {
-               struct mlx5e_sq_dma       *dma_fifo;
-               struct mlx5e_tx_wqe_info  *wqe_info;
-       } db;
-
        /* read only */
        struct mlx5_wq_cyc         wq;
        u32                        dma_fifo_mask;
        struct mlx5e_sq_stats     *stats;
+       struct {
+               struct mlx5e_sq_dma       *dma_fifo;
+               struct mlx5e_tx_wqe_info  *wqe_info;
+       } db;
        void __iomem              *uar_map;
        struct netdev_queue       *txq;
        u32                        sqn;
@@ -400,30 +394,43 @@ struct mlx5e_txqsq {
        } recover;
 } ____cacheline_aligned_in_smp;
 
+struct mlx5e_dma_info {
+       struct page     *page;
+       dma_addr_t      addr;
+};
+
+struct mlx5e_xdp_info {
+       struct xdp_frame      *xdpf;
+       dma_addr_t            dma_addr;
+       struct mlx5e_dma_info di;
+};
+
 struct mlx5e_xdpsq {
        /* data path */
 
-       /* dirtied @rx completion */
+       /* dirtied @completion */
        u16                        cc;
-       u16                        pc;
+       bool                       redirect_flush;
 
-       struct mlx5e_cq            cq;
+       /* dirtied @xmit */
+       u16                        pc ____cacheline_aligned_in_smp;
+       bool                       doorbell;
 
-       /* write@xmit, read@completion */
-       struct {
-               struct mlx5e_dma_info     *di;
-               bool                       doorbell;
-               bool                       redirect_flush;
-       } db;
+       struct mlx5e_cq            cq;
 
        /* read only */
        struct mlx5_wq_cyc         wq;
+       struct mlx5e_xdpsq_stats  *stats;
+       struct {
+               struct mlx5e_xdp_info     *xdpi;
+       } db;
        void __iomem              *uar_map;
        u32                        sqn;
        struct device             *pdev;
        __be32                     mkey_be;
        u8                         min_inline_mode;
        unsigned long              state;
+       unsigned int               hw_mtu;
 
        /* control path */
        struct mlx5_wq_ctrl        wq_ctrl;
@@ -460,11 +467,6 @@ mlx5e_wqc_has_room_for(struct mlx5_wq_cyc *wq, u16 cc, u16 pc, u16 n)
        return (mlx5_wq_cyc_ctr2ix(wq, cc - pc) >= n) || (cc == pc);
 }
 
-struct mlx5e_dma_info {
-       struct page     *page;
-       dma_addr_t      addr;
-};
-
 struct mlx5e_wqe_frag_info {
        struct mlx5e_dma_info *di;
        u32 offset;
@@ -567,7 +569,6 @@ struct mlx5e_rq {
 
        /* XDP */
        struct bpf_prog       *xdp_prog;
-       unsigned int           hw_mtu;
        struct mlx5e_xdpsq     xdpsq;
        DECLARE_BITMAP(flags, 8);
        struct page_pool      *page_pool;
@@ -596,6 +597,9 @@ struct mlx5e_channel {
        __be32                     mkey_be;
        u8                         num_tc;
 
+       /* XDP_REDIRECT */
+       struct mlx5e_xdpsq         xdpsq;
+
        /* data path - accessed per napi poll */
        struct irq_desc *irq_desc;
        struct mlx5e_ch_stats     *stats;
@@ -618,6 +622,8 @@ struct mlx5e_channel_stats {
        struct mlx5e_ch_stats ch;
        struct mlx5e_sq_stats sq[MLX5E_MAX_NUM_TC];
        struct mlx5e_rq_stats rq;
+       struct mlx5e_xdpsq_stats rq_xdpsq;
+       struct mlx5e_xdpsq_stats xdpsq;
 } ____cacheline_aligned_in_smp;
 
 enum mlx5e_traffic_types {
@@ -648,11 +654,6 @@ enum {
        MLX5E_STATE_DESTROYING,
 };
 
-struct mlx5e_vxlan_db {
-       spinlock_t                      lock; /* protect vxlan table */
-       struct radix_tree_root          tree;
-};
-
 struct mlx5e_l2_rule {
        u8  addr[ETH_ALEN + 2];
        struct mlx5_flow_handle *rule;
@@ -810,7 +811,6 @@ struct mlx5e_priv {
        u32                        tx_rates[MLX5E_MAX_NUM_SQS];
 
        struct mlx5e_flow_steering fs;
-       struct mlx5e_vxlan_db      vxlan;
 
        struct workqueue_struct    *wq;
        struct work_struct         update_carrier_work;
@@ -866,7 +866,8 @@ struct mlx5e_profile {
 void mlx5e_build_ptys2ethtool_map(void);
 
 u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
-                      void *accel_priv, select_queue_fallback_t fallback);
+                      struct net_device *sb_dev,
+                      select_queue_fallback_t fallback);
 netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev);
 netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
                          struct mlx5e_tx_wqe *wqe, u16 pi);
@@ -876,14 +877,13 @@ void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event);
 int mlx5e_napi_poll(struct napi_struct *napi, int budget);
 bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget);
 int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget);
-bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq);
 void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq);
-void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq);
 
 bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev);
 bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev,
                                struct mlx5e_params *params);
 
+void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info);
 void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info,
                        bool recycle);
 void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
@@ -892,7 +892,6 @@ bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq);
 bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq);
 void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix);
 void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix);
-void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi);
 struct sk_buff *
 mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
                                u16 cqe_bcnt, u32 head_offset, u32 page_idx);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
new file mode 100644 (file)
index 0000000..1881468
--- /dev/null
@@ -0,0 +1,305 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/bpf_trace.h>
+#include "en/xdp.h"
+
+static inline bool
+mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_dma_info *di,
+                   struct xdp_buff *xdp)
+{
+       struct mlx5e_xdp_info xdpi;
+
+       xdpi.xdpf = convert_to_xdp_frame(xdp);
+       if (unlikely(!xdpi.xdpf))
+               return false;
+       xdpi.dma_addr = di->addr + (xdpi.xdpf->data - (void *)xdpi.xdpf);
+       dma_sync_single_for_device(sq->pdev, xdpi.dma_addr,
+                                  xdpi.xdpf->len, PCI_DMA_TODEVICE);
+       xdpi.di = *di;
+
+       return mlx5e_xmit_xdp_frame(sq, &xdpi);
+}
+
+/* Returns true if the packet was consumed by XDP */
+bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
+                     void *va, u16 *rx_headroom, u32 *len)
+{
+       struct bpf_prog *prog = READ_ONCE(rq->xdp_prog);
+       struct xdp_buff xdp;
+       u32 act;
+       int err;
+
+       if (!prog)
+               return false;
+
+       xdp.data = va + *rx_headroom;
+       xdp_set_data_meta_invalid(&xdp);
+       xdp.data_end = xdp.data + *len;
+       xdp.data_hard_start = va;
+       xdp.rxq = &rq->xdp_rxq;
+
+       act = bpf_prog_run_xdp(prog, &xdp);
+       switch (act) {
+       case XDP_PASS:
+               *rx_headroom = xdp.data - xdp.data_hard_start;
+               *len = xdp.data_end - xdp.data;
+               return false;
+       case XDP_TX:
+               if (unlikely(!mlx5e_xmit_xdp_buff(&rq->xdpsq, di, &xdp)))
+                       goto xdp_abort;
+               __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */
+               return true;
+       case XDP_REDIRECT:
+               /* When XDP is enabled, the page refcount is 1 here */
+               err = xdp_do_redirect(rq->netdev, &xdp, prog);
+               if (unlikely(err))
+                       goto xdp_abort;
+               __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags);
+               rq->xdpsq.redirect_flush = true;
+               mlx5e_page_dma_unmap(rq, di);
+               rq->stats->xdp_redirect++;
+               return true;
+       default:
+               bpf_warn_invalid_xdp_action(act);
+       case XDP_ABORTED:
+xdp_abort:
+               trace_xdp_exception(rq->netdev, prog, act);
+       case XDP_DROP:
+               rq->stats->xdp_drop++;
+               return true;
+       }
+}
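+
+/* Verdict summary for mlx5e_xdp_handle() above: XDP_PASS returns false,
+ * so the packet continues up the regular RX path with headroom and
+ * length as adjusted by the program; XDP_TX, XDP_REDIRECT, XDP_ABORTED
+ * and XDP_DROP all return true, telling the RX path not to build an skb
+ * for this buffer.
+ */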
+
+bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi)
+{
+       struct mlx5_wq_cyc       *wq   = &sq->wq;
+       u16                       pi   = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+       struct mlx5e_tx_wqe      *wqe  = mlx5_wq_cyc_get_wqe(wq, pi);
+
+       struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
+       struct mlx5_wqe_eth_seg  *eseg = &wqe->eth;
+       struct mlx5_wqe_data_seg *dseg = wqe->data;
+
+       struct xdp_frame *xdpf = xdpi->xdpf;
+       dma_addr_t dma_addr  = xdpi->dma_addr;
+       unsigned int dma_len = xdpf->len;
+
+       struct mlx5e_xdpsq_stats *stats = sq->stats;
+
+       prefetchw(wqe);
+
+       if (unlikely(dma_len < MLX5E_XDP_MIN_INLINE || sq->hw_mtu < dma_len)) {
+               stats->err++;
+               return false;
+       }
+
+       if (unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1))) {
+               if (sq->doorbell) {
+                       /* SQ is full, ring doorbell */
+                       mlx5e_xmit_xdp_doorbell(sq);
+                       sq->doorbell = false;
+               }
+               stats->full++;
+               return false;
+       }
+
+       cseg->fm_ce_se = 0;
+
+       /* copy the inline part if required */
+       if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
+               memcpy(eseg->inline_hdr.start, xdpf->data, MLX5E_XDP_MIN_INLINE);
+               eseg->inline_hdr.sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE);
+               dma_len  -= MLX5E_XDP_MIN_INLINE;
+               dma_addr += MLX5E_XDP_MIN_INLINE;
+               dseg++;
+       }
+
+       /* write the dma part */
+       dseg->addr       = cpu_to_be64(dma_addr);
+       dseg->byte_count = cpu_to_be32(dma_len);
+
+       cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND);
+
+       /* Move the page reference to the SQ's responsibility and mark it
+        * so it is not put back in the page cache.
+        */
+       sq->db.xdpi[pi] = *xdpi;
+       sq->pc++;
+
+       sq->doorbell = true;
+
+       stats->xmit++;
+       return true;
+}
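+
+/* WQE layout sketch for mlx5e_xmit_xdp_frame() above: unless the SQ runs
+ * with MLX5_INLINE_MODE_NONE, the first MLX5E_XDP_MIN_INLINE bytes
+ * (ETH_HLEN + VLAN_HLEN = 18) are copied into the eth segment and the
+ * data segment then covers the remaining dma_len bytes of the frame.
+ */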
+
+bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
+{
+       struct mlx5e_xdpsq *sq;
+       struct mlx5_cqe64 *cqe;
+       struct mlx5e_rq *rq;
+       bool is_redirect;
+       u16 sqcc;
+       int i;
+
+       sq = container_of(cq, struct mlx5e_xdpsq, cq);
+
+       if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
+               return false;
+
+       cqe = mlx5_cqwq_get_cqe(&cq->wq);
+       if (!cqe)
+               return false;
+
+       is_redirect = test_bit(MLX5E_SQ_STATE_REDIRECT, &sq->state);
+       rq = container_of(sq, struct mlx5e_rq, xdpsq);
+
+       /* sq->cc must be updated only after mlx5_cqwq_update_db_record(),
+        * otherwise a cq overrun may occur
+        */
+       sqcc = sq->cc;
+
+       i = 0;
+       do {
+               u16 wqe_counter;
+               bool last_wqe;
+
+               mlx5_cqwq_pop(&cq->wq);
+
+               wqe_counter = be16_to_cpu(cqe->wqe_counter);
+
+               do {
+                       u16 ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
+                       struct mlx5e_xdp_info *xdpi = &sq->db.xdpi[ci];
+
+                       last_wqe = (sqcc == wqe_counter);
+                       sqcc++;
+
+                       if (is_redirect) {
+                               xdp_return_frame(xdpi->xdpf);
+                               dma_unmap_single(sq->pdev, xdpi->dma_addr,
+                                                xdpi->xdpf->len, DMA_TO_DEVICE);
+                       } else {
+                               /* Recycle RX page */
+                               mlx5e_page_release(rq, &xdpi->di, true);
+                       }
+               } while (!last_wqe);
+       } while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));
+
+       sq->stats->cqes += i;
+
+       mlx5_cqwq_update_db_record(&cq->wq);
+
+       /* ensure cq space is freed before enabling more cqes */
+       wmb();
+
+       sq->cc = sqcc;
+       return (i == MLX5E_TX_CQ_POLL_BUDGET);
+}
+
+void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq)
+{
+       struct mlx5e_rq *rq;
+       bool is_redirect;
+
+       is_redirect = test_bit(MLX5E_SQ_STATE_REDIRECT, &sq->state);
+       rq = is_redirect ? NULL : container_of(sq, struct mlx5e_rq, xdpsq);
+
+       while (sq->cc != sq->pc) {
+               u16 ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->cc);
+               struct mlx5e_xdp_info *xdpi = &sq->db.xdpi[ci];
+
+               sq->cc++;
+
+               if (is_redirect) {
+                       xdp_return_frame(xdpi->xdpf);
+                       dma_unmap_single(sq->pdev, xdpi->dma_addr,
+                                        xdpi->xdpf->len, DMA_TO_DEVICE);
+               } else {
+                       /* Recycle RX page */
+                       mlx5e_page_release(rq, &xdpi->di, false);
+               }
+       }
+}
+
+int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
+                  u32 flags)
+{
+       struct mlx5e_priv *priv = netdev_priv(dev);
+       struct mlx5e_xdpsq *sq;
+       int drops = 0;
+       int sq_num;
+       int i;
+
+       if (unlikely(!test_bit(MLX5E_STATE_OPENED, &priv->state)))
+               return -ENETDOWN;
+
+       if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+               return -EINVAL;
+
+       sq_num = smp_processor_id();
+
+       if (unlikely(sq_num >= priv->channels.num))
+               return -ENXIO;
+
+       sq = &priv->channels.c[sq_num]->xdpsq;
+
+       if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
+               return -ENETDOWN;
+
+       for (i = 0; i < n; i++) {
+               struct xdp_frame *xdpf = frames[i];
+               struct mlx5e_xdp_info xdpi;
+
+               xdpi.dma_addr = dma_map_single(sq->pdev, xdpf->data, xdpf->len,
+                                              DMA_TO_DEVICE);
+               if (unlikely(dma_mapping_error(sq->pdev, xdpi.dma_addr))) {
+                       xdp_return_frame_rx_napi(xdpf);
+                       drops++;
+                       continue;
+               }
+
+               xdpi.xdpf = xdpf;
+
+               if (unlikely(!mlx5e_xmit_xdp_frame(sq, &xdpi))) {
+                       dma_unmap_single(sq->pdev, xdpi.dma_addr,
+                                        xdpf->len, DMA_TO_DEVICE);
+                       xdp_return_frame_rx_napi(xdpf);
+                       drops++;
+               }
+       }
+
+       if (flags & XDP_XMIT_FLUSH)
+               mlx5e_xmit_xdp_doorbell(sq);
+
+       return n - drops;
+}
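+
+/* ndo_xdp_xmit contract as implemented above: the return value is the
+ * number of frames accepted (n - drops); frames that fail DMA mapping or
+ * ring admission are handed back via xdp_return_frame_rx_napi() and
+ * counted as drops, and XDP_XMIT_FLUSH rings the doorbell once for
+ * everything queued.
+ */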
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
new file mode 100644 (file)
index 0000000..6dfab04
--- /dev/null
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __MLX5_EN_XDP_H__
+#define __MLX5_EN_XDP_H__
+
+#include "en.h"
+
+#define MLX5E_XDP_MAX_MTU ((int)(PAGE_SIZE - \
+                                MLX5_SKB_FRAG_SZ(XDP_PACKET_HEADROOM)))
+#define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
+#define MLX5E_XDP_TX_DS_COUNT \
+       ((sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) + 1 /* SG DS */)
+
+bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
+                     void *va, u16 *rx_headroom, u32 *len);
+bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq);
+void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq);
+
+bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi);
+int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
+                  u32 flags);
+
+static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_xdpsq *sq)
+{
+       struct mlx5_wq_cyc *wq = &sq->wq;
+       struct mlx5e_tx_wqe *wqe;
+       u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc - 1); /* last pi */
+
+       wqe  = mlx5_wq_cyc_get_wqe(wq, pi);
+
+       mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &wqe->ctrl);
+}
+
+#endif
index f20074dbef32c3c2d2bfafcf500cbccde6ef84c9..1dd225380a66075eeceebe290209bfaf20cddf8c 100644 (file)
 #ifndef __MLX5E_EN_ACCEL_H__
 #define __MLX5E_EN_ACCEL_H__
 
-#ifdef CONFIG_MLX5_ACCEL
-
 #include <linux/skbuff.h>
 #include <linux/netdevice.h>
 #include "en_accel/ipsec_rxtx.h"
 #include "en_accel/tls_rxtx.h"
 #include "en.h"
 
-static inline struct sk_buff *mlx5e_accel_handle_tx(struct sk_buff *skb,
-                                                   struct mlx5e_txqsq *sq,
-                                                   struct net_device *dev,
-                                                   struct mlx5e_tx_wqe **wqe,
-                                                   u16 *pi)
+static inline void
+mlx5e_udp_gso_handle_tx_skb(struct sk_buff *skb)
+{
+       int payload_len = skb_shinfo(skb)->gso_size + sizeof(struct udphdr);
+
+       udp_hdr(skb)->len = htons(payload_len);
+}
+
+static inline struct sk_buff *
+mlx5e_accel_handle_tx(struct sk_buff *skb,
+                     struct mlx5e_txqsq *sq,
+                     struct net_device *dev,
+                     struct mlx5e_tx_wqe **wqe,
+                     u16 *pi)
 {
 #ifdef CONFIG_MLX5_EN_TLS
        if (test_bit(MLX5E_SQ_STATE_TLS, &sq->state)) {
@@ -64,9 +71,10 @@ static inline struct sk_buff *mlx5e_accel_handle_tx(struct sk_buff *skb,
        }
 #endif
 
+       if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
+               mlx5e_udp_gso_handle_tx_skb(skb);
+
        return skb;
 }
 
-#endif /* CONFIG_MLX5_ACCEL */
-
 #endif /* __MLX5E_EN_ACCEL_H__ */
index c245d8e78509f4c791a4099238eeba0a027948ff..128a82b1dbfc66147c1df824613f5d1e807a6e40 100644 (file)
@@ -37,6 +37,7 @@
 
 #include "en_accel/ipsec_rxtx.h"
 #include "en_accel/ipsec.h"
+#include "accel/accel.h"
 #include "en.h"
 
 enum {
@@ -346,19 +347,12 @@ mlx5e_ipsec_build_sp(struct net_device *netdev, struct sk_buff *skb,
 }
 
 struct sk_buff *mlx5e_ipsec_handle_rx_skb(struct net_device *netdev,
-                                         struct sk_buff *skb)
+                                         struct sk_buff *skb, u32 *cqe_bcnt)
 {
        struct mlx5e_ipsec_metadata *mdata;
-       struct ethhdr *old_eth;
-       struct ethhdr *new_eth;
        struct xfrm_state *xs;
-       __be16 *ethtype;
 
-       /* Detect inline metadata */
-       if (skb->len < ETH_HLEN + MLX5E_METADATA_ETHER_LEN)
-               return skb;
-       ethtype = (__be16 *)(skb->data + ETH_ALEN * 2);
-       if (*ethtype != cpu_to_be16(MLX5E_METADATA_ETHER_TYPE))
+       if (!is_metadata_hdr_valid(skb))
                return skb;
 
        /* Use the metadata */
@@ -369,12 +363,8 @@ struct sk_buff *mlx5e_ipsec_handle_rx_skb(struct net_device *netdev,
                return NULL;
        }
 
-       /* Remove the metadata from the buffer */
-       old_eth = (struct ethhdr *)skb->data;
-       new_eth = (struct ethhdr *)(skb->data + MLX5E_METADATA_ETHER_LEN);
-       memmove(new_eth, old_eth, 2 * ETH_ALEN);
-       /* Ethertype is already in its new place */
-       skb_pull_inline(skb, MLX5E_METADATA_ETHER_LEN);
+       remove_metadata_hdr(skb);
+       *cqe_bcnt -= MLX5E_METADATA_ETHER_LEN;
 
        return skb;
 }
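
The open-coded header stripping removed above moves into a shared accel/accel.h helper so the TLS RX path later in this patch can reuse it; the caller additionally shrinks *cqe_bcnt because the completion byte count still includes the stripped header. A sketch of the expected helper body, reconstructed from the removed lines (assumption: remove_metadata_hdr() preserves this behaviour):

    static inline void remove_metadata_hdr(struct sk_buff *skb)
    {
            struct ethhdr *old_eth = (struct ethhdr *)skb->data;
            struct ethhdr *new_eth = (struct ethhdr *)(skb->data +
                                            MLX5E_METADATA_ETHER_LEN);

            /* Slide the MAC addresses over the metadata; the ethertype
             * that follows it is already in its final place.
             */
            memmove(new_eth, old_eth, 2 * ETH_ALEN);
            skb_pull_inline(skb, MLX5E_METADATA_ETHER_LEN);
    }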
index 2bfbbef1b054a9435344c790c97a580a6dec8950..ca47c0540904aaefe5d44bd309940b2f21716ea0 100644 (file)
@@ -41,7 +41,7 @@
 #include "en.h"
 
 struct sk_buff *mlx5e_ipsec_handle_rx_skb(struct net_device *netdev,
-                                         struct sk_buff *skb);
+                                         struct sk_buff *skb, u32 *cqe_bcnt);
 void mlx5e_ipsec_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
 
 void mlx5e_ipsec_inverse_table_init(void);
index d167845271c33f7e5d568f0b0aa26242cfd98333..eddd7702680bce0284f8518e2f04920116a80e0b 100644 (file)
@@ -110,9 +110,7 @@ static int mlx5e_tls_add(struct net_device *netdev, struct sock *sk,
        u32 caps = mlx5_accel_tls_device_caps(mdev);
        int ret = -ENOMEM;
        void *flow;
-
-       if (direction != TLS_OFFLOAD_CTX_DIR_TX)
-               return -EINVAL;
+       u32 swid;
 
        flow = kzalloc(MLX5_ST_SZ_BYTES(tls_flow), GFP_KERNEL);
        if (!flow)
@@ -122,18 +120,23 @@ static int mlx5e_tls_add(struct net_device *netdev, struct sock *sk,
        if (ret)
                goto free_flow;
 
+       ret = mlx5_accel_tls_add_flow(mdev, flow, crypto_info,
+                                     start_offload_tcp_sn, &swid,
+                                     direction == TLS_OFFLOAD_CTX_DIR_TX);
+       if (ret < 0)
+               goto free_flow;
+
        if (direction == TLS_OFFLOAD_CTX_DIR_TX) {
-               struct mlx5e_tls_offload_context *tx_ctx =
+               struct mlx5e_tls_offload_context_tx *tx_ctx =
                    mlx5e_get_tls_tx_context(tls_ctx);
-               u32 swid;
-
-               ret = mlx5_accel_tls_add_tx_flow(mdev, flow, crypto_info,
-                                                start_offload_tcp_sn, &swid);
-               if (ret < 0)
-                       goto free_flow;
 
                tx_ctx->swid = htonl(swid);
                tx_ctx->expected_seq = start_offload_tcp_sn;
+       } else {
+               struct mlx5e_tls_offload_context_rx *rx_ctx =
+                   mlx5e_get_tls_rx_context(tls_ctx);
+
+               rx_ctx->handle = htonl(swid);
        }
 
        return 0;
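
TX and RX setup now funnel through one firmware call; the shape below is inferred from the call site in this hunk (not quoted from accel/tls.h, and the last parameter name is a guess). The returned ID doubles as the TX swid and the RX handle:

    int mlx5_accel_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
                                struct tls_crypto_info *crypto_info,
                                u32 start_offload_tcp_sn, u32 *p_swid,
                                bool tx);  /* true for TLS_OFFLOAD_CTX_DIR_TX */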
@@ -147,30 +150,60 @@ static void mlx5e_tls_del(struct net_device *netdev,
                          enum tls_offload_ctx_dir direction)
 {
        struct mlx5e_priv *priv = netdev_priv(netdev);
+       unsigned int handle;
 
-       if (direction == TLS_OFFLOAD_CTX_DIR_TX) {
-               u32 swid = ntohl(mlx5e_get_tls_tx_context(tls_ctx)->swid);
+       handle = ntohl((direction == TLS_OFFLOAD_CTX_DIR_TX) ?
+                      mlx5e_get_tls_tx_context(tls_ctx)->swid :
+                      mlx5e_get_tls_rx_context(tls_ctx)->handle);
 
-               mlx5_accel_tls_del_tx_flow(priv->mdev, swid);
-       } else {
-               netdev_err(netdev, "unsupported direction %d\n", direction);
-       }
+       mlx5_accel_tls_del_flow(priv->mdev, handle,
+                               direction == TLS_OFFLOAD_CTX_DIR_TX);
+}
+
+static void mlx5e_tls_resync_rx(struct net_device *netdev, struct sock *sk,
+                               u32 seq, u64 rcd_sn)
+{
+       struct tls_context *tls_ctx = tls_get_ctx(sk);
+       struct mlx5e_priv *priv = netdev_priv(netdev);
+       struct mlx5e_tls_offload_context_rx *rx_ctx;
+
+       rx_ctx = mlx5e_get_tls_rx_context(tls_ctx);
+
+       netdev_info(netdev, "resyncing seq %u rcd %llu\n", seq,
+                   be64_to_cpu(rcd_sn));
+       mlx5_accel_tls_resync_rx(priv->mdev, rx_ctx->handle, seq, rcd_sn);
+       atomic64_inc(&priv->tls->sw_stats.rx_tls_resync_reply);
 }
 
 static const struct tlsdev_ops mlx5e_tls_ops = {
        .tls_dev_add = mlx5e_tls_add,
        .tls_dev_del = mlx5e_tls_del,
+       .tls_dev_resync_rx = mlx5e_tls_resync_rx,
 };
 
 void mlx5e_tls_build_netdev(struct mlx5e_priv *priv)
 {
+       u32 caps = mlx5_accel_tls_device_caps(priv->mdev);
        struct net_device *netdev = priv->netdev;
 
        if (!mlx5_accel_is_tls_device(priv->mdev))
                return;
 
-       netdev->features |= NETIF_F_HW_TLS_TX;
-       netdev->hw_features |= NETIF_F_HW_TLS_TX;
+       if (caps & MLX5_ACCEL_TLS_TX) {
+               netdev->features          |= NETIF_F_HW_TLS_TX;
+               netdev->hw_features       |= NETIF_F_HW_TLS_TX;
+       }
+
+       if (caps & MLX5_ACCEL_TLS_RX) {
+               netdev->features          |= NETIF_F_HW_TLS_RX;
+               netdev->hw_features       |= NETIF_F_HW_TLS_RX;
+       }
+
+       if (!(caps & MLX5_ACCEL_TLS_LRO)) {
+               netdev->features          &= ~NETIF_F_LRO;
+               netdev->hw_features       &= ~NETIF_F_LRO;
+       }
+
        netdev->tlsdev_ops = &mlx5e_tls_ops;
 }
 
index b6162178f6211371cb72119e89d7a2a908e3059a..3f5d72163b56103c9c8d8c0b6536330e324d848a 100644 (file)
@@ -43,25 +43,44 @@ struct mlx5e_tls_sw_stats {
        atomic64_t tx_tls_drop_resync_alloc;
        atomic64_t tx_tls_drop_no_sync_data;
        atomic64_t tx_tls_drop_bypass_required;
+       atomic64_t rx_tls_drop_resync_request;
+       atomic64_t rx_tls_resync_request;
+       atomic64_t rx_tls_resync_reply;
+       atomic64_t rx_tls_auth_fail;
 };
 
 struct mlx5e_tls {
        struct mlx5e_tls_sw_stats sw_stats;
 };
 
-struct mlx5e_tls_offload_context {
-       struct tls_offload_context base;
+struct mlx5e_tls_offload_context_tx {
+       struct tls_offload_context_tx base;
        u32 expected_seq;
        __be32 swid;
 };
 
-static inline struct mlx5e_tls_offload_context *
+static inline struct mlx5e_tls_offload_context_tx *
 mlx5e_get_tls_tx_context(struct tls_context *tls_ctx)
 {
-       BUILD_BUG_ON(sizeof(struct mlx5e_tls_offload_context) >
-                    TLS_OFFLOAD_CONTEXT_SIZE);
-       return container_of(tls_offload_ctx(tls_ctx),
-                           struct mlx5e_tls_offload_context,
+       BUILD_BUG_ON(sizeof(struct mlx5e_tls_offload_context_tx) >
+                    TLS_OFFLOAD_CONTEXT_SIZE_TX);
+       return container_of(tls_offload_ctx_tx(tls_ctx),
+                           struct mlx5e_tls_offload_context_tx,
+                           base);
+}
+
+struct mlx5e_tls_offload_context_rx {
+       struct tls_offload_context_rx base;
+       __be32 handle;
+};
+
+static inline struct mlx5e_tls_offload_context_rx *
+mlx5e_get_tls_rx_context(struct tls_context *tls_ctx)
+{
+       BUILD_BUG_ON(sizeof(struct mlx5e_tls_offload_context_rx) >
+                    TLS_OFFLOAD_CONTEXT_SIZE_RX);
+       return container_of(tls_offload_ctx_rx(tls_ctx),
+                           struct mlx5e_tls_offload_context_rx,
                            base);
 }
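
The BUILD_BUG_ON pairs encode the nesting contract with the TLS core: the core allocates TLS_OFFLOAD_CONTEXT_SIZE_{TX,RX} bytes, and the driver wrapper, with the core context as its first member, must fit inside that allocation. The resulting layout, sketched:

    /* allocation by the tls core, TLS_OFFLOAD_CONTEXT_SIZE_RX bytes:
     *
     *   +-------------------------------+---------------------+
     *   | struct tls_offload_context_rx | driver area         |
     *   +-------------------------------+---------------------+
     *   ^ base                            __be32 handle, etc.
     */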
 
index 15aef71d19576b251891b962bbf4784c90d4cf27..92d37459850eb6ff2a3a1ebc5bee554fb709c884 100644 (file)
 
 #include "en_accel/tls.h"
 #include "en_accel/tls_rxtx.h"
+#include "accel/accel.h"
+
+#include <net/inet6_hashtables.h>
+#include <linux/ipv6.h>
+
+#define SYNDROM_DECRYPTED  0x30
+#define SYNDROM_RESYNC_REQUEST 0x31
+#define SYNDROM_AUTH_FAILED 0x32
 
 #define SYNDROME_OFFLOAD_REQUIRED 32
 #define SYNDROME_SYNC 33
@@ -44,10 +52,26 @@ struct sync_info {
        skb_frag_t frags[MAX_SKB_FRAGS];
 };
 
-struct mlx5e_tls_metadata {
+struct recv_metadata_content {
+       u8 syndrome;
+       u8 reserved;
+       __be32 sync_seq;
+} __packed;
+
+struct send_metadata_content {
        /* One byte of syndrome followed by 3 bytes of swid */
        __be32 syndrome_swid;
        __be16 first_seq;
+} __packed;
+
+struct mlx5e_tls_metadata {
+       union {
+               /* from fpga to host */
+               struct recv_metadata_content recv;
+               /* from host to fpga */
+               struct send_metadata_content send;
+               unsigned char raw[6];
+       } __packed content;
        /* packet type ID field */
        __be16 ethertype;
 } __packed;
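
The union fixes the direction-specific content at six bytes; the trailing ethertype member is the original packet's type field that follows the inserted metadata on the wire. Compile-time checks one could drop into an init function to pin this down (the sizes follow from the __packed layout above; equality with MLX5E_METADATA_ETHER_LEN is an assumption):

    BUILD_BUG_ON(sizeof(struct recv_metadata_content) != 6);
    BUILD_BUG_ON(sizeof(struct send_metadata_content) != 6);
    BUILD_BUG_ON(sizeof(struct mlx5e_tls_metadata)    != 8);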
@@ -68,12 +92,13 @@ static int mlx5e_tls_add_metadata(struct sk_buff *skb, __be32 swid)
                2 * ETH_ALEN);
 
        eth->h_proto = cpu_to_be16(MLX5E_METADATA_ETHER_TYPE);
-       pet->syndrome_swid = htonl(SYNDROME_OFFLOAD_REQUIRED << 24) | swid;
+       pet->content.send.syndrome_swid =
+               htonl(SYNDROME_OFFLOAD_REQUIRED << 24) | swid;
 
        return 0;
 }
 
-static int mlx5e_tls_get_sync_data(struct mlx5e_tls_offload_context *context,
+static int mlx5e_tls_get_sync_data(struct mlx5e_tls_offload_context_tx *context,
                                   u32 tcp_seq, struct sync_info *info)
 {
        int remaining, i = 0, ret = -EINVAL;
@@ -149,7 +174,7 @@ static void mlx5e_tls_complete_sync_skb(struct sk_buff *skb,
 
        pet = (struct mlx5e_tls_metadata *)(nskb->data + sizeof(struct ethhdr));
        memcpy(pet, &syndrome, sizeof(syndrome));
-       pet->first_seq = htons(tcp_seq);
+       pet->content.send.first_seq = htons(tcp_seq);
 
        /* MLX5 devices don't care about the checksum partial start, offset
         * and pseudo header
@@ -161,7 +186,7 @@ static void mlx5e_tls_complete_sync_skb(struct sk_buff *skb,
 }
 
 static struct sk_buff *
-mlx5e_tls_handle_ooo(struct mlx5e_tls_offload_context *context,
+mlx5e_tls_handle_ooo(struct mlx5e_tls_offload_context_tx *context,
                     struct mlx5e_txqsq *sq, struct sk_buff *skb,
                     struct mlx5e_tx_wqe **wqe,
                     u16 *pi,
@@ -239,7 +264,7 @@ struct sk_buff *mlx5e_tls_handle_tx_skb(struct net_device *netdev,
                                        u16 *pi)
 {
        struct mlx5e_priv *priv = netdev_priv(netdev);
-       struct mlx5e_tls_offload_context *context;
+       struct mlx5e_tls_offload_context_tx *context;
        struct tls_context *tls_ctx;
        u32 expected_seq;
        int datalen;
@@ -276,3 +301,83 @@ struct sk_buff *mlx5e_tls_handle_tx_skb(struct net_device *netdev,
 out:
        return skb;
 }
+
+static int tls_update_resync_sn(struct net_device *netdev,
+                               struct sk_buff *skb,
+                               struct mlx5e_tls_metadata *mdata)
+{
+       struct sock *sk = NULL;
+       struct iphdr *iph;
+       struct tcphdr *th;
+       __be32 seq;
+
+       if (mdata->ethertype != htons(ETH_P_IP))
+               return -EINVAL;
+
+       iph = (struct iphdr *)(mdata + 1);
+
+       th = ((void *)iph) + iph->ihl * 4;
+
+       if (iph->version == 4) {
+               sk = inet_lookup_established(dev_net(netdev), &tcp_hashinfo,
+                                            iph->saddr, th->source, iph->daddr,
+                                            th->dest, netdev->ifindex);
+#if IS_ENABLED(CONFIG_IPV6)
+       } else {
+               struct ipv6hdr *ipv6h = (struct ipv6hdr *)iph;
+
+               sk = __inet6_lookup_established(dev_net(netdev), &tcp_hashinfo,
+                                               &ipv6h->saddr, th->source,
+                                               &ipv6h->daddr, th->dest,
+                                               netdev->ifindex, 0);
+#endif
+       }
+       if (!sk || sk->sk_state == TCP_TIME_WAIT) {
+               struct mlx5e_priv *priv = netdev_priv(netdev);
+
+               atomic64_inc(&priv->tls->sw_stats.rx_tls_drop_resync_request);
+               goto out;
+       }
+
+       skb->sk = sk;
+       skb->destructor = sock_edemux;
+
+       memcpy(&seq, &mdata->content.recv.sync_seq, sizeof(seq));
+       tls_offload_rx_resync_request(sk, seq);
+out:
+       return 0;
+}
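
On the success path the socket reference taken by inet_lookup_established() is handed to the skb rather than dropped here, the usual early-demux lifetime pattern:

    skb->sk = sk;                  /* reference from the lookup        */
    skb->destructor = sock_edemux; /* releases it when the skb is freed */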
+
+void mlx5e_tls_handle_rx_skb(struct net_device *netdev, struct sk_buff *skb,
+                            u32 *cqe_bcnt)
+{
+       struct mlx5e_tls_metadata *mdata;
+       struct mlx5e_priv *priv;
+
+       if (!is_metadata_hdr_valid(skb))
+               return;
+
+       /* Use the metadata */
+       mdata = (struct mlx5e_tls_metadata *)(skb->data + ETH_HLEN);
+       switch (mdata->content.recv.syndrome) {
+       case SYNDROM_DECRYPTED:
+               skb->decrypted = 1;
+               break;
+       case SYNDROM_RESYNC_REQUEST:
+               tls_update_resync_sn(netdev, skb, mdata);
+               priv = netdev_priv(netdev);
+               atomic64_inc(&priv->tls->sw_stats.rx_tls_resync_request);
+               break;
+       case SYNDROM_AUTH_FAILED:
+               /* Authentication failure will be observed and verified by kTLS */
+               priv = netdev_priv(netdev);
+               atomic64_inc(&priv->tls->sw_stats.rx_tls_auth_fail);
+               break;
+       default:
+               /* Unknown syndrome: leave the metadata header in place */
+               return;
+       }
+
+       remove_metadata_hdr(skb);
+       *cqe_bcnt -= MLX5E_METADATA_ETHER_LEN;
+}
index 405dfd302225c4f3737efe79efe0b7c07b9d6871..311667ec71b89b5007370eb75b9cf2e525b131bc 100644 (file)
@@ -45,6 +45,9 @@ struct sk_buff *mlx5e_tls_handle_tx_skb(struct net_device *netdev,
                                        struct mlx5e_tx_wqe **wqe,
                                        u16 *pi);
 
+void mlx5e_tls_handle_rx_skb(struct net_device *netdev, struct sk_buff *skb,
+                            u32 *cqe_bcnt);
+
 #endif /* CONFIG_MLX5_EN_TLS */
 
 #endif /* __MLX5E_TLS_RXTX_H__ */
index c592678ab5f14b884c822ffef6e288abe66f5ba5..a2fb21ca5767cba5d1f507bb5dded744ef86e12c 100644 (file)
@@ -45,8 +45,9 @@
 #include "en_accel/tls.h"
 #include "accel/ipsec.h"
 #include "accel/tls.h"
-#include "vxlan.h"
+#include "lib/vxlan.h"
 #include "en/port.h"
+#include "en/xdp.h"
 
 struct mlx5e_rq_param {
        u32                     rqc[MLX5_ST_SZ_DW(rqc)];
@@ -96,14 +97,19 @@ bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
 
 static u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params)
 {
-       if (!params->xdp_prog) {
-               u16 hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
-               u16 rq_headroom = MLX5_RX_HEADROOM + NET_IP_ALIGN;
+       u16 hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
+       u16 linear_rq_headroom = params->xdp_prog ?
+               XDP_PACKET_HEADROOM : MLX5_RX_HEADROOM;
+       u32 frag_sz;
 
-               return MLX5_SKB_FRAG_SZ(rq_headroom + hw_mtu);
-       }
+       linear_rq_headroom += NET_IP_ALIGN;
+
+       frag_sz = MLX5_SKB_FRAG_SZ(linear_rq_headroom + hw_mtu);
 
-       return PAGE_SIZE;
+       if (params->xdp_prog && frag_sz < PAGE_SIZE)
+               frag_sz = PAGE_SIZE;
+
+       return frag_sz;
 }
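
A worked example of the unified fragment sizing, with assumed values (sw_mtu = 1500, 22 bytes of hard L2 overhead, XDP_PACKET_HEADROOM = 256, NET_IP_ALIGN = 0; MLX5_SKB_FRAG_SZ() adds skb_shared_info tailroom and alignment, definition not shown in this hunk):

    hw_mtu  = 1500 + 22                /* 1522                          */
    frag_sz = FRAG_SZ(256 + 1522)      /* ~2K..4K once tailroom is added */
    frag_sz = max(frag_sz, PAGE_SIZE)  /* XDP case: one packet per page  */

Rounding up to a full page when an XDP program is attached keeps XDP_TX page ownership and recycling simple.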
 
 static u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params)
@@ -270,12 +276,9 @@ void mlx5e_update_stats_work(struct work_struct *work)
        struct delayed_work *dwork = to_delayed_work(work);
        struct mlx5e_priv *priv = container_of(dwork, struct mlx5e_priv,
                                               update_stats_work);
+
        mutex_lock(&priv->state_lock);
-       if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
-               priv->profile->update_stats(priv);
-               queue_delayed_work(priv->wq, dwork,
-                                  msecs_to_jiffies(MLX5E_UPDATE_STATS_INTERVAL));
-       }
+       priv->profile->update_stats(priv);
        mutex_unlock(&priv->state_lock);
 }
 
@@ -352,8 +355,9 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq,
 {
        int wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq);
 
-       rq->mpwqe.info = kcalloc_node(wq_sz, sizeof(*rq->mpwqe.info),
-                                     GFP_KERNEL, cpu_to_node(c->cpu));
+       rq->mpwqe.info = kvzalloc_node(array_size(wq_sz,
+                                                 sizeof(*rq->mpwqe.info)),
+                                      GFP_KERNEL, cpu_to_node(c->cpu));
        if (!rq->mpwqe.info)
                return -ENOMEM;
 
@@ -487,7 +491,6 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
        rq->channel = c;
        rq->ix      = c->ix;
        rq->mdev    = mdev;
-       rq->hw_mtu  = MLX5E_SW2HW_MTU(params, params->sw_mtu);
        rq->stats   = &c->priv->channel_stats[c->ix].rq;
 
        rq->xdp_prog = params->xdp_prog ? bpf_prog_inc(params->xdp_prog) : NULL;
@@ -670,7 +673,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 err_free:
        switch (rq->wq_type) {
        case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
-               kfree(rq->mpwqe.info);
+               kvfree(rq->mpwqe.info);
                mlx5_core_destroy_mkey(mdev, &rq->umr_mkey);
                break;
        default: /* MLX5_WQ_TYPE_CYCLIC */
@@ -702,7 +705,7 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq)
 
        switch (rq->wq_type) {
        case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
-               kfree(rq->mpwqe.info);
+               kvfree(rq->mpwqe.info);
                mlx5_core_destroy_mkey(rq->mdev, &rq->umr_mkey);
                break;
        default: /* MLX5_WQ_TYPE_CYCLIC */
@@ -879,7 +882,7 @@ static void mlx5e_free_rx_descs(struct mlx5e_rq *rq)
 
                /* UMR WQE (if in progress) is always at wq->head */
                if (rq->mpwqe.umr_in_progress)
-                       mlx5e_free_rx_mpwqe(rq, &rq->mpwqe.info[wq->head]);
+                       rq->dealloc_wqe(rq, wq->head);
 
                while (!mlx5_wq_ll_is_empty(wq)) {
                        struct mlx5e_rx_wqe_ll *wqe;
@@ -965,16 +968,16 @@ static void mlx5e_close_rq(struct mlx5e_rq *rq)
 
 static void mlx5e_free_xdpsq_db(struct mlx5e_xdpsq *sq)
 {
-       kfree(sq->db.di);
+       kvfree(sq->db.xdpi);
 }
 
 static int mlx5e_alloc_xdpsq_db(struct mlx5e_xdpsq *sq, int numa)
 {
        int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
 
-       sq->db.di = kcalloc_node(wq_sz, sizeof(*sq->db.di),
-                                    GFP_KERNEL, numa);
-       if (!sq->db.di) {
+       sq->db.xdpi = kvzalloc_node(array_size(wq_sz, sizeof(*sq->db.xdpi)),
+                                   GFP_KERNEL, numa);
+       if (!sq->db.xdpi) {
                mlx5e_free_xdpsq_db(sq);
                return -ENOMEM;
        }
@@ -985,7 +988,8 @@ static int mlx5e_alloc_xdpsq_db(struct mlx5e_xdpsq *sq, int numa)
 static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c,
                             struct mlx5e_params *params,
                             struct mlx5e_sq_param *param,
-                            struct mlx5e_xdpsq *sq)
+                            struct mlx5e_xdpsq *sq,
+                            bool is_redirect)
 {
        void *sqc_wq               = MLX5_ADDR_OF(sqc, param->sqc, wq);
        struct mlx5_core_dev *mdev = c->mdev;
@@ -997,6 +1001,10 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c,
        sq->channel   = c;
        sq->uar_map   = mdev->mlx5e_res.bfreg.map;
        sq->min_inline_mode = params->tx_min_inline_mode;
+       sq->hw_mtu    = MLX5E_SW2HW_MTU(params, params->sw_mtu);
+       sq->stats     = is_redirect ?
+               &c->priv->channel_stats[c->ix].xdpsq :
+               &c->priv->channel_stats[c->ix].rq_xdpsq;
 
        param->wq.db_numa_node = cpu_to_node(c->cpu);
        err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl);
@@ -1024,15 +1032,16 @@ static void mlx5e_free_xdpsq(struct mlx5e_xdpsq *sq)
 
 static void mlx5e_free_icosq_db(struct mlx5e_icosq *sq)
 {
-       kfree(sq->db.ico_wqe);
+       kvfree(sq->db.ico_wqe);
 }
 
 static int mlx5e_alloc_icosq_db(struct mlx5e_icosq *sq, int numa)
 {
        u8 wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
 
-       sq->db.ico_wqe = kcalloc_node(wq_sz, sizeof(*sq->db.ico_wqe),
-                                     GFP_KERNEL, numa);
+       sq->db.ico_wqe = kvzalloc_node(array_size(wq_sz,
+                                                 sizeof(*sq->db.ico_wqe)),
+                                      GFP_KERNEL, numa);
        if (!sq->db.ico_wqe)
                return -ENOMEM;
 
@@ -1077,8 +1086,8 @@ static void mlx5e_free_icosq(struct mlx5e_icosq *sq)
 
 static void mlx5e_free_txqsq_db(struct mlx5e_txqsq *sq)
 {
-       kfree(sq->db.wqe_info);
-       kfree(sq->db.dma_fifo);
+       kvfree(sq->db.wqe_info);
+       kvfree(sq->db.dma_fifo);
 }
 
 static int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa)
@@ -1086,10 +1095,12 @@ static int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa)
        int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
        int df_sz = wq_sz * MLX5_SEND_WQEBB_NUM_DS;
 
-       sq->db.dma_fifo = kcalloc_node(df_sz, sizeof(*sq->db.dma_fifo),
-                                          GFP_KERNEL, numa);
-       sq->db.wqe_info = kcalloc_node(wq_sz, sizeof(*sq->db.wqe_info),
-                                          GFP_KERNEL, numa);
+       sq->db.dma_fifo = kvzalloc_node(array_size(df_sz,
+                                                  sizeof(*sq->db.dma_fifo)),
+                                       GFP_KERNEL, numa);
+       sq->db.wqe_info = kvzalloc_node(array_size(wq_sz,
+                                                  sizeof(*sq->db.wqe_info)),
+                                       GFP_KERNEL, numa);
        if (!sq->db.dma_fifo || !sq->db.wqe_info) {
                mlx5e_free_txqsq_db(sq);
                return -ENOMEM;
@@ -1523,7 +1534,8 @@ static void mlx5e_close_icosq(struct mlx5e_icosq *sq)
 static int mlx5e_open_xdpsq(struct mlx5e_channel *c,
                            struct mlx5e_params *params,
                            struct mlx5e_sq_param *param,
-                           struct mlx5e_xdpsq *sq)
+                           struct mlx5e_xdpsq *sq,
+                           bool is_redirect)
 {
        unsigned int ds_cnt = MLX5E_XDP_TX_DS_COUNT;
        struct mlx5e_create_sq_param csp = {};
@@ -1531,7 +1543,7 @@ static int mlx5e_open_xdpsq(struct mlx5e_channel *c,
        int err;
        int i;
 
-       err = mlx5e_alloc_xdpsq(c, params, param, sq);
+       err = mlx5e_alloc_xdpsq(c, params, param, sq, is_redirect);
        if (err)
                return err;
 
@@ -1540,6 +1552,8 @@ static int mlx5e_open_xdpsq(struct mlx5e_channel *c,
        csp.cqn             = sq->cq.mcq.cqn;
        csp.wq_ctrl         = &sq->wq_ctrl;
        csp.min_inline_mode = sq->min_inline_mode;
+       if (is_redirect)
+               set_bit(MLX5E_SQ_STATE_REDIRECT, &sq->state);
        set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
        err = mlx5e_create_sq_rdy(c->mdev, param, &csp, &sq->sqn);
        if (err)
@@ -1893,7 +1907,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
        int err;
        int eqn;
 
-       c = kzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu));
+       c = kvzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu));
        if (!c)
                return -ENOMEM;
 
@@ -1922,10 +1936,14 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
        if (err)
                goto err_close_icosq_cq;
 
-       err = mlx5e_open_cq(c, params->rx_cq_moderation, &cparam->rx_cq, &c->rq.cq);
+       err = mlx5e_open_cq(c, params->tx_cq_moderation, &cparam->tx_cq, &c->xdpsq.cq);
        if (err)
                goto err_close_tx_cqs;
 
+       err = mlx5e_open_cq(c, params->rx_cq_moderation, &cparam->rx_cq, &c->rq.cq);
+       if (err)
+               goto err_close_xdp_tx_cqs;
+
        /* XDP SQ CQ params are the same as normal TXQ SQ CQ params */
        err = c->xdp ? mlx5e_open_cq(c, params->tx_cq_moderation,
                                     &cparam->tx_cq, &c->rq.xdpsq.cq) : 0;
@@ -1942,7 +1960,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
        if (err)
                goto err_close_icosq;
 
-       err = c->xdp ? mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, &c->rq.xdpsq) : 0;
+       err = c->xdp ? mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, &c->rq.xdpsq, false) : 0;
        if (err)
                goto err_close_sqs;
 
@@ -1950,9 +1968,17 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
        if (err)
                goto err_close_xdp_sq;
 
+       err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, &c->xdpsq, true);
+       if (err)
+               goto err_close_rq;
+
        *cp = c;
 
        return 0;
+
+err_close_rq:
+       mlx5e_close_rq(&c->rq);
+
 err_close_xdp_sq:
        if (c->xdp)
                mlx5e_close_xdpsq(&c->rq.xdpsq);
@@ -1971,6 +1997,9 @@ err_disable_napi:
 err_close_rx_cq:
        mlx5e_close_cq(&c->rq.cq);
 
+err_close_xdp_tx_cqs:
+       mlx5e_close_cq(&c->xdpsq.cq);
+
 err_close_tx_cqs:
        mlx5e_close_tx_cqs(c);
 
@@ -1979,7 +2008,7 @@ err_close_icosq_cq:
 
 err_napi_del:
        netif_napi_del(&c->napi);
-       kfree(c);
+       kvfree(c);
 
        return err;
 }
@@ -2005,6 +2034,7 @@ static void mlx5e_deactivate_channel(struct mlx5e_channel *c)
 
 static void mlx5e_close_channel(struct mlx5e_channel *c)
 {
+       mlx5e_close_xdpsq(&c->xdpsq);
        mlx5e_close_rq(&c->rq);
        if (c->xdp)
                mlx5e_close_xdpsq(&c->rq.xdpsq);
@@ -2014,11 +2044,12 @@ static void mlx5e_close_channel(struct mlx5e_channel *c)
        if (c->xdp)
                mlx5e_close_cq(&c->rq.xdpsq.cq);
        mlx5e_close_cq(&c->rq.cq);
+       mlx5e_close_cq(&c->xdpsq.cq);
        mlx5e_close_tx_cqs(c);
        mlx5e_close_cq(&c->icosq.cq);
        netif_napi_del(&c->napi);
 
-       kfree(c);
+       kvfree(c);
 }
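
Teardown mirrors setup in reverse: the redirect SQ (c->xdpsq) is opened last in mlx5e_open_channel(), so it is closed first here, and its CQ, opened just after the TX CQs, is closed just before them. As an ordering rule:

    /* open:  ... tx_cqs -> xdpsq.cq -> rq.cq -> ... -> rq -> xdpsq
     * close: xdpsq -> rq -> ... -> rq.cq -> xdpsq.cq -> tx_cqs ...
     */
    mlx5e_close_xdpsq(&c->xdpsq);  /* opened last, closed first */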
 
 #define DEFAULT_FRAG_SIZE (2048)
@@ -2276,7 +2307,7 @@ int mlx5e_open_channels(struct mlx5e_priv *priv,
        chs->num = chs->params.num_channels;
 
        chs->c = kcalloc(chs->num, sizeof(struct mlx5e_channel *), GFP_KERNEL);
-       cparam = kzalloc(sizeof(struct mlx5e_channel_param), GFP_KERNEL);
+       cparam = kvzalloc(sizeof(struct mlx5e_channel_param), GFP_KERNEL);
        if (!chs->c || !cparam)
                goto err_free;
 
@@ -2287,7 +2318,7 @@ int mlx5e_open_channels(struct mlx5e_priv *priv,
                        goto err_close_channels;
        }
 
-       kfree(cparam);
+       kvfree(cparam);
        return 0;
 
 err_close_channels:
@@ -2296,7 +2327,7 @@ err_close_channels:
 
 err_free:
        kfree(chs->c);
-       kfree(cparam);
+       kvfree(cparam);
        chs->num = 0;
        return err;
 }
@@ -2943,7 +2974,7 @@ int mlx5e_open(struct net_device *netdev)
                mlx5_set_port_admin_status(priv->mdev, MLX5_PORT_UP);
        mutex_unlock(&priv->state_lock);
 
-       if (mlx5e_vxlan_allowed(priv->mdev))
+       if (mlx5_vxlan_allowed(priv->mdev->vxlan))
                udp_tunnel_get_rx_info(netdev);
 
        return err;
@@ -3371,7 +3402,7 @@ static int mlx5e_setup_tc_block(struct net_device *dev,
        switch (f->command) {
        case TC_BLOCK_BIND:
                return tcf_block_cb_register(f->block, mlx5e_setup_tc_block_cb,
-                                            priv, priv);
+                                            priv, priv, f->extack);
        case TC_BLOCK_UNBIND:
                tcf_block_cb_unregister(f->block, mlx5e_setup_tc_block_cb,
                                        priv);
@@ -3405,6 +3436,9 @@ mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
        struct mlx5e_vport_stats *vstats = &priv->stats.vport;
        struct mlx5e_pport_stats *pstats = &priv->stats.pport;
 
+       /* update HW stats in background for next time */
+       queue_delayed_work(priv->wq, &priv->update_stats_work, 0);
+
        if (mlx5e_is_uplink_rep(priv)) {
                stats->rx_packets = PPORT_802_3_GET(pstats, a_frames_received_ok);
                stats->rx_bytes   = PPORT_802_3_GET(pstats, a_octets_received_ok);
@@ -3703,6 +3737,14 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu,
        new_channels.params = *params;
        new_channels.params.sw_mtu = new_mtu;
 
+       if (params->xdp_prog &&
+           !mlx5e_rx_is_linear_skb(priv->mdev, &new_channels.params)) {
+               netdev_err(netdev, "MTU(%d) > %d is not allowed while XDP enabled\n",
+                          new_mtu, MLX5E_XDP_MAX_MTU);
+               err = -EINVAL;
+               goto out;
+       }
+
        if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
                u8 ppw_old = mlx5e_mpwqe_log_pkts_per_wqe(params);
                u8 ppw_new = mlx5e_mpwqe_log_pkts_per_wqe(&new_channels.params);
@@ -3928,6 +3970,57 @@ static int mlx5e_get_vf_stats(struct net_device *dev,
 }
 #endif
 
+struct mlx5e_vxlan_work {
+       struct work_struct      work;
+       struct mlx5e_priv       *priv;
+       u16                     port;
+};
+
+static void mlx5e_vxlan_add_work(struct work_struct *work)
+{
+       struct mlx5e_vxlan_work *vxlan_work =
+               container_of(work, struct mlx5e_vxlan_work, work);
+       struct mlx5e_priv *priv = vxlan_work->priv;
+       u16 port = vxlan_work->port;
+
+       mutex_lock(&priv->state_lock);
+       mlx5_vxlan_add_port(priv->mdev->vxlan, port);
+       mutex_unlock(&priv->state_lock);
+
+       kfree(vxlan_work);
+}
+
+static void mlx5e_vxlan_del_work(struct work_struct *work)
+{
+       struct mlx5e_vxlan_work *vxlan_work =
+               container_of(work, struct mlx5e_vxlan_work, work);
+       struct mlx5e_priv *priv         = vxlan_work->priv;
+       u16 port = vxlan_work->port;
+
+       mutex_lock(&priv->state_lock);
+       mlx5_vxlan_del_port(priv->mdev->vxlan, port);
+       mutex_unlock(&priv->state_lock);
+       kfree(vxlan_work);
+}
+
+static void mlx5e_vxlan_queue_work(struct mlx5e_priv *priv, u16 port, int add)
+{
+       struct mlx5e_vxlan_work *vxlan_work;
+
+       vxlan_work = kmalloc(sizeof(*vxlan_work), GFP_ATOMIC);
+       if (!vxlan_work)
+               return;
+
+       if (add)
+               INIT_WORK(&vxlan_work->work, mlx5e_vxlan_add_work);
+       else
+               INIT_WORK(&vxlan_work->work, mlx5e_vxlan_del_work);
+
+       vxlan_work->priv = priv;
+       vxlan_work->port = port;
+       queue_work(priv->wq, &vxlan_work->work);
+}
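
The tunnel callbacks below may run in atomic context, while reprogramming the device's VXLAN port table sleeps; hence the GFP_ATOMIC allocation and the deferral to priv->wq under priv->state_lock. The sa_family argument of the old helper is gone because a UDP port is offloaded irrespective of address family. Caller shape, from the hunks below:

    mlx5e_vxlan_queue_work(priv, be16_to_cpu(ti->port), 1);  /* add    */
    mlx5e_vxlan_queue_work(priv, be16_to_cpu(ti->port), 0);  /* delete */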
+
 static void mlx5e_add_vxlan_port(struct net_device *netdev,
                                 struct udp_tunnel_info *ti)
 {
@@ -3936,10 +4029,10 @@ static void mlx5e_add_vxlan_port(struct net_device *netdev,
        if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
                return;
 
-       if (!mlx5e_vxlan_allowed(priv->mdev))
+       if (!mlx5_vxlan_allowed(priv->mdev->vxlan))
                return;
 
-       mlx5e_vxlan_queue_work(priv, ti->sa_family, be16_to_cpu(ti->port), 1);
+       mlx5e_vxlan_queue_work(priv, be16_to_cpu(ti->port), 1);
 }
 
 static void mlx5e_del_vxlan_port(struct net_device *netdev,
@@ -3950,10 +4043,10 @@ static void mlx5e_del_vxlan_port(struct net_device *netdev,
        if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
                return;
 
-       if (!mlx5e_vxlan_allowed(priv->mdev))
+       if (!mlx5_vxlan_allowed(priv->mdev->vxlan))
                return;
 
-       mlx5e_vxlan_queue_work(priv, ti->sa_family, be16_to_cpu(ti->port), 0);
+       mlx5e_vxlan_queue_work(priv, be16_to_cpu(ti->port), 0);
 }
 
 static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv,
@@ -3984,7 +4077,7 @@ static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv,
                port = be16_to_cpu(udph->dest);
 
                /* Verify if UDP port is being offloaded by HW */
-               if (mlx5e_vxlan_lookup_port(priv, port))
+               if (mlx5_vxlan_lookup_port(priv->mdev->vxlan, port))
                        return features;
        }
 
@@ -4091,26 +4184,47 @@ static void mlx5e_tx_timeout(struct net_device *dev)
        queue_work(priv->wq, &priv->tx_timeout_work);
 }
 
+static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog)
+{
+       struct net_device *netdev = priv->netdev;
+       struct mlx5e_channels new_channels = {};
+
+       if (priv->channels.params.lro_en) {
+               netdev_warn(netdev, "can't set XDP while LRO is on, disable LRO first\n");
+               return -EINVAL;
+       }
+
+       if (MLX5_IPSEC_DEV(priv->mdev)) {
+               netdev_warn(netdev, "can't set XDP with IPSec offload\n");
+               return -EINVAL;
+       }
+
+       new_channels.params = priv->channels.params;
+       new_channels.params.xdp_prog = prog;
+
+       if (!mlx5e_rx_is_linear_skb(priv->mdev, &new_channels.params)) {
+               netdev_warn(netdev, "XDP is not allowed with MTU(%d) > %d\n",
+                           new_channels.params.sw_mtu, MLX5E_XDP_MAX_MTU);
+               return -EINVAL;
+       }
+
+       return 0;
+}
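
Pulling the checks into mlx5e_xdp_allowed() also tightens them: instead of netdev feature bits it tests the LRO parameter and the IPsec capability directly, and it simulates the new channel parameters to verify every packet would still fit a linear, single-page XDP buffer. The MTU limit, as arithmetic under the same assumptions as the en/xdp.h header at the top of this patch:

    /* MLX5E_XDP_MAX_MTU = PAGE_SIZE - MLX5_SKB_FRAG_SZ(XDP_PACKET_HEADROOM)
     * e.g. 4096 - (256B headroom + skb_shared_info tailroom, aligned)
     */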
+
 static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
 {
        struct mlx5e_priv *priv = netdev_priv(netdev);
        struct bpf_prog *old_prog;
-       int err = 0;
        bool reset, was_opened;
+       int err = 0;
        int i;
 
        mutex_lock(&priv->state_lock);
 
-       if ((netdev->features & NETIF_F_LRO) && prog) {
-               netdev_warn(netdev, "can't set XDP while LRO is on, disable LRO first\n");
-               err = -EINVAL;
-               goto unlock;
-       }
-
-       if ((netdev->features & NETIF_F_HW_ESP) && prog) {
-               netdev_warn(netdev, "can't set XDP with IPSec offload\n");
-               err = -EINVAL;
-               goto unlock;
+       if (prog) {
+               err = mlx5e_xdp_allowed(priv, prog);
+               if (err)
+                       goto unlock;
        }
 
        was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
@@ -4193,7 +4307,6 @@ static int mlx5e_xdp(struct net_device *dev, struct netdev_bpf *xdp)
                return mlx5e_xdp_set(dev, xdp->prog);
        case XDP_QUERY_PROG:
                xdp->prog_id = mlx5e_xdp_query(dev);
-               xdp->prog_attached = !!xdp->prog_id;
                return 0;
        default:
                return -EINVAL;
@@ -4240,6 +4353,7 @@ static const struct net_device_ops mlx5e_netdev_ops = {
 #endif
        .ndo_tx_timeout          = mlx5e_tx_timeout,
        .ndo_bpf                 = mlx5e_xdp,
+       .ndo_xdp_xmit            = mlx5e_xdp_xmit,
 #ifdef CONFIG_NET_POLL_CONTROLLER
        .ndo_poll_controller     = mlx5e_netpoll,
 #endif
@@ -4535,8 +4649,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
        netdev->hw_features      |= NETIF_F_HW_VLAN_CTAG_FILTER;
        netdev->hw_features      |= NETIF_F_HW_VLAN_STAG_TX;
 
-       if (mlx5e_vxlan_allowed(mdev) || MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) {
-               netdev->hw_features     |= NETIF_F_GSO_PARTIAL;
+       if (mlx5_vxlan_allowed(mdev->vxlan) || MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) {
                netdev->hw_enc_features |= NETIF_F_IP_CSUM;
                netdev->hw_enc_features |= NETIF_F_IPV6_CSUM;
                netdev->hw_enc_features |= NETIF_F_TSO;
@@ -4544,7 +4657,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
                netdev->hw_enc_features |= NETIF_F_GSO_PARTIAL;
        }
 
-       if (mlx5e_vxlan_allowed(mdev)) {
+       if (mlx5_vxlan_allowed(mdev->vxlan)) {
                netdev->hw_features     |= NETIF_F_GSO_UDP_TUNNEL |
                                           NETIF_F_GSO_UDP_TUNNEL_CSUM;
                netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL |
@@ -4561,6 +4674,11 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
                                                NETIF_F_GSO_GRE_CSUM;
        }
 
+       netdev->hw_features                      |= NETIF_F_GSO_PARTIAL;
+       netdev->gso_partial_features             |= NETIF_F_GSO_UDP_L4;
+       netdev->hw_features                      |= NETIF_F_GSO_UDP_L4;
+       netdev->features                         |= NETIF_F_GSO_UDP_L4;
+
        mlx5_query_port_fcs(mdev, &fcs_supported, &fcs_enabled);
 
        if (fcs_supported)
@@ -4650,14 +4768,12 @@ static void mlx5e_nic_init(struct mlx5_core_dev *mdev,
                mlx5_core_err(mdev, "TLS initialization failed, %d\n", err);
        mlx5e_build_nic_netdev(netdev);
        mlx5e_build_tc2txq_maps(priv);
-       mlx5e_vxlan_init(priv);
 }
 
 static void mlx5e_nic_cleanup(struct mlx5e_priv *priv)
 {
        mlx5e_tls_cleanup(priv);
        mlx5e_ipsec_cleanup(priv);
-       mlx5e_vxlan_cleanup(priv);
 }
 
 static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
index 2b8040a3cdbd7c2f74bb854bd8141ba379ea37de..8e3c5b4b90ab9d77e7833a1ce4db7db042b108dc 100644 (file)
@@ -797,7 +797,7 @@ static int mlx5e_rep_setup_tc_block(struct net_device *dev,
        switch (f->command) {
        case TC_BLOCK_BIND:
                return tcf_block_cb_register(f->block, mlx5e_rep_setup_tc_cb,
-                                            priv, priv);
+                                            priv, priv, f->extack);
        case TC_BLOCK_UNBIND:
                tcf_block_cb_unregister(f->block, mlx5e_rep_setup_tc_cb, priv);
                return 0;
@@ -897,6 +897,9 @@ mlx5e_rep_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
 {
        struct mlx5e_priv *priv = netdev_priv(dev);
 
+       /* update HW stats in background for next time */
+       queue_delayed_work(priv->wq, &priv->update_stats_work, 0);
+
        memcpy(stats, &priv->stats.vf_vport, sizeof(*stats));
 }
 
index d3a1dd20e41d4c8b3d68669d1fc40a9b4e1e63e4..15d8ae28c040c17e50d37928adac65ead8311893 100644 (file)
@@ -34,7 +34,6 @@
 #include <linux/ip.h>
 #include <linux/ipv6.h>
 #include <linux/tcp.h>
-#include <linux/bpf_trace.h>
 #include <net/busy_poll.h>
 #include <net/ip6_checksum.h>
 #include <net/page_pool.h>
@@ -44,7 +43,9 @@
 #include "en_rep.h"
 #include "ipoib/ipoib.h"
 #include "en_accel/ipsec_rxtx.h"
+#include "en_accel/tls_rxtx.h"
 #include "lib/clock.h"
+#include "en/xdp.h"
 
 static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config)
 {
@@ -238,8 +239,7 @@ static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq,
        return 0;
 }
 
-static void mlx5e_page_dma_unmap(struct mlx5e_rq *rq,
-                                       struct mlx5e_dma_info *dma_info)
+void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info)
 {
        dma_unmap_page(rq->pdev, dma_info->addr, PAGE_SIZE, rq->buff.map_dir);
 }
@@ -276,10 +276,11 @@ static inline int mlx5e_get_rx_frag(struct mlx5e_rq *rq,
 }
 
 static inline void mlx5e_put_rx_frag(struct mlx5e_rq *rq,
-                                    struct mlx5e_wqe_frag_info *frag)
+                                    struct mlx5e_wqe_frag_info *frag,
+                                    bool recycle)
 {
        if (frag->last_in_page)
-               mlx5e_page_release(rq, frag->di, true);
+               mlx5e_page_release(rq, frag->di, recycle);
 }
 
 static inline struct mlx5e_wqe_frag_info *get_frag(struct mlx5e_rq *rq, u16 ix)
@@ -307,25 +308,26 @@ static int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe_cyc *wqe,
 
 free_frags:
        while (--i >= 0)
-               mlx5e_put_rx_frag(rq, --frag);
+               mlx5e_put_rx_frag(rq, --frag, true);
 
        return err;
 }
 
 static inline void mlx5e_free_rx_wqe(struct mlx5e_rq *rq,
-                                    struct mlx5e_wqe_frag_info *wi)
+                                    struct mlx5e_wqe_frag_info *wi,
+                                    bool recycle)
 {
        int i;
 
        for (i = 0; i < rq->wqe.info.num_frags; i++, wi++)
-               mlx5e_put_rx_frag(rq, wi);
+               mlx5e_put_rx_frag(rq, wi, recycle);
 }
 
 void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix)
 {
        struct mlx5e_wqe_frag_info *wi = get_frag(rq, ix);
 
-       mlx5e_free_rx_wqe(rq, wi);
+       mlx5e_free_rx_wqe(rq, wi, false);
 }
 
 static int mlx5e_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, u8 wqe_bulk)
@@ -395,7 +397,8 @@ mlx5e_copy_skb_header_mpwqe(struct device *pdev,
        }
 }
 
-void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi)
+static void
+mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, bool recycle)
 {
        const bool no_xdp_xmit =
                bitmap_empty(wi->xdp_xmit_bitmap, MLX5_MPWRQ_PAGES_PER_WQE);
@@ -404,7 +407,7 @@ void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi)
 
        for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++)
                if (no_xdp_xmit || !test_bit(i, wi->xdp_xmit_bitmap))
-                       mlx5e_page_release(rq, &dma_info[i], true);
+                       mlx5e_page_release(rq, &dma_info[i], recycle);
 }
 
 static void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq)
@@ -487,7 +490,7 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
 
        sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_UMR;
        sq->pc += MLX5E_UMR_WQEBBS;
-       mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, &umr_wqe->ctrl);
+       mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &umr_wqe->ctrl);
 
        return 0;
 
@@ -504,8 +507,8 @@ err_unmap:
 void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
 {
        struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix];
-
-       mlx5e_free_rx_mpwqe(rq, wi);
+       /* Don't recycle, this function is called on rq/netdev close */
+       mlx5e_free_rx_mpwqe(rq, wi, false);
 }
 
 bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq)
@@ -601,6 +604,8 @@ bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq)
 
        if (!rq->mpwqe.umr_in_progress)
                mlx5e_alloc_rx_mpwqe(rq, wq->head);
+       else
+               rq->stats->congst_umr += mlx5_wq_ll_missing(wq) > 2;
 
        return false;
 }
@@ -795,6 +800,11 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe,
        struct net_device *netdev = rq->netdev;
 
        skb->mac_len = ETH_HLEN;
+
+#ifdef CONFIG_MLX5_EN_TLS
+       mlx5e_tls_handle_rx_skb(netdev, skb, &cqe_bcnt);
+#endif
+
        if (lro_num_seg > 1) {
                mlx5e_lro_update_hdr(skb, cqe, cqe_bcnt);
                skb_shinfo(skb)->gso_size = DIV_ROUND_UP(cqe_bcnt, lro_num_seg);
@@ -839,135 +849,6 @@ static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq *rq,
        mlx5e_build_rx_skb(cqe, cqe_bcnt, rq, skb);
 }
 
-static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_xdpsq *sq)
-{
-       struct mlx5_wq_cyc *wq = &sq->wq;
-       struct mlx5e_tx_wqe *wqe;
-       u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc - 1); /* last pi */
-
-       wqe  = mlx5_wq_cyc_get_wqe(wq, pi);
-
-       mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &wqe->ctrl);
-}
-
-static inline bool mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq,
-                                       struct mlx5e_dma_info *di,
-                                       const struct xdp_buff *xdp)
-{
-       struct mlx5e_xdpsq       *sq   = &rq->xdpsq;
-       struct mlx5_wq_cyc       *wq   = &sq->wq;
-       u16                       pi   = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
-       struct mlx5e_tx_wqe      *wqe  = mlx5_wq_cyc_get_wqe(wq, pi);
-
-       struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
-       struct mlx5_wqe_eth_seg  *eseg = &wqe->eth;
-       struct mlx5_wqe_data_seg *dseg;
-
-       ptrdiff_t data_offset = xdp->data - xdp->data_hard_start;
-       dma_addr_t dma_addr  = di->addr + data_offset;
-       unsigned int dma_len = xdp->data_end - xdp->data;
-
-       struct mlx5e_rq_stats *stats = rq->stats;
-
-       prefetchw(wqe);
-
-       if (unlikely(dma_len < MLX5E_XDP_MIN_INLINE || rq->hw_mtu < dma_len)) {
-               stats->xdp_drop++;
-               return false;
-       }
-
-       if (unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1))) {
-               if (sq->db.doorbell) {
-                       /* SQ is full, ring doorbell */
-                       mlx5e_xmit_xdp_doorbell(sq);
-                       sq->db.doorbell = false;
-               }
-               stats->xdp_tx_full++;
-               return false;
-       }
-
-       dma_sync_single_for_device(sq->pdev, dma_addr, dma_len, PCI_DMA_TODEVICE);
-
-       cseg->fm_ce_se = 0;
-
-       dseg = (struct mlx5_wqe_data_seg *)eseg + 1;
-
-       /* copy the inline part if required */
-       if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
-               memcpy(eseg->inline_hdr.start, xdp->data, MLX5E_XDP_MIN_INLINE);
-               eseg->inline_hdr.sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE);
-               dma_len  -= MLX5E_XDP_MIN_INLINE;
-               dma_addr += MLX5E_XDP_MIN_INLINE;
-               dseg++;
-       }
-
-       /* write the dma part */
-       dseg->addr       = cpu_to_be64(dma_addr);
-       dseg->byte_count = cpu_to_be32(dma_len);
-
-       cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND);
-
-       /* move page to reference to sq responsibility,
-        * and mark so it's not put back in page-cache.
-        */
-       __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */
-       sq->db.di[pi] = *di;
-       sq->pc++;
-
-       sq->db.doorbell = true;
-
-       stats->xdp_tx++;
-       return true;
-}
-
-/* returns true if packet was consumed by xdp */
-static inline bool mlx5e_xdp_handle(struct mlx5e_rq *rq,
-                                   struct mlx5e_dma_info *di,
-                                   void *va, u16 *rx_headroom, u32 *len)
-{
-       struct bpf_prog *prog = READ_ONCE(rq->xdp_prog);
-       struct xdp_buff xdp;
-       u32 act;
-       int err;
-
-       if (!prog)
-               return false;
-
-       xdp.data = va + *rx_headroom;
-       xdp_set_data_meta_invalid(&xdp);
-       xdp.data_end = xdp.data + *len;
-       xdp.data_hard_start = va;
-       xdp.rxq = &rq->xdp_rxq;
-
-       act = bpf_prog_run_xdp(prog, &xdp);
-       switch (act) {
-       case XDP_PASS:
-               *rx_headroom = xdp.data - xdp.data_hard_start;
-               *len = xdp.data_end - xdp.data;
-               return false;
-       case XDP_TX:
-               if (unlikely(!mlx5e_xmit_xdp_frame(rq, di, &xdp)))
-                       trace_xdp_exception(rq->netdev, prog, act);
-               return true;
-       case XDP_REDIRECT:
-               /* When XDP enabled then page-refcnt==1 here */
-               err = xdp_do_redirect(rq->netdev, &xdp, prog);
-               if (!err) {
-                       __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags);
-                       rq->xdpsq.db.redirect_flush = true;
-                       mlx5e_page_dma_unmap(rq, di);
-               }
-               return true;
-       default:
-               bpf_warn_invalid_xdp_action(act);
-       case XDP_ABORTED:
-               trace_xdp_exception(rq->netdev, prog, act);
-       case XDP_DROP:
-               rq->stats->xdp_drop++;
-               return true;
-       }
-}
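
Nothing here is lost: mlx5e_xmit_xdp_doorbell() becomes a static inline in en/xdp.h (shown at the top of this patch), while mlx5e_xdp_handle() and mlx5e_xmit_xdp_frame() are re-homed behind the prototypes declared there, presumably in a new en/xdp.c not included in this excerpt. The RX fast path keeps the same call shape (sketch, assuming the call sites are otherwise unchanged):

    if (mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt))
            return NULL;  /* consumed: page now owned by XDP_TX or redirect */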
-
 static inline
 struct sk_buff *mlx5e_build_linear_skb(struct mlx5e_rq *rq, void *va,
                                       u32 frag_size, u16 headroom,
@@ -1105,7 +986,7 @@ void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
        napi_gro_receive(rq->cq.napi, skb);
 
 free_wqe:
-       mlx5e_free_rx_wqe(rq, wi);
+       mlx5e_free_rx_wqe(rq, wi, true);
 wq_cyc_pop:
        mlx5_wq_cyc_pop(wq);
 }
@@ -1147,7 +1028,7 @@ void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
        napi_gro_receive(rq->cq.napi, skb);
 
 free_wqe:
-       mlx5e_free_rx_wqe(rq, wi);
+       mlx5e_free_rx_wqe(rq, wi, true);
 wq_cyc_pop:
        mlx5_wq_cyc_pop(wq);
 }
@@ -1218,6 +1099,7 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
 
        dma_sync_single_range_for_cpu(rq->pdev, di->addr, head_offset,
                                      frag_size, DMA_FROM_DEVICE);
+       prefetchw(va); /* xdp_frame data area */
        prefetch(data);
 
        rcu_read_lock();
@@ -1261,7 +1143,10 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
        }
 
        if (unlikely(mpwrq_is_filler_cqe(cqe))) {
-               rq->stats->mpwqe_filler++;
+               struct mlx5e_rq_stats *stats = rq->stats;
+
+               stats->mpwqe_filler_cqes++;
+               stats->mpwqe_filler_strides += cstrides;
                goto mpwrq_cqe_out;
        }
 
@@ -1281,7 +1166,7 @@ mpwrq_cqe_out:
 
        wq  = &rq->mpwqe.wq;
        wqe = mlx5_wq_ll_get_wqe(wq, wqe_id);
-       mlx5e_free_rx_mpwqe(rq, wi);
+       mlx5e_free_rx_mpwqe(rq, wi, true);
        mlx5_wq_ll_pop(wq, cqe->wqe_id, &wqe->next.next_wqe_index);
 }
 
@@ -1317,14 +1202,14 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
                rq->handle_rx_cqe(rq, cqe);
        } while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));
 
-       if (xdpsq->db.doorbell) {
+       if (xdpsq->doorbell) {
                mlx5e_xmit_xdp_doorbell(xdpsq);
-               xdpsq->db.doorbell = false;
+               xdpsq->doorbell = false;
        }
 
-       if (xdpsq->db.redirect_flush) {
+       if (xdpsq->redirect_flush) {
                xdp_do_flush_map();
-               xdpsq->db.redirect_flush = false;
+               xdpsq->redirect_flush = false;
        }
 
        mlx5_cqwq_update_db_record(&cq->wq);
@@ -1335,78 +1220,6 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
        return work_done;
 }
 
-bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
-{
-       struct mlx5e_xdpsq *sq;
-       struct mlx5_cqe64 *cqe;
-       struct mlx5e_rq *rq;
-       u16 sqcc;
-       int i;
-
-       sq = container_of(cq, struct mlx5e_xdpsq, cq);
-
-       if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
-               return false;
-
-       cqe = mlx5_cqwq_get_cqe(&cq->wq);
-       if (!cqe)
-               return false;
-
-       rq = container_of(sq, struct mlx5e_rq, xdpsq);
-
-       /* sq->cc must be updated only after mlx5_cqwq_update_db_record(),
-        * otherwise a cq overrun may occur
-        */
-       sqcc = sq->cc;
-
-       i = 0;
-       do {
-               u16 wqe_counter;
-               bool last_wqe;
-
-               mlx5_cqwq_pop(&cq->wq);
-
-               wqe_counter = be16_to_cpu(cqe->wqe_counter);
-
-               do {
-                       struct mlx5e_dma_info *di;
-                       u16 ci;
-
-                       last_wqe = (sqcc == wqe_counter);
-
-                       ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
-                       di = &sq->db.di[ci];
-
-                       sqcc++;
-                       /* Recycle RX page */
-                       mlx5e_page_release(rq, di, true);
-               } while (!last_wqe);
-       } while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));
-
-       mlx5_cqwq_update_db_record(&cq->wq);
-
-       /* ensure cq space is freed before enabling more cqes */
-       wmb();
-
-       sq->cc = sqcc;
-       return (i == MLX5E_TX_CQ_POLL_BUDGET);
-}
-
-void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq)
-{
-       struct mlx5e_rq *rq = container_of(sq, struct mlx5e_rq, xdpsq);
-       struct mlx5e_dma_info *di;
-       u16 ci;
-
-       while (sq->cc != sq->pc) {
-               ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->cc);
-               di = &sq->db.di[ci];
-               sq->cc++;
-
-               mlx5e_page_release(rq, di, false);
-       }
-}
-
 #ifdef CONFIG_MLX5_CORE_IPOIB
 
 #define MLX5_IB_GRH_DGID_OFFSET 24
@@ -1508,7 +1321,7 @@ void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
        napi_gro_receive(rq->cq.napi, skb);
 
 wq_free_wqe:
-       mlx5e_free_rx_wqe(rq, wi);
+       mlx5e_free_rx_wqe(rq, wi, true);
        mlx5_wq_cyc_pop(wq);
 }
 
@@ -1531,19 +1344,19 @@ void mlx5e_ipsec_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
        skb = rq->wqe.skb_from_cqe(rq, cqe, wi, cqe_bcnt);
        if (unlikely(!skb)) {
                /* a DROP, save the page-reuse checks */
-               mlx5e_free_rx_wqe(rq, wi);
+               mlx5e_free_rx_wqe(rq, wi, true);
                goto wq_cyc_pop;
        }
-       skb = mlx5e_ipsec_handle_rx_skb(rq->netdev, skb);
+       skb = mlx5e_ipsec_handle_rx_skb(rq->netdev, skb, &cqe_bcnt);
        if (unlikely(!skb)) {
-               mlx5e_free_rx_wqe(rq, wi);
+               mlx5e_free_rx_wqe(rq, wi, true);
                goto wq_cyc_pop;
        }
 
        mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
        napi_gro_receive(rq->cq.napi, skb);
 
-       mlx5e_free_rx_wqe(rq, wi);
+       mlx5e_free_rx_wqe(rq, wi, true);
 wq_cyc_pop:
        mlx5_wq_cyc_pop(wq);
 }
index 1646859974ce2b8fc76798693a193e8b40d9e158..12fdf5c92b67f7c8a32f5567ca6784b30145f699 100644 (file)
@@ -44,6 +44,7 @@ static const struct counter_desc sw_stats_desc[] = {
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_inner_packets) },
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_inner_bytes) },
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_added_vlan_packets) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_nop) },
 
 #ifdef CONFIG_MLX5_EN_TLS
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_ooo) },
@@ -58,8 +59,11 @@ static const struct counter_desc sw_stats_desc[] = {
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete) },
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary_inner) },
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_drop) },
-       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_redirect) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_xmit) },
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_full) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_err) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_cqe) },
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_none) },
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial) },
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial_inner) },
@@ -67,10 +71,17 @@ static const struct counter_desc sw_stats_desc[] = {
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_dropped) },
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xmit_more) },
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_recover) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_cqes) },
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_wake) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_udp_seg_rem) },
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_cqe_err) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_xmit) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_full) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_err) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_cqes) },
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_wqe_err) },
-       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_filler) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_filler_cqes) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_filler_strides) },
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) },
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_blks) },
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_pkts) },
@@ -80,6 +91,11 @@ static const struct counter_desc sw_stats_desc[] = {
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_empty) },
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_busy) },
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_waive) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_congst_umr) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_events) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_poll) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_arm) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_aff_change) },
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_eq_rearm) },
 };
 
@@ -118,6 +134,8 @@ void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv)
        for (i = 0; i < priv->profile->max_nch(priv->mdev); i++) {
                struct mlx5e_channel_stats *channel_stats =
                        &priv->channel_stats[i];
+               struct mlx5e_xdpsq_stats *xdpsq_red_stats = &channel_stats->xdpsq;
+               struct mlx5e_xdpsq_stats *xdpsq_stats = &channel_stats->rq_xdpsq;
                struct mlx5e_rq_stats *rq_stats = &channel_stats->rq;
                struct mlx5e_ch_stats *ch_stats = &channel_stats->ch;
                int j;
@@ -131,11 +149,15 @@ void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv)
                s->rx_csum_complete += rq_stats->csum_complete;
                s->rx_csum_unnecessary += rq_stats->csum_unnecessary;
                s->rx_csum_unnecessary_inner += rq_stats->csum_unnecessary_inner;
-               s->rx_xdp_drop += rq_stats->xdp_drop;
-               s->rx_xdp_tx += rq_stats->xdp_tx;
-               s->rx_xdp_tx_full += rq_stats->xdp_tx_full;
+               s->rx_xdp_drop     += rq_stats->xdp_drop;
+               s->rx_xdp_redirect += rq_stats->xdp_redirect;
+               s->rx_xdp_tx_xmit  += xdpsq_stats->xmit;
+               s->rx_xdp_tx_full  += xdpsq_stats->full;
+               s->rx_xdp_tx_err   += xdpsq_stats->err;
+               s->rx_xdp_tx_cqe   += xdpsq_stats->cqes;
                s->rx_wqe_err   += rq_stats->wqe_err;
-               s->rx_mpwqe_filler += rq_stats->mpwqe_filler;
+               s->rx_mpwqe_filler_cqes    += rq_stats->mpwqe_filler_cqes;
+               s->rx_mpwqe_filler_strides += rq_stats->mpwqe_filler_strides;
                s->rx_buff_alloc_err += rq_stats->buff_alloc_err;
                s->rx_cqe_compress_blks += rq_stats->cqe_compress_blks;
                s->rx_cqe_compress_pkts += rq_stats->cqe_compress_pkts;
@@ -145,7 +167,17 @@ void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv)
                s->rx_cache_empty += rq_stats->cache_empty;
                s->rx_cache_busy  += rq_stats->cache_busy;
                s->rx_cache_waive += rq_stats->cache_waive;
-               s->ch_eq_rearm += ch_stats->eq_rearm;
+               s->rx_congst_umr  += rq_stats->congst_umr;
+               s->ch_events      += ch_stats->events;
+               s->ch_poll        += ch_stats->poll;
+               s->ch_arm         += ch_stats->arm;
+               s->ch_aff_change  += ch_stats->aff_change;
+               s->ch_eq_rearm    += ch_stats->eq_rearm;
+               /* xdp redirect */
+               s->tx_xdp_xmit    += xdpsq_red_stats->xmit;
+               s->tx_xdp_full    += xdpsq_red_stats->full;
+               s->tx_xdp_err     += xdpsq_red_stats->err;
+               s->tx_xdp_cqes    += xdpsq_red_stats->cqes;
 
                for (j = 0; j < priv->max_opened_tc; j++) {
                        struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[j];
@@ -157,8 +189,10 @@ void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv)
                        s->tx_tso_inner_packets += sq_stats->tso_inner_packets;
                        s->tx_tso_inner_bytes   += sq_stats->tso_inner_bytes;
                        s->tx_added_vlan_packets += sq_stats->added_vlan_packets;
+                       s->tx_nop               += sq_stats->nop;
                        s->tx_queue_stopped     += sq_stats->stopped;
                        s->tx_queue_wake        += sq_stats->wake;
+                       s->tx_udp_seg_rem       += sq_stats->udp_seg_rem;
                        s->tx_queue_dropped     += sq_stats->dropped;
                        s->tx_cqe_err           += sq_stats->cqe_err;
                        s->tx_recover           += sq_stats->recover;
@@ -170,6 +204,7 @@ void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv)
                        s->tx_tls_ooo           += sq_stats->tls_ooo;
                        s->tx_tls_resync_bytes  += sq_stats->tls_resync_bytes;
 #endif
+                       s->tx_cqes              += sq_stats->cqes;
                }
        }
 
@@ -1106,13 +1141,13 @@ static const struct counter_desc rq_stats_desc[] = {
        { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) },
        { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_none) },
        { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_drop) },
-       { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_tx) },
-       { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_tx_full) },
+       { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_redirect) },
        { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_packets) },
        { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_bytes) },
        { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, removed_vlan_packets) },
        { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, wqe_err) },
-       { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, mpwqe_filler) },
+       { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, mpwqe_filler_cqes) },
+       { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, mpwqe_filler_strides) },
        { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, buff_alloc_err) },
        { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_blks) },
        { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) },
@@ -1122,6 +1157,7 @@ static const struct counter_desc rq_stats_desc[] = {
        { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_empty) },
        { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_busy) },
        { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_waive) },
+       { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, congst_umr) },
 };
 
 static const struct counter_desc sq_stats_desc[] = {
@@ -1140,16 +1176,37 @@ static const struct counter_desc sq_stats_desc[] = {
        { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, dropped) },
        { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, xmit_more) },
        { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, recover) },
+       { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, cqes) },
        { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, wake) },
        { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, cqe_err) },
 };
 
+static const struct counter_desc rq_xdpsq_stats_desc[] = {
+       { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, xmit) },
+       { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, full) },
+       { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, err) },
+       { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, cqes) },
+};
+
+static const struct counter_desc xdpsq_stats_desc[] = {
+       { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, xmit) },
+       { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, full) },
+       { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, err) },
+       { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, cqes) },
+};
+
 static const struct counter_desc ch_stats_desc[] = {
+       { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, events) },
+       { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, poll) },
+       { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, arm) },
+       { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, aff_change) },
        { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, eq_rearm) },
 };
 
 #define NUM_RQ_STATS                   ARRAY_SIZE(rq_stats_desc)
 #define NUM_SQ_STATS                   ARRAY_SIZE(sq_stats_desc)
+#define NUM_XDPSQ_STATS                        ARRAY_SIZE(xdpsq_stats_desc)
+#define NUM_RQ_XDPSQ_STATS             ARRAY_SIZE(rq_xdpsq_stats_desc)
 #define NUM_CH_STATS                   ARRAY_SIZE(ch_stats_desc)
 
 static int mlx5e_grp_channels_get_num_stats(struct mlx5e_priv *priv)
@@ -1158,7 +1215,9 @@ static int mlx5e_grp_channels_get_num_stats(struct mlx5e_priv *priv)
 
        return (NUM_RQ_STATS * max_nch) +
               (NUM_CH_STATS * max_nch) +
-              (NUM_SQ_STATS * max_nch * priv->max_opened_tc);
+              (NUM_SQ_STATS * max_nch * priv->max_opened_tc) +
+              (NUM_RQ_XDPSQ_STATS * max_nch) +
+              (NUM_XDPSQ_STATS * max_nch);
 }
 
 static int mlx5e_grp_channels_fill_strings(struct mlx5e_priv *priv, u8 *data,
@@ -1172,9 +1231,14 @@ static int mlx5e_grp_channels_fill_strings(struct mlx5e_priv *priv, u8 *data,
                        sprintf(data + (idx++) * ETH_GSTRING_LEN,
                                ch_stats_desc[j].format, i);
 
-       for (i = 0; i < max_nch; i++)
+       for (i = 0; i < max_nch; i++) {
                for (j = 0; j < NUM_RQ_STATS; j++)
-                       sprintf(data + (idx++) * ETH_GSTRING_LEN, rq_stats_desc[j].format, i);
+                       sprintf(data + (idx++) * ETH_GSTRING_LEN,
+                               rq_stats_desc[j].format, i);
+               for (j = 0; j < NUM_RQ_XDPSQ_STATS; j++)
+                       sprintf(data + (idx++) * ETH_GSTRING_LEN,
+                               rq_xdpsq_stats_desc[j].format, i);
+       }
 
        for (tc = 0; tc < priv->max_opened_tc; tc++)
                for (i = 0; i < max_nch; i++)
@@ -1183,6 +1247,11 @@ static int mlx5e_grp_channels_fill_strings(struct mlx5e_priv *priv, u8 *data,
                                        sq_stats_desc[j].format,
                                        priv->channel_tc2txq[i][tc]);
 
+       for (i = 0; i < max_nch; i++)
+               for (j = 0; j < NUM_XDPSQ_STATS; j++)
+                       sprintf(data + (idx++) * ETH_GSTRING_LEN,
+                               xdpsq_stats_desc[j].format, i);
+
        return idx;
 }
 
@@ -1198,11 +1267,16 @@ static int mlx5e_grp_channels_fill_stats(struct mlx5e_priv *priv, u64 *data,
                                MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].ch,
                                                     ch_stats_desc, j);
 
-       for (i = 0; i < max_nch; i++)
+       for (i = 0; i < max_nch; i++) {
                for (j = 0; j < NUM_RQ_STATS; j++)
                        data[idx++] =
                                MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].rq,
                                                     rq_stats_desc, j);
+               for (j = 0; j < NUM_RQ_XDPSQ_STATS; j++)
+                       data[idx++] =
+                               MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].rq_xdpsq,
+                                                    rq_xdpsq_stats_desc, j);
+       }
 
        for (tc = 0; tc < priv->max_opened_tc; tc++)
                for (i = 0; i < max_nch; i++)
@@ -1211,6 +1285,12 @@ static int mlx5e_grp_channels_fill_stats(struct mlx5e_priv *priv, u64 *data,
                                        MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].sq[tc],
                                                             sq_stats_desc, j);
 
+       for (i = 0; i < max_nch; i++)
+               for (j = 0; j < NUM_XDPSQ_STATS; j++)
+                       data[idx++] =
+                               MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].xdpsq,
+                                                    xdpsq_stats_desc, j);
+
        return idx;
 }
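
The ethtool plumbing above rests on one invariant: mlx5e_grp_channels_get_num_stats(), ..._fill_strings() and ..._fill_stats() must walk the counter groups in exactly the same order (channel, RQ plus its RQ-XDPSQ, per-TC SQs, then the redirect XDPSQs), or names and values drift apart. A minimal userspace sketch of the counter_desc pattern follows; the struct and field names are simplified stand-ins, not the driver's types:

        /* One table of format strings plus offsetof() values drives both the
         * string pass and the value pass, which is why the two loops must
         * iterate the groups in the same order. */
        #include <stdio.h>
        #include <stddef.h>
        #include <stdint.h>

        struct xdpsq_stats { uint64_t xmit, full, err, cqes; };
        struct counter_desc { const char *format; size_t offset; };

        #define DECLARE_XDPSQ_STAT(type, fld) { "tx%d_xdp_" #fld, offsetof(type, fld) }

        static const struct counter_desc xdpsq_stats_desc[] = {
                DECLARE_XDPSQ_STAT(struct xdpsq_stats, xmit),
                DECLARE_XDPSQ_STAT(struct xdpsq_stats, full),
                DECLARE_XDPSQ_STAT(struct xdpsq_stats, err),
                DECLARE_XDPSQ_STAT(struct xdpsq_stats, cqes),
        };
        #define NUM_XDPSQ_STATS (sizeof(xdpsq_stats_desc) / sizeof(xdpsq_stats_desc[0]))

        int main(void)
        {
                struct xdpsq_stats ch0 = { .xmit = 10, .full = 1, .cqes = 9 };
                char name[32];

                for (size_t j = 0; j < NUM_XDPSQ_STATS; j++) {
                        uint64_t v = *(uint64_t *)((char *)&ch0 + xdpsq_stats_desc[j].offset);

                        snprintf(name, sizeof(name), xdpsq_stats_desc[j].format, 0);
                        printf("%-16s %llu\n", name, (unsigned long long)v);
                }
                return 0;
        }

The format/offsetof pairing is what lets a single descriptor table serve both the naming and the value-dump paths with no chance of skew.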
 
index 643153bb360722375c822f7f530e670c31c7f736..a4c035aedd46cfe2c64e24f02f64bf90bf809e46 100644 (file)
@@ -44,6 +44,8 @@
 #define MLX5E_DECLARE_STAT(type, fld) #fld, offsetof(type, fld)
 #define MLX5E_DECLARE_RX_STAT(type, fld) "rx%d_"#fld, offsetof(type, fld)
 #define MLX5E_DECLARE_TX_STAT(type, fld) "tx%d_"#fld, offsetof(type, fld)
+#define MLX5E_DECLARE_XDPSQ_STAT(type, fld) "tx%d_xdp_"#fld, offsetof(type, fld)
+#define MLX5E_DECLARE_RQ_XDPSQ_STAT(type, fld) "rx%d_xdp_tx_"#fld, offsetof(type, fld)
 #define MLX5E_DECLARE_CH_STAT(type, fld) "ch%d_"#fld, offsetof(type, fld)
 
 struct counter_desc {
@@ -61,6 +63,7 @@ struct mlx5e_sw_stats {
        u64 tx_tso_inner_packets;
        u64 tx_tso_inner_bytes;
        u64 tx_added_vlan_packets;
+       u64 tx_nop;
        u64 rx_lro_packets;
        u64 rx_lro_bytes;
        u64 rx_removed_vlan_packets;
@@ -69,8 +72,11 @@ struct mlx5e_sw_stats {
        u64 rx_csum_complete;
        u64 rx_csum_unnecessary_inner;
        u64 rx_xdp_drop;
-       u64 rx_xdp_tx;
+       u64 rx_xdp_redirect;
+       u64 rx_xdp_tx_xmit;
        u64 rx_xdp_tx_full;
+       u64 rx_xdp_tx_err;
+       u64 rx_xdp_tx_cqe;
        u64 tx_csum_none;
        u64 tx_csum_partial;
        u64 tx_csum_partial_inner;
@@ -78,10 +84,17 @@ struct mlx5e_sw_stats {
        u64 tx_queue_dropped;
        u64 tx_xmit_more;
        u64 tx_recover;
+       u64 tx_cqes;
        u64 tx_queue_wake;
+       u64 tx_udp_seg_rem;
        u64 tx_cqe_err;
+       u64 tx_xdp_xmit;
+       u64 tx_xdp_full;
+       u64 tx_xdp_err;
+       u64 tx_xdp_cqes;
        u64 rx_wqe_err;
-       u64 rx_mpwqe_filler;
+       u64 rx_mpwqe_filler_cqes;
+       u64 rx_mpwqe_filler_strides;
        u64 rx_buff_alloc_err;
        u64 rx_cqe_compress_blks;
        u64 rx_cqe_compress_pkts;
@@ -91,6 +104,11 @@ struct mlx5e_sw_stats {
        u64 rx_cache_empty;
        u64 rx_cache_busy;
        u64 rx_cache_waive;
+       u64 rx_congst_umr;
+       u64 ch_events;
+       u64 ch_poll;
+       u64 ch_arm;
+       u64 ch_aff_change;
        u64 ch_eq_rearm;
 
 #ifdef CONFIG_MLX5_EN_TLS
@@ -168,10 +186,10 @@ struct mlx5e_rq_stats {
        u64 lro_bytes;
        u64 removed_vlan_packets;
        u64 xdp_drop;
-       u64 xdp_tx;
-       u64 xdp_tx_full;
+       u64 xdp_redirect;
        u64 wqe_err;
-       u64 mpwqe_filler;
+       u64 mpwqe_filler_cqes;
+       u64 mpwqe_filler_strides;
        u64 buff_alloc_err;
        u64 cqe_compress_blks;
        u64 cqe_compress_pkts;
@@ -181,6 +199,7 @@ struct mlx5e_rq_stats {
        u64 cache_empty;
        u64 cache_busy;
        u64 cache_waive;
+       u64 congst_umr;
 };
 
 struct mlx5e_sq_stats {
@@ -196,6 +215,7 @@ struct mlx5e_sq_stats {
        u64 csum_partial_inner;
        u64 added_vlan_packets;
        u64 nop;
+       u64 udp_seg_rem;
 #ifdef CONFIG_MLX5_EN_TLS
        u64 tls_ooo;
        u64 tls_resync_bytes;
@@ -206,11 +226,24 @@ struct mlx5e_sq_stats {
        u64 dropped;
        u64 recover;
        /* dirtied @completion */
-       u64 wake ____cacheline_aligned_in_smp;
+       u64 cqes ____cacheline_aligned_in_smp;
+       u64 wake;
        u64 cqe_err;
 };
 
+struct mlx5e_xdpsq_stats {
+       u64 xmit;
+       u64 full;
+       u64 err;
+       /* dirtied @completion */
+       u64 cqes ____cacheline_aligned_in_smp;
+};
+
 struct mlx5e_ch_stats {
+       u64 events;
+       u64 poll;
+       u64 arm;
+       u64 aff_change;
        u64 eq_rearm;
 };
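
The ____cacheline_aligned_in_smp on the new cqes fields separates counters dirtied on the transmit path from counters dirtied on the completion path, so the posting CPU and the reaping CPU do not false-share one cache line. A freestanding sketch of the same idea, assuming a 64-byte line (the kernel macro resolves the real size per architecture):

        #include <stddef.h>
        #include <stdint.h>

        #define CACHELINE 64 /* illustrative assumption */

        struct xdpsq_stats_sketch {
                /* dirtied by the CPU posting descriptors */
                uint64_t xmit;
                uint64_t full;
                uint64_t err;
                /* dirtied by the CPU reaping completions: starts a new line */
                uint64_t cqes __attribute__((aligned(CACHELINE)));
        };

        _Static_assert(offsetof(struct xdpsq_stats_sketch, cqes) == CACHELINE,
                       "completion counters begin on their own cache line");

        int main(void) { return 0; }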
 
index 3a2c4e548226e2e66e867ffbb9b6370ae767be78..c28fe469b04a3e273520409aaaac61ff7c981c36 100644 (file)
@@ -50,7 +50,7 @@
 #include "en_rep.h"
 #include "en_tc.h"
 #include "eswitch.h"
-#include "vxlan.h"
+#include "lib/vxlan.h"
 #include "fs_core.h"
 #include "en/port.h"
 
@@ -1032,10 +1032,8 @@ void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
                 * dst ip pair
                 */
                n = neigh_lookup(tbl, &m_neigh->dst_ip, m_neigh->dev);
-               if (!n) {
-                       WARN(1, "The neighbour already freed\n");
+               if (!n)
                        return;
-               }
 
                neigh_event_send(n, NULL);
                neigh_release(n);
@@ -1126,16 +1124,12 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
                        skb_flow_dissector_target(f->dissector,
                                                  FLOW_DISSECTOR_KEY_ENC_PORTS,
                                                  f->mask);
-               struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
-               struct mlx5e_rep_priv *uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
-               struct net_device *up_dev = uplink_rpriv->netdev;
-               struct mlx5e_priv *up_priv = netdev_priv(up_dev);
 
                /* Full udp dst port must be given */
                if (memchr_inv(&mask->dst, 0xff, sizeof(mask->dst)))
                        goto vxlan_match_offload_err;
 
-               if (mlx5e_vxlan_lookup_port(up_priv, be16_to_cpu(key->dst)) &&
+               if (mlx5_vxlan_lookup_port(priv->mdev->vxlan, be16_to_cpu(key->dst)) &&
                    MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap))
                        parse_vxlan_attr(spec, f);
                else {
@@ -1213,6 +1207,26 @@ vxlan_match_offload_err:
                MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IPV6);
        }
 
+       if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_IP)) {
+               struct flow_dissector_key_ip *key =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_ENC_IP,
+                                                 f->key);
+               struct flow_dissector_key_ip *mask =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_ENC_IP,
+                                                 f->mask);
+
+               MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, mask->tos & 0x3);
+               MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, key->tos & 0x3);
+
+               MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, mask->tos >> 2);
+               MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, key->tos  >> 2);
+
+               MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, mask->ttl);
+               MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit, key->ttl);
+       }
+
        /* Enforce DMAC when offloading incoming tunneled flows.
         * Flow counters require a match on the DMAC.
         */
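
The ENC_IP hunk above splits the 8-bit tunnel ToS into its two wire-level fields, DSCP in the upper six bits and ECN in the lower two, which is what the `>> 2` and `& 0x3` express. A tiny standalone check of that split:

        #include <stdint.h>
        #include <stdio.h>

        static uint8_t tos_ecn(uint8_t tos)  { return tos & 0x3; }
        static uint8_t tos_dscp(uint8_t tos) { return tos >> 2; }

        int main(void)
        {
                uint8_t tos = 0xb8;     /* DSCP 46 (EF), ECN 0 */

                printf("dscp=%u ecn=%u\n", tos_dscp(tos), tos_ecn(tos));
                return 0;
        }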
@@ -1237,6 +1251,10 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
                                       outer_headers);
        void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
                                       outer_headers);
+       void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+                                   misc_parameters);
+       void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+                                   misc_parameters);
        u16 addr_type = 0;
        u8 ip_proto = 0;
 
@@ -1247,6 +1265,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
              BIT(FLOW_DISSECTOR_KEY_BASIC) |
              BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
              BIT(FLOW_DISSECTOR_KEY_VLAN) |
+             BIT(FLOW_DISSECTOR_KEY_CVLAN) |
              BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
              BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
              BIT(FLOW_DISSECTOR_KEY_PORTS) |
@@ -1256,7 +1275,8 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
              BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) |
              BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) |
              BIT(FLOW_DISSECTOR_KEY_TCP) |
-             BIT(FLOW_DISSECTOR_KEY_IP))) {
+             BIT(FLOW_DISSECTOR_KEY_IP)  |
+             BIT(FLOW_DISSECTOR_KEY_ENC_IP))) {
                netdev_warn(priv->netdev, "Unsupported key used: 0x%x\n",
                            f->dissector->used_keys);
                return -EOPNOTSUPP;
@@ -1327,9 +1347,18 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
                        skb_flow_dissector_target(f->dissector,
                                                  FLOW_DISSECTOR_KEY_VLAN,
                                                  f->mask);
-               if (mask->vlan_id || mask->vlan_priority) {
-                       MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
-                       MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1);
+               if (mask->vlan_id || mask->vlan_priority || mask->vlan_tpid) {
+                       if (key->vlan_tpid == htons(ETH_P_8021AD)) {
+                               MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+                                        svlan_tag, 1);
+                               MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+                                        svlan_tag, 1);
+                       } else {
+                               MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+                                        cvlan_tag, 1);
+                               MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+                                        cvlan_tag, 1);
+                       }
 
                        MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, mask->vlan_id);
                        MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, key->vlan_id);
@@ -1341,6 +1370,41 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
                }
        }
 
+       if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CVLAN)) {
+               struct flow_dissector_key_vlan *key =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_CVLAN,
+                                                 f->key);
+               struct flow_dissector_key_vlan *mask =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_CVLAN,
+                                                 f->mask);
+               if (mask->vlan_id || mask->vlan_priority || mask->vlan_tpid) {
+                       if (key->vlan_tpid == htons(ETH_P_8021AD)) {
+                               MLX5_SET(fte_match_set_misc, misc_c,
+                                        outer_second_svlan_tag, 1);
+                               MLX5_SET(fte_match_set_misc, misc_v,
+                                        outer_second_svlan_tag, 1);
+                       } else {
+                               MLX5_SET(fte_match_set_misc, misc_c,
+                                        outer_second_cvlan_tag, 1);
+                               MLX5_SET(fte_match_set_misc, misc_v,
+                                        outer_second_cvlan_tag, 1);
+                       }
+
+                       MLX5_SET(fte_match_set_misc, misc_c, outer_second_vid,
+                                mask->vlan_id);
+                       MLX5_SET(fte_match_set_misc, misc_v, outer_second_vid,
+                                key->vlan_id);
+                       MLX5_SET(fte_match_set_misc, misc_c, outer_second_prio,
+                                mask->vlan_priority);
+                       MLX5_SET(fte_match_set_misc, misc_v, outer_second_prio,
+                                key->vlan_priority);
+
+                       *match_level = MLX5_MATCH_L2;
+               }
+       }
+
        if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
                struct flow_dissector_key_basic *key =
                        skb_flow_dissector_target(f->dissector,
@@ -2082,7 +2146,7 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
                                   struct net_device **out_dev,
                                   struct flowi4 *fl4,
                                   struct neighbour **out_n,
-                                  int *out_ttl)
+                                  u8 *out_ttl)
 {
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5e_rep_priv *uplink_rpriv;
@@ -2106,7 +2170,8 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
        else
                *out_dev = rt->dst.dev;
 
-       *out_ttl = ip4_dst_hoplimit(&rt->dst);
+       if (!(*out_ttl))
+               *out_ttl = ip4_dst_hoplimit(&rt->dst);
        n = dst_neigh_lookup(&rt->dst, &fl4->daddr);
        ip_rt_put(rt);
        if (!n)
@@ -2135,7 +2200,7 @@ static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv,
                                   struct net_device **out_dev,
                                   struct flowi6 *fl6,
                                   struct neighbour **out_n,
-                                  int *out_ttl)
+                                  u8 *out_ttl)
 {
        struct neighbour *n = NULL;
        struct dst_entry *dst;
@@ -2150,7 +2215,8 @@ static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv,
        if (ret < 0)
                return ret;
 
-       *out_ttl = ip6_dst_hoplimit(dst);
+       if (!(*out_ttl))
+               *out_ttl = ip6_dst_hoplimit(dst);
 
        uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
        /* if the egress device isn't on the same HW e-switch, we use the uplink */
@@ -2174,7 +2240,7 @@ static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv,
 static void gen_vxlan_header_ipv4(struct net_device *out_dev,
                                  char buf[], int encap_size,
                                  unsigned char h_dest[ETH_ALEN],
-                                 int ttl,
+                                 u8 tos, u8 ttl,
                                  __be32 daddr,
                                  __be32 saddr,
                                  __be16 udp_dst_port,
@@ -2194,6 +2260,7 @@ static void gen_vxlan_header_ipv4(struct net_device *out_dev,
        ip->daddr = daddr;
        ip->saddr = saddr;
 
+       ip->tos = tos;
        ip->ttl = ttl;
        ip->protocol = IPPROTO_UDP;
        ip->version = 0x4;
@@ -2207,7 +2274,7 @@ static void gen_vxlan_header_ipv4(struct net_device *out_dev,
 static void gen_vxlan_header_ipv6(struct net_device *out_dev,
                                  char buf[], int encap_size,
                                  unsigned char h_dest[ETH_ALEN],
-                                 int ttl,
+                                 u8 tos, u8 ttl,
                                  struct in6_addr *daddr,
                                  struct in6_addr *saddr,
                                  __be16 udp_dst_port,
@@ -2224,7 +2291,7 @@ static void gen_vxlan_header_ipv6(struct net_device *out_dev,
        ether_addr_copy(eth->h_source, out_dev->dev_addr);
        eth->h_proto = htons(ETH_P_IPV6);
 
-       ip6_flow_hdr(ip6h, 0, 0);
+       ip6_flow_hdr(ip6h, tos, 0);
        /* the HW fills up ipv6 payload len */
        ip6h->nexthdr     = IPPROTO_UDP;
        ip6h->hop_limit   = ttl;
@@ -2246,9 +2313,9 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
        struct net_device *out_dev;
        struct neighbour *n = NULL;
        struct flowi4 fl4 = {};
+       u8 nud_state, tos, ttl;
        char *encap_header;
-       int ttl, err;
-       u8 nud_state;
+       int err;
 
        if (max_encap_size < ipv4_encap_size) {
                mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
@@ -2269,6 +2336,10 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
                err = -EOPNOTSUPP;
                goto free_encap;
        }
+
+       tos = tun_key->tos;
+       ttl = tun_key->ttl;
+
        fl4.flowi4_tos = tun_key->tos;
        fl4.daddr = tun_key->u.ipv4.dst;
        fl4.saddr = tun_key->u.ipv4.src;
@@ -2303,7 +2374,7 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
        switch (e->tunnel_type) {
        case MLX5_HEADER_TYPE_VXLAN:
                gen_vxlan_header_ipv4(out_dev, encap_header,
-                                     ipv4_encap_size, e->h_dest, ttl,
+                                     ipv4_encap_size, e->h_dest, tos, ttl,
                                      fl4.daddr,
                                      fl4.saddr, tun_key->tp_dst,
                                      tunnel_id_to_key32(tun_key->tun_id));
@@ -2351,9 +2422,9 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv,
        struct net_device *out_dev;
        struct neighbour *n = NULL;
        struct flowi6 fl6 = {};
+       u8 nud_state, tos, ttl;
        char *encap_header;
-       int err, ttl = 0;
-       u8 nud_state;
+       int err;
 
        if (max_encap_size < ipv6_encap_size) {
                mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
@@ -2375,6 +2446,9 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv,
                goto free_encap;
        }
 
+       tos = tun_key->tos;
+       ttl = tun_key->ttl;
+
        fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label);
        fl6.daddr = tun_key->u.ipv6.dst;
        fl6.saddr = tun_key->u.ipv6.src;
@@ -2409,7 +2483,7 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv,
        switch (e->tunnel_type) {
        case MLX5_HEADER_TYPE_VXLAN:
                gen_vxlan_header_ipv6(out_dev, encap_header,
-                                     ipv6_encap_size, e->h_dest, ttl,
+                                     ipv6_encap_size, e->h_dest, tos, ttl,
                                      &fl6.daddr,
                                      &fl6.saddr, tun_key->tp_dst,
                                      tunnel_id_to_key32(tun_key->tun_id));
@@ -2455,11 +2529,7 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv,
                              struct mlx5e_tc_flow *flow)
 {
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
-       struct mlx5e_rep_priv *uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw,
-                                                                          REP_ETH);
-       struct net_device *up_dev = uplink_rpriv->netdev;
        unsigned short family = ip_tunnel_info_af(tun_info);
-       struct mlx5e_priv *up_priv = netdev_priv(up_dev);
        struct mlx5_esw_flow_attr *attr = flow->esw_attr;
        struct ip_tunnel_key *key = &tun_info->key;
        struct mlx5e_encap_entry *e;
@@ -2479,7 +2549,7 @@ vxlan_encap_offload_err:
                return -EOPNOTSUPP;
        }
 
-       if (mlx5e_vxlan_lookup_port(up_priv, be16_to_cpu(key->tp_dst)) &&
+       if (mlx5_vxlan_lookup_port(priv->mdev->vxlan, be16_to_cpu(key->tp_dst)) &&
            MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) {
                tunnel_type = MLX5_HEADER_TYPE_VXLAN;
        } else {
@@ -2535,6 +2605,56 @@ out_err:
        return err;
 }
 
+static int parse_tc_vlan_action(struct mlx5e_priv *priv,
+                               const struct tc_action *a,
+                               struct mlx5_esw_flow_attr *attr,
+                               u32 *action)
+{
+       u8 vlan_idx = attr->total_vlan;
+
+       if (vlan_idx >= MLX5_FS_VLAN_DEPTH)
+               return -EOPNOTSUPP;
+
+       if (tcf_vlan_action(a) == TCA_VLAN_ACT_POP) {
+               if (vlan_idx) {
+                       if (!mlx5_eswitch_vlan_actions_supported(priv->mdev,
+                                                                MLX5_FS_VLAN_DEPTH))
+                               return -EOPNOTSUPP;
+
+                       *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2;
+               } else {
+                       *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
+               }
+       } else if (tcf_vlan_action(a) == TCA_VLAN_ACT_PUSH) {
+               attr->vlan_vid[vlan_idx] = tcf_vlan_push_vid(a);
+               attr->vlan_prio[vlan_idx] = tcf_vlan_push_prio(a);
+               attr->vlan_proto[vlan_idx] = tcf_vlan_push_proto(a);
+               if (!attr->vlan_proto[vlan_idx])
+                       attr->vlan_proto[vlan_idx] = htons(ETH_P_8021Q);
+
+               if (vlan_idx) {
+                       if (!mlx5_eswitch_vlan_actions_supported(priv->mdev,
+                                                                MLX5_FS_VLAN_DEPTH))
+                               return -EOPNOTSUPP;
+
+                       *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2;
+               } else {
+                       if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, 1) &&
+                           (tcf_vlan_push_proto(a) != htons(ETH_P_8021Q) ||
+                            tcf_vlan_push_prio(a)))
+                               return -EOPNOTSUPP;
+
+                       *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
+               }
+       } else { /* action is TCA_VLAN_ACT_MODIFY */
+               return -EOPNOTSUPP;
+       }
+
+       attr->total_vlan = vlan_idx + 1;
+
+       return 0;
+}
+
 static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
                                struct mlx5e_tc_flow_parse_attr *parse_attr,
                                struct mlx5e_tc_flow *flow)
@@ -2546,6 +2666,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
        LIST_HEAD(actions);
        bool encap = false;
        u32 action = 0;
+       int err;
 
        if (!tcf_exts_has_actions(exts))
                return -EINVAL;
@@ -2562,8 +2683,6 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
                }
 
                if (is_tcf_pedit(a)) {
-                       int err;
-
                        err = parse_tc_pedit_action(priv, a, MLX5_FLOW_NAMESPACE_FDB,
                                                    parse_attr);
                        if (err)
@@ -2630,23 +2749,11 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
                }
 
                if (is_tcf_vlan(a)) {
-                       if (tcf_vlan_action(a) == TCA_VLAN_ACT_POP) {
-                               action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
-                       } else if (tcf_vlan_action(a) == TCA_VLAN_ACT_PUSH) {
-                               action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
-                               attr->vlan_vid = tcf_vlan_push_vid(a);
-                               if (mlx5_eswitch_vlan_actions_supported(priv->mdev)) {
-                                       attr->vlan_prio = tcf_vlan_push_prio(a);
-                                       attr->vlan_proto = tcf_vlan_push_proto(a);
-                                       if (!attr->vlan_proto)
-                                               attr->vlan_proto = htons(ETH_P_8021Q);
-                               } else if (tcf_vlan_push_proto(a) != htons(ETH_P_8021Q) ||
-                                          tcf_vlan_push_prio(a)) {
-                                       return -EOPNOTSUPP;
-                               }
-                       } else { /* action is TCA_VLAN_ACT_MODIFY */
-                               return -EOPNOTSUPP;
-                       }
+                       err = parse_tc_vlan_action(priv, a, attr, &action);
+
+                       if (err)
+                               return err;
+
                        attr->mirror_count = attr->out_count;
                        continue;
                }
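
The new parse_tc_vlan_action() generalizes the old single-tag handling to QinQ: the first push/pop uses the original action bits, the second depth slot maps to the new *_2 bits, and anything deeper is rejected since MLX5_FS_VLAN_DEPTH caps the stack at two tags. A simplified userspace model of that bookkeeping; names below are stand-ins, not the driver's types:

        #include <stdio.h>
        #include <stdint.h>
        #include <errno.h>

        #define VLAN_DEPTH 2
        #define ACT_PUSH   (1u << 0)
        #define ACT_PUSH_2 (1u << 1)

        struct vlan_attr {
                uint16_t proto[VLAN_DEPTH];
                uint16_t vid[VLAN_DEPTH];
                uint8_t  prio[VLAN_DEPTH];
                uint8_t  total;
        };

        static int vlan_push(struct vlan_attr *a, uint32_t *action,
                             uint16_t proto, uint16_t vid, uint8_t prio)
        {
                uint8_t idx = a->total;

                if (idx >= VLAN_DEPTH)
                        return -EOPNOTSUPP;     /* offload supports two tags */

                a->proto[idx] = proto;
                a->vid[idx] = vid;
                a->prio[idx] = prio;
                *action |= idx ? ACT_PUSH_2 : ACT_PUSH;
                a->total = idx + 1;
                return 0;
        }

        int main(void)
        {
                struct vlan_attr a = { 0 };
                uint32_t action = 0;

                vlan_push(&a, &action, 0x88a8, 100, 0); /* outer S-tag */
                vlan_push(&a, &action, 0x8100, 200, 0); /* inner C-tag */
                printf("action bits: 0x%x, depth: %u\n", action, a.total);
                return vlan_push(&a, &action, 0x8100, 300, 0) ? 0 : 1; /* third must fail */
        }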
index f29deb44bf3b162edefcb0a86af9662789fbde69..ae73ea992845683358e3d4097ad5ce58a6abde6e 100644 (file)
@@ -66,22 +66,21 @@ static inline void mlx5e_tx_dma_unmap(struct device *pdev,
        }
 }
 
+static inline struct mlx5e_sq_dma *mlx5e_dma_get(struct mlx5e_txqsq *sq, u32 i)
+{
+       return &sq->db.dma_fifo[i & sq->dma_fifo_mask];
+}
+
 static inline void mlx5e_dma_push(struct mlx5e_txqsq *sq,
                                  dma_addr_t addr,
                                  u32 size,
                                  enum mlx5e_dma_map_type map_type)
 {
-       u32 i = sq->dma_fifo_pc & sq->dma_fifo_mask;
-
-       sq->db.dma_fifo[i].addr = addr;
-       sq->db.dma_fifo[i].size = size;
-       sq->db.dma_fifo[i].type = map_type;
-       sq->dma_fifo_pc++;
-}
+       struct mlx5e_sq_dma *dma = mlx5e_dma_get(sq, sq->dma_fifo_pc++);
 
-static inline struct mlx5e_sq_dma *mlx5e_dma_get(struct mlx5e_txqsq *sq, u32 i)
-{
-       return &sq->db.dma_fifo[i & sq->dma_fifo_mask];
+       dma->addr = addr;
+       dma->size = size;
+       dma->type = map_type;
 }
 
 static void mlx5e_dma_unmap_wqe_err(struct mlx5e_txqsq *sq, u8 num_dma)
@@ -111,10 +110,11 @@ static inline int mlx5e_get_dscp_up(struct mlx5e_priv *priv, struct sk_buff *skb
 #endif
 
 u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
-                      void *accel_priv, select_queue_fallback_t fallback)
+                      struct net_device *sb_dev,
+                      select_queue_fallback_t fallback)
 {
        struct mlx5e_priv *priv = netdev_priv(dev);
-       int channel_ix = fallback(dev, skb);
+       int channel_ix = fallback(dev, skb, NULL);
        u16 num_channels;
        int up = 0;
 
@@ -228,7 +228,10 @@ mlx5e_tx_get_gso_ihs(struct mlx5e_txqsq *sq, struct sk_buff *skb)
                stats->tso_inner_packets++;
                stats->tso_inner_bytes += skb->len - ihs;
        } else {
-               ihs = skb_transport_offset(skb) + tcp_hdrlen(skb);
+               if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
+                       ihs = skb_transport_offset(skb) + sizeof(struct udphdr);
+               else
+                       ihs = skb_transport_offset(skb) + tcp_hdrlen(skb);
                stats->tso_packets++;
                stats->tso_bytes += skb->len - ihs;
        }
@@ -443,12 +446,11 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
        sq = priv->txq2sq[skb_get_queue_mapping(skb)];
        mlx5e_sq_fetch_wqe(sq, &wqe, &pi);
 
-#ifdef CONFIG_MLX5_ACCEL
        /* might send skbs and update wqe and pi */
        skb = mlx5e_accel_handle_tx(skb, sq, dev, &wqe, &pi);
        if (unlikely(!skb))
                return NETDEV_TX_OK;
-#endif
+
        return mlx5e_sq_xmit(sq, skb, wqe, pi);
 }
 
@@ -466,6 +468,7 @@ static void mlx5e_dump_error_cqe(struct mlx5e_txqsq *sq,
 
 bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
 {
+       struct mlx5e_sq_stats *stats;
        struct mlx5e_txqsq *sq;
        struct mlx5_cqe64 *cqe;
        u32 dma_fifo_cc;
@@ -483,6 +486,8 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
        if (!cqe)
                return false;
 
+       stats = sq->stats;
+
        npkts = 0;
        nbytes = 0;
 
@@ -511,7 +516,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
                                queue_work(cq->channel->priv->wq,
                                           &sq->recover.recover_work);
                        }
-                       sq->stats->cqe_err++;
+                       stats->cqe_err++;
                }
 
                do {
@@ -556,6 +561,8 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
 
        } while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));
 
+       stats->cqes += i;
+
        mlx5_cqwq_update_db_record(&cq->wq);
 
        /* ensure cq space is freed before enabling more cqes */
@@ -571,7 +578,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
                                   MLX5E_SQ_STOP_ROOM) &&
            !test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) {
                netif_tx_wake_queue(sq->txq);
-               sq->stats->wake++;
+               stats->wake++;
        }
 
        return (i == MLX5E_TX_CQ_POLL_BUDGET);
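
The mlx5e_dma_push()/mlx5e_dma_get() reshuffle above makes the ring discipline explicit: the producer counter free-runs and the slot index is derived by masking with dma_fifo_mask, which only works when the ring size is a power of two. A standalone sketch of that indexing, with simplified types:

        #include <stdint.h>
        #include <stdio.h>

        struct sq_dma { uint64_t addr; uint32_t size; };

        struct fifo {
                struct sq_dma *ring;
                uint32_t mask;  /* entries - 1, entries a power of two */
                uint32_t pc;    /* free-running producer counter */
        };

        static struct sq_dma *fifo_get(struct fifo *f, uint32_t i)
        {
                return &f->ring[i & f->mask];
        }

        static void fifo_push(struct fifo *f, uint64_t addr, uint32_t size)
        {
                struct sq_dma *dma = fifo_get(f, f->pc++);

                dma->addr = addr;
                dma->size = size;
        }

        int main(void)
        {
                struct sq_dma ring[8];
                struct fifo f = { .ring = ring, .mask = 7 };

                for (int i = 0; i < 10; i++)    /* wraps after 8 entries */
                        fifo_push(&f, 0x1000 + i, 64);
                printf("slot of pc=9: %td\n", fifo_get(&f, 9) - ring); /* 9 & 7 == 1 */
                return 0;
        }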
index 1b17f682693b90b09300fcf92fd7ed74aa29fef5..85d51736015729267573436b35630acc4f0897b0 100644 (file)
@@ -32,6 +32,7 @@
 
 #include <linux/irq.h>
 #include "en.h"
+#include "en/xdp.h"
 
 static inline bool mlx5e_channel_no_affinity_change(struct mlx5e_channel *c)
 {
@@ -74,13 +75,18 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
 {
        struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel,
                                               napi);
+       struct mlx5e_ch_stats *ch_stats = c->stats;
        bool busy = false;
        int work_done = 0;
        int i;
 
+       ch_stats->poll++;
+
        for (i = 0; i < c->num_tc; i++)
                busy |= mlx5e_poll_tx_cq(&c->sq[i].cq, budget);
 
+       busy |= mlx5e_poll_xdpsq_cq(&c->xdpsq.cq);
+
        if (c->xdp)
                busy |= mlx5e_poll_xdpsq_cq(&c->rq.xdpsq.cq);
 
@@ -94,6 +100,7 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
        if (busy) {
                if (likely(mlx5e_channel_no_affinity_change(c)))
                        return budget;
+               ch_stats->aff_change++;
                if (budget && work_done == budget)
                        work_done--;
        }
@@ -101,6 +108,8 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
        if (unlikely(!napi_complete_done(napi, work_done)))
                return work_done;
 
+       ch_stats->arm++;
+
        for (i = 0; i < c->num_tc; i++) {
                mlx5e_handle_tx_dim(&c->sq[i]);
                mlx5e_cq_arm(&c->sq[i].cq);
@@ -110,6 +119,7 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
 
        mlx5e_cq_arm(&c->rq.cq);
        mlx5e_cq_arm(&c->icosq.cq);
+       mlx5e_cq_arm(&c->xdpsq.cq);
 
        return work_done;
 }
@@ -118,8 +128,9 @@ void mlx5e_completion_event(struct mlx5_core_cq *mcq)
 {
        struct mlx5e_cq *cq = container_of(mcq, struct mlx5e_cq, mcq);
 
-       cq->event_ctr++;
        napi_schedule(cq->napi);
+       cq->event_ctr++;
+       cq->channel->stats->events++;
 }
 
 void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event)
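
The new ch_* counters instrument the NAPI loop: poll counts invocations, arm counts the times the channel re-enabled completion interrupts after napi_complete_done(), aff_change counts early bail-outs when the IRQ affinity moved, and events counts hardware completion events. A toy model of where those increments sit; this is a simplified control flow, not the driver:

        #include <stdbool.h>
        #include <stdint.h>
        #include <stdio.h>

        struct ch_stats { uint64_t poll, arm, aff_change; };

        /* returns work done; 'busy' means more work remains */
        static int napi_poll_model(struct ch_stats *s, int budget, bool busy,
                                   bool affinity_changed)
        {
                s->poll++;

                if (busy) {
                        if (!affinity_changed)
                                return budget;  /* stay in polling mode */
                        s->aff_change++;
                }

                s->arm++;       /* re-arm completion interrupts */
                return budget - 1;
        }

        int main(void)
        {
                struct ch_stats s = { 0 };

                napi_poll_model(&s, 64, true, false);   /* busy, keep polling */
                napi_poll_model(&s, 64, true, true);    /* busy but CPU moved */
                napi_poll_model(&s, 64, false, false);  /* idle, arm and sleep */
                printf("poll=%llu arm=%llu aff_change=%llu\n",
                       (unsigned long long)s.poll, (unsigned long long)s.arm,
                       (unsigned long long)s.aff_change);
                return 0;
        }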
index 406c23862f5f76557f84aa83144e6119521522e3..7669b438077953ade75e60d1d2f7e6731b0a391c 100644 (file)
@@ -40,6 +40,7 @@
 #include "mlx5_core.h"
 #include "fpga/core.h"
 #include "eswitch.h"
+#include "diag/fw_tracer.h"
 
 enum {
        MLX5_EQE_SIZE           = sizeof(struct mlx5_eqe),
@@ -168,6 +169,8 @@ static const char *eqe_type_str(u8 type)
                return "MLX5_EVENT_TYPE_FPGA_QP_ERROR";
        case MLX5_EVENT_TYPE_GENERAL_EVENT:
                return "MLX5_EVENT_TYPE_GENERAL_EVENT";
+       case MLX5_EVENT_TYPE_DEVICE_TRACER:
+               return "MLX5_EVENT_TYPE_DEVICE_TRACER";
        default:
                return "Unrecognized event";
        }
@@ -576,6 +579,11 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
                case MLX5_EVENT_TYPE_GENERAL_EVENT:
                        general_event_handler(dev, eqe);
                        break;
+
+               case MLX5_EVENT_TYPE_DEVICE_TRACER:
+                       mlx5_fw_tracer_event(dev, eqe);
+                       break;
+
                default:
                        mlx5_core_warn(dev, "Unhandled event 0x%x on EQ 0x%x\n",
                                       eqe->type, eq->eqn);
@@ -853,6 +861,9 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev)
        if (MLX5_CAP_GEN(dev, temp_warn_event))
                async_event_mask |= (1ull << MLX5_EVENT_TYPE_TEMP_WARN_EVENT);
 
+       if (MLX5_CAP_MCAM_REG(dev, tracer_registers))
+               async_event_mask |= (1ull << MLX5_EVENT_TYPE_DEVICE_TRACER);
+
        err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD,
                                 MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD,
                                 "mlx5_cmd_eq", MLX5_EQ_TYPE_ASYNC);
index b174da2884c56b96805c007f002b6b86cb97fd8f..c17bfcab517c18745901d0c059b316bea1945203 100644 (file)
@@ -38,6 +38,7 @@
 #include <net/devlink.h>
 #include <linux/mlx5/device.h>
 #include <linux/mlx5/eswitch.h>
+#include <linux/mlx5/fs.h>
 #include "lib/mpfs.h"
 
 #ifdef CONFIG_MLX5_ESWITCH
@@ -256,9 +257,10 @@ struct mlx5_esw_flow_attr {
        int out_count;
 
        int     action;
-       __be16  vlan_proto;
-       u16     vlan_vid;
-       u8      vlan_prio;
+       __be16  vlan_proto[MLX5_FS_VLAN_DEPTH];
+       u16     vlan_vid[MLX5_FS_VLAN_DEPTH];
+       u8      vlan_prio[MLX5_FS_VLAN_DEPTH];
+       u8      total_vlan;
        bool    vlan_handled;
        u32     encap_id;
        u32     mod_hdr_id;
@@ -282,10 +284,17 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw,
 int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
                                  int vport, u16 vlan, u8 qos, u8 set_flags);
 
-static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev)
+static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev,
+                                                      u8 vlan_depth)
 {
-       return MLX5_CAP_ESW_FLOWTABLE_FDB(dev, pop_vlan) &&
-              MLX5_CAP_ESW_FLOWTABLE_FDB(dev, push_vlan);
+       bool ret = MLX5_CAP_ESW_FLOWTABLE_FDB(dev, pop_vlan) &&
+                  MLX5_CAP_ESW_FLOWTABLE_FDB(dev, push_vlan);
+
+       if (vlan_depth == 1)
+               return ret;
+
+       return  ret && MLX5_CAP_ESW_FLOWTABLE_FDB(dev, pop_vlan_2) &&
+               MLX5_CAP_ESW_FLOWTABLE_FDB(dev, push_vlan_2);
 }
 
 #define MLX5_DEBUG_ESWITCH_MASK BIT(3)
index 91f1209886ffdbb37af33ac32369f312296f8bfa..f72b5c9dcfe95f98cc388676462207bf76247504 100644 (file)
@@ -66,13 +66,18 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
 
        flow_act.action = attr->action;
        /* if per flow vlan pop/push is emulated, don't set that into the firmware */
-       if (!mlx5_eswitch_vlan_actions_supported(esw->dev))
+       if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1))
                flow_act.action &= ~(MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH |
                                     MLX5_FLOW_CONTEXT_ACTION_VLAN_POP);
        else if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) {
-               flow_act.vlan.ethtype = ntohs(attr->vlan_proto);
-               flow_act.vlan.vid = attr->vlan_vid;
-               flow_act.vlan.prio = attr->vlan_prio;
+               flow_act.vlan[0].ethtype = ntohs(attr->vlan_proto[0]);
+               flow_act.vlan[0].vid = attr->vlan_vid[0];
+               flow_act.vlan[0].prio = attr->vlan_prio[0];
+               if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2) {
+                       flow_act.vlan[1].ethtype = ntohs(attr->vlan_proto[1]);
+                       flow_act.vlan[1].vid = attr->vlan_vid[1];
+                       flow_act.vlan[1].prio = attr->vlan_prio[1];
+               }
        }
 
        if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
@@ -266,7 +271,7 @@ static int esw_add_vlan_action_check(struct mlx5_esw_flow_attr *attr,
        /* protects against (1) setting rules with different vlans to push and
         * (2) setting rules w.o vlans (attr->vlan = 0) && w. vlans to push (!= 0)
         */
-       if (push && in_rep->vlan_refcount && (in_rep->vlan != attr->vlan_vid))
+       if (push && in_rep->vlan_refcount && (in_rep->vlan != attr->vlan_vid[0]))
                goto out_notsupp;
 
        return 0;
@@ -284,7 +289,7 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
        int err = 0;
 
        /* nop if we're on the vlan push/pop non emulation mode */
-       if (mlx5_eswitch_vlan_actions_supported(esw->dev))
+       if (mlx5_eswitch_vlan_actions_supported(esw->dev, 1))
                return 0;
 
        push = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH);
@@ -324,11 +329,11 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
                if (vport->vlan_refcount)
                        goto skip_set_push;
 
-               err = __mlx5_eswitch_set_vport_vlan(esw, vport->vport, attr->vlan_vid, 0,
+               err = __mlx5_eswitch_set_vport_vlan(esw, vport->vport, attr->vlan_vid[0], 0,
                                                    SET_VLAN_INSERT | SET_VLAN_STRIP);
                if (err)
                        goto out;
-               vport->vlan = attr->vlan_vid;
+               vport->vlan = attr->vlan_vid[0];
 skip_set_push:
                vport->vlan_refcount++;
        }
@@ -347,7 +352,7 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw,
        int err = 0;
 
        /* nop if we're on the vlan push/pop non emulation mode */
-       if (mlx5_eswitch_vlan_actions_supported(esw->dev))
+       if (mlx5_eswitch_vlan_actions_supported(esw->dev, 1))
                return 0;
 
        if (!attr->vlan_handled)
index c9736238604ab266d320e380bbcb03e1b4b3cdb9..5cf5f2a9d51fec724f4fac709e29e40f4110d5f7 100644 (file)
@@ -129,6 +129,7 @@ static void mlx5_fpga_tls_cmd_send(struct mlx5_fpga_device *fdev,
 static int mlx5_fpga_tls_alloc_swid(struct idr *idr, spinlock_t *idr_spinlock,
                                    void *ptr)
 {
+       unsigned long flags;
        int ret;
 
        /* TLS metadata format is 1 byte for syndrome followed
@@ -139,9 +140,9 @@ static int mlx5_fpga_tls_alloc_swid(struct idr *idr, spinlock_t *idr_spinlock,
        BUILD_BUG_ON((SWID_END - 1) & 0xFF000000);
 
        idr_preload(GFP_KERNEL);
-       spin_lock_irq(idr_spinlock);
+       spin_lock_irqsave(idr_spinlock, flags);
        ret = idr_alloc(idr, ptr, SWID_START, SWID_END, GFP_ATOMIC);
-       spin_unlock_irq(idr_spinlock);
+       spin_unlock_irqrestore(idr_spinlock, flags);
        idr_preload_end();
 
        return ret;
@@ -157,6 +158,13 @@ static void mlx5_fpga_tls_release_swid(struct idr *idr,
        spin_unlock_irqrestore(idr_spinlock, flags);
 }
 
+static void mlx_tls_kfree_complete(struct mlx5_fpga_conn *conn,
+                                  struct mlx5_fpga_device *fdev,
+                                  struct mlx5_fpga_dma_buf *buf, u8 status)
+{
+       kfree(buf);
+}
+
 struct mlx5_teardown_stream_context {
        struct mlx5_fpga_tls_command_context cmd;
        u32 swid;
@@ -178,9 +186,13 @@ mlx5_fpga_tls_teardown_completion(struct mlx5_fpga_conn *conn,
                        mlx5_fpga_err(fdev,
                                      "Teardown stream failed with syndrome = %d",
                                      syndrome);
-               else
+               else if (MLX5_GET(tls_cmd, cmd->buf.sg[0].data, direction_sx))
                        mlx5_fpga_tls_release_swid(&fdev->tls->tx_idr,
-                                                  &fdev->tls->idr_spinlock,
+                                                  &fdev->tls->tx_idr_spinlock,
+                                                  ctx->swid);
+               else
+                       mlx5_fpga_tls_release_swid(&fdev->tls->rx_idr,
+                                                  &fdev->tls->rx_idr_spinlock,
                                                   ctx->swid);
        }
        mlx5_fpga_tls_put_command_ctx(cmd);
@@ -196,6 +208,40 @@ static void mlx5_fpga_tls_flow_to_cmd(void *flow, void *cmd)
                 MLX5_GET(tls_flow, flow, direction_sx));
 }
 
+int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq,
+                           u64 rcd_sn)
+{
+       struct mlx5_fpga_dma_buf *buf;
+       int size = sizeof(*buf) + MLX5_TLS_COMMAND_SIZE;
+       void *flow;
+       void *cmd;
+       int ret;
+
+       buf = kzalloc(size, GFP_ATOMIC);
+       if (!buf)
+               return -ENOMEM;
+
+       cmd = (buf + 1);
+
+       rcu_read_lock();
+       flow = idr_find(&mdev->fpga->tls->rx_idr, ntohl(handle));
+       rcu_read_unlock();
+       mlx5_fpga_tls_flow_to_cmd(flow, cmd);
+
+       MLX5_SET(tls_cmd, cmd, swid, ntohl(handle));
+       MLX5_SET64(tls_cmd, cmd, tls_rcd_sn, be64_to_cpu(rcd_sn));
+       MLX5_SET(tls_cmd, cmd, tcp_sn, seq);
+       MLX5_SET(tls_cmd, cmd, command_type, CMD_RESYNC_RX);
+
+       buf->sg[0].data = cmd;
+       buf->sg[0].size = MLX5_TLS_COMMAND_SIZE;
+       buf->complete = mlx_tls_kfree_complete;
+
+       ret = mlx5_fpga_sbu_conn_sendmsg(mdev->fpga->tls->conn, buf);
+
+       return ret;
+}
+
 static void mlx5_fpga_tls_send_teardown_cmd(struct mlx5_core_dev *mdev,
                                            void *flow, u32 swid, gfp_t flags)
 {
@@ -223,14 +269,18 @@ static void mlx5_fpga_tls_send_teardown_cmd(struct mlx5_core_dev *mdev,
                               mlx5_fpga_tls_teardown_completion);
 }
 
-void mlx5_fpga_tls_del_tx_flow(struct mlx5_core_dev *mdev, u32 swid,
-                              gfp_t flags)
+void mlx5_fpga_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid,
+                           gfp_t flags, bool direction_sx)
 {
        struct mlx5_fpga_tls *tls = mdev->fpga->tls;
        void *flow;
 
        rcu_read_lock();
-       flow = idr_find(&tls->tx_idr, swid);
+       if (direction_sx)
+               flow = idr_find(&tls->tx_idr, swid);
+       else
+               flow = idr_find(&tls->rx_idr, swid);
+
        rcu_read_unlock();
 
        if (!flow) {
@@ -289,9 +339,11 @@ mlx5_fpga_tls_setup_completion(struct mlx5_fpga_conn *conn,
                 * the command context because we might not have received
                 * the tx completion yet.
                 */
-               mlx5_fpga_tls_del_tx_flow(fdev->mdev,
-                                         MLX5_GET(tls_cmd, tls_cmd, swid),
-                                         GFP_ATOMIC);
+               mlx5_fpga_tls_del_flow(fdev->mdev,
+                                      MLX5_GET(tls_cmd, tls_cmd, swid),
+                                      GFP_ATOMIC,
+                                      MLX5_GET(tls_cmd, tls_cmd,
+                                               direction_sx));
        }
 
        mlx5_fpga_tls_put_command_ctx(cmd);
@@ -415,8 +467,7 @@ int mlx5_fpga_tls_init(struct mlx5_core_dev *mdev)
        if (err)
                goto error;
 
-       if (!(tls->caps & (MLX5_ACCEL_TLS_TX | MLX5_ACCEL_TLS_V12 |
-                                MLX5_ACCEL_TLS_AES_GCM128))) {
+       if (!(tls->caps & (MLX5_ACCEL_TLS_V12 | MLX5_ACCEL_TLS_AES_GCM128))) {
                err = -ENOTSUPP;
                goto error;
        }
@@ -438,7 +489,9 @@ int mlx5_fpga_tls_init(struct mlx5_core_dev *mdev)
        INIT_LIST_HEAD(&tls->pending_cmds);
 
        idr_init(&tls->tx_idr);
-       spin_lock_init(&tls->idr_spinlock);
+       idr_init(&tls->rx_idr);
+       spin_lock_init(&tls->tx_idr_spinlock);
+       spin_lock_init(&tls->rx_idr_spinlock);
        fdev->tls = tls;
        return 0;
 
@@ -500,9 +553,9 @@ static int mlx5_fpga_tls_set_key_material(void *cmd, u32 caps,
        return 0;
 }
 
-static int mlx5_fpga_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
-                                 struct tls_crypto_info *crypto_info, u32 swid,
-                                 u32 tcp_sn)
+static int _mlx5_fpga_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
+                                  struct tls_crypto_info *crypto_info,
+                                  u32 swid, u32 tcp_sn)
 {
        u32 caps = mlx5_fpga_tls_device_caps(mdev);
        struct mlx5_setup_stream_context *ctx;
@@ -533,30 +586,42 @@ out:
        return ret;
 }
 
-int mlx5_fpga_tls_add_tx_flow(struct mlx5_core_dev *mdev, void *flow,
-                             struct tls_crypto_info *crypto_info,
-                             u32 start_offload_tcp_sn, u32 *p_swid)
+int mlx5_fpga_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
+                          struct tls_crypto_info *crypto_info,
+                          u32 start_offload_tcp_sn, u32 *p_swid,
+                          bool direction_sx)
 {
        struct mlx5_fpga_tls *tls = mdev->fpga->tls;
        int ret = -ENOMEM;
        u32 swid;
 
-       ret = mlx5_fpga_tls_alloc_swid(&tls->tx_idr, &tls->idr_spinlock, flow);
+       if (direction_sx)
+               ret = mlx5_fpga_tls_alloc_swid(&tls->tx_idr,
+                                              &tls->tx_idr_spinlock, flow);
+       else
+               ret = mlx5_fpga_tls_alloc_swid(&tls->rx_idr,
+                                              &tls->rx_idr_spinlock, flow);
+
        if (ret < 0)
                return ret;
 
        swid = ret;
-       MLX5_SET(tls_flow, flow, direction_sx, 1);
+       MLX5_SET(tls_flow, flow, direction_sx, direction_sx ? 1 : 0);
 
-       ret = mlx5_fpga_tls_add_flow(mdev, flow, crypto_info, swid,
-                                    start_offload_tcp_sn);
+       ret = _mlx5_fpga_tls_add_flow(mdev, flow, crypto_info, swid,
+                                     start_offload_tcp_sn);
        if (ret && ret != -EINTR)
                goto free_swid;
 
        *p_swid = swid;
        return 0;
 free_swid:
-       mlx5_fpga_tls_release_swid(&tls->tx_idr, &tls->idr_spinlock, swid);
+       if (direction_sx)
+               mlx5_fpga_tls_release_swid(&tls->tx_idr,
+                                          &tls->tx_idr_spinlock, swid);
+       else
+               mlx5_fpga_tls_release_swid(&tls->rx_idr,
+                                          &tls->rx_idr_spinlock, swid);
 
        return ret;
 }
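
The comment in mlx5_fpga_tls_alloc_swid() explains the BUILD_BUG_ON above: TLS metadata is one syndrome byte followed by a 3-byte software ID, so every swid the IDR hands out must fit in 24 bits; the tx/rx split then simply gives each direction its own IDR and spinlock. A sketch of the 24-bit constraint, with illustrative bounds rather than the driver's SWID_START/SWID_END values:

        #include <stdint.h>
        #include <stdio.h>

        #define SWID_START 0
        #define SWID_END   (1 << 24)    /* exclusive upper bound, assumed */

        _Static_assert(((SWID_END - 1) & 0xFF000000u) == 0,
                       "swid must fit in the low 3 bytes of the metadata word");

        static uint32_t make_metadata(uint8_t syndrome, uint32_t swid)
        {
                return ((uint32_t)syndrome << 24) | (swid & 0x00FFFFFFu);
        }

        int main(void)
        {
                printf("metadata: 0x%08x\n", make_metadata(0x5a, 0x000123));
                return 0;
        }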
index 800a214e4e490f9c2267b60bf3fdff8a3ff31978..3b2e37bf76febd7aaa241386560b2707a1346e78 100644 (file)
@@ -46,15 +46,18 @@ struct mlx5_fpga_tls {
        struct mlx5_fpga_conn *conn;
 
        struct idr tx_idr;
-       spinlock_t idr_spinlock; /* protects the IDR */
+       struct idr rx_idr;
+       spinlock_t tx_idr_spinlock; /* protects the IDR */
+       spinlock_t rx_idr_spinlock; /* protects the IDR */
 };
 
-int mlx5_fpga_tls_add_tx_flow(struct mlx5_core_dev *mdev, void *flow,
-                             struct tls_crypto_info *crypto_info,
-                             u32 start_offload_tcp_sn, u32 *p_swid);
+int mlx5_fpga_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
+                          struct tls_crypto_info *crypto_info,
+                          u32 start_offload_tcp_sn, u32 *p_swid,
+                          bool direction_sx);
 
-void mlx5_fpga_tls_del_tx_flow(struct mlx5_core_dev *mdev, u32 swid,
-                              gfp_t flags);
+void mlx5_fpga_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid,
+                           gfp_t flags, bool direction_sx);
 
 bool mlx5_fpga_is_tls_device(struct mlx5_core_dev *mdev);
 int mlx5_fpga_tls_init(struct mlx5_core_dev *mdev);
@@ -65,4 +68,7 @@ static inline u32 mlx5_fpga_tls_device_caps(struct mlx5_core_dev *mdev)
        return mdev->fpga->tls->caps;
 }
 
+int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq,
+                           u64 rcd_sn);
+
 #endif /* __MLX5_FPGA_TLS_H__ */
index 5a00deff54576429460ee10fd25ea3bc62f28485..6a62b84e57f4fcb90fb54c7e8fb1ccfb9c686503 100644 (file)
@@ -349,9 +349,15 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
 
        vlan = MLX5_ADDR_OF(flow_context, in_flow_context, push_vlan);
 
-       MLX5_SET(vlan, vlan, ethtype, fte->action.vlan.ethtype);
-       MLX5_SET(vlan, vlan, vid, fte->action.vlan.vid);
-       MLX5_SET(vlan, vlan, prio, fte->action.vlan.prio);
+       MLX5_SET(vlan, vlan, ethtype, fte->action.vlan[0].ethtype);
+       MLX5_SET(vlan, vlan, vid, fte->action.vlan[0].vid);
+       MLX5_SET(vlan, vlan, prio, fte->action.vlan[0].prio);
+
+       vlan = MLX5_ADDR_OF(flow_context, in_flow_context, push_vlan_2);
+
+       MLX5_SET(vlan, vlan, ethtype, fte->action.vlan[1].ethtype);
+       MLX5_SET(vlan, vlan, vid, fte->action.vlan[1].vid);
+       MLX5_SET(vlan, vlan, prio, fte->action.vlan[1].prio);
 
        in_match_value = MLX5_ADDR_OF(flow_context, in_flow_context,
                                      match_value);
index 6ddb2565884d5372ebfbe814baca6279da68e60b..a21df24b695e5bf22297981b89adfd108a920aa8 100644 (file)
@@ -1465,7 +1465,9 @@ static bool check_conflicting_actions(u32 action1, u32 action2)
                             MLX5_FLOW_CONTEXT_ACTION_DECAP |
                             MLX5_FLOW_CONTEXT_ACTION_MOD_HDR  |
                             MLX5_FLOW_CONTEXT_ACTION_VLAN_POP |
-                            MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH))
+                            MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH |
+                            MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2 |
+                            MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2))
                return true;
 
        return false;
@@ -1824,7 +1826,7 @@ search_again_locked:
 
        g = alloc_auto_flow_group(ft, spec);
        if (IS_ERR(g)) {
-               rule = (void *)g;
+               rule = ERR_CAST(g);
                up_write_ref_node(&ft->node);
                return rule;
        }
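
The ERR_CAST() change above swaps an open-coded (void *) cast for the <linux/err.h> helper, which makes explicit that an error pointer of one type is being propagated as another. A userspace sketch of the error-pointer convention, assuming the usual kernel encoding of small negative errnos at the top of the pointer range:

/* Userspace sketch of the <linux/err.h> error-pointer convention. */
#include <errno.h>
#include <stdio.h>

#define MAX_ERRNO 4095

static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
        return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}
/* ERR_CAST: re-type an error pointer without hiding that it is one */
static inline void *ERR_CAST(const void *ptr) { return (void *)ptr; }

struct group { int id; };
struct rule { int id; };

static struct group the_group = { .id = 1 };

static struct group *alloc_group(int fail)
{
        return fail ? ERR_PTR(-ENOMEM) : &the_group;
}

static struct rule *add_rule(void)
{
        struct group *g = alloc_group(1);

        if (IS_ERR(g))
                return ERR_CAST(g);     /* was "(void *)g" before the patch */
        return NULL;
}

int main(void)
{
        struct rule *rule = add_rule();

        if (IS_ERR(rule))
                printf("add_rule failed: %ld\n", PTR_ERR(rule));
        return 0;
}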
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c
new file mode 100644 (file)
index 0000000..9a8fd76
--- /dev/null
@@ -0,0 +1,230 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies, Ltd.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+#include "vxlan.h"
+
+struct mlx5_vxlan {
+       struct mlx5_core_dev            *mdev;
+       spinlock_t                      lock; /* protect vxlan table */
+       /* max_num_ports is usually 4; 16 buckets are more than enough */
+       DECLARE_HASHTABLE(htable, 4);
+       int                             num_ports;
+       struct mutex                    sync_lock; /* sync add/del port HW operations */
+};
+
+struct mlx5_vxlan_port {
+       struct hlist_node hlist;
+       atomic_t refcount;
+       u16 udp_port;
+};
+
+static inline u8 mlx5_vxlan_max_udp_ports(struct mlx5_core_dev *mdev)
+{
+       return MLX5_CAP_ETH(mdev, max_vxlan_udp_ports) ?: 4;
+}
+
+static int mlx5_vxlan_core_add_port_cmd(struct mlx5_core_dev *mdev, u16 port)
+{
+       u32 in[MLX5_ST_SZ_DW(add_vxlan_udp_dport_in)]   = {0};
+       u32 out[MLX5_ST_SZ_DW(add_vxlan_udp_dport_out)] = {0};
+
+       MLX5_SET(add_vxlan_udp_dport_in, in, opcode,
+                MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT);
+       MLX5_SET(add_vxlan_udp_dport_in, in, vxlan_udp_port, port);
+       return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+
+static int mlx5_vxlan_core_del_port_cmd(struct mlx5_core_dev *mdev, u16 port)
+{
+       u32 in[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_in)]   = {0};
+       u32 out[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_out)] = {0};
+
+       MLX5_SET(delete_vxlan_udp_dport_in, in, opcode,
+                MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT);
+       MLX5_SET(delete_vxlan_udp_dport_in, in, vxlan_udp_port, port);
+       return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+
+static struct mlx5_vxlan_port*
+mlx5_vxlan_lookup_port_locked(struct mlx5_vxlan *vxlan, u16 port)
+{
+       struct mlx5_vxlan_port *vxlanp;
+
+       hash_for_each_possible(vxlan->htable, vxlanp, hlist, port) {
+               if (vxlanp->udp_port == port)
+                       return vxlanp;
+       }
+
+       return NULL;
+}
+
+struct mlx5_vxlan_port *mlx5_vxlan_lookup_port(struct mlx5_vxlan *vxlan, u16 port)
+{
+       struct mlx5_vxlan_port *vxlanp;
+
+       if (!mlx5_vxlan_allowed(vxlan))
+               return NULL;
+
+       spin_lock_bh(&vxlan->lock);
+       vxlanp = mlx5_vxlan_lookup_port_locked(vxlan, port);
+       spin_unlock_bh(&vxlan->lock);
+
+       return vxlanp;
+}
+
+int mlx5_vxlan_add_port(struct mlx5_vxlan *vxlan, u16 port)
+{
+       struct mlx5_vxlan_port *vxlanp;
+       int ret = -ENOSPC;
+
+       vxlanp = mlx5_vxlan_lookup_port(vxlan, port);
+       if (vxlanp) {
+               atomic_inc(&vxlanp->refcount);
+               return 0;
+       }
+
+       mutex_lock(&vxlan->sync_lock);
+       if (vxlan->num_ports >= mlx5_vxlan_max_udp_ports(vxlan->mdev)) {
+               mlx5_core_info(vxlan->mdev,
+                              "UDP port (%d) not offloaded, max number of UDP ports (%d) are already offloaded\n",
+                              port, mlx5_vxlan_max_udp_ports(vxlan->mdev));
+               ret = -ENOSPC;
+               goto unlock;
+       }
+
+       ret = mlx5_vxlan_core_add_port_cmd(vxlan->mdev, port);
+       if (ret)
+               goto unlock;
+
+       vxlanp = kzalloc(sizeof(*vxlanp), GFP_KERNEL);
+       if (!vxlanp) {
+               ret = -ENOMEM;
+               goto err_delete_port;
+       }
+
+       vxlanp->udp_port = port;
+       atomic_set(&vxlanp->refcount, 1);
+
+       spin_lock_bh(&vxlan->lock);
+       hash_add(vxlan->htable, &vxlanp->hlist, port);
+       spin_unlock_bh(&vxlan->lock);
+
+       vxlan->num_ports++;
+       mutex_unlock(&vxlan->sync_lock);
+       return 0;
+
+err_delete_port:
+       mlx5_vxlan_core_del_port_cmd(vxlan->mdev, port);
+
+unlock:
+       mutex_unlock(&vxlan->sync_lock);
+       return ret;
+}
+
+int mlx5_vxlan_del_port(struct mlx5_vxlan *vxlan, u16 port)
+{
+       struct mlx5_vxlan_port *vxlanp;
+       bool remove = false;
+       int ret = 0;
+
+       mutex_lock(&vxlan->sync_lock);
+
+       spin_lock_bh(&vxlan->lock);
+       vxlanp = mlx5_vxlan_lookup_port_locked(vxlan, port);
+       if (!vxlanp) {
+               ret = -ENOENT;
+               goto out_unlock;
+       }
+
+       if (atomic_dec_and_test(&vxlanp->refcount)) {
+               hash_del(&vxlanp->hlist);
+               remove = true;
+       }
+
+out_unlock:
+       spin_unlock_bh(&vxlan->lock);
+
+       if (remove) {
+               mlx5_vxlan_core_del_port_cmd(vxlan->mdev, port);
+               kfree(vxlanp);
+               vxlan->num_ports--;
+       }
+
+       mutex_unlock(&vxlan->sync_lock);
+
+       return ret;
+}
+
+struct mlx5_vxlan *mlx5_vxlan_create(struct mlx5_core_dev *mdev)
+{
+       struct mlx5_vxlan *vxlan;
+
+       if (!MLX5_CAP_ETH(mdev, tunnel_stateless_vxlan) || !mlx5_core_is_pf(mdev))
+               return ERR_PTR(-ENOTSUPP);
+
+       vxlan = kzalloc(sizeof(*vxlan), GFP_KERNEL);
+       if (!vxlan)
+               return ERR_PTR(-ENOMEM);
+
+       vxlan->mdev = mdev;
+       mutex_init(&vxlan->sync_lock);
+       spin_lock_init(&vxlan->lock);
+       hash_init(vxlan->htable);
+
+       /* Hardware adds 4789 by default */
+       mlx5_vxlan_add_port(vxlan, 4789);
+
+       return vxlan;
+}
+
+void mlx5_vxlan_destroy(struct mlx5_vxlan *vxlan)
+{
+       struct mlx5_vxlan_port *vxlanp;
+       struct hlist_node *tmp;
+       int bkt;
+
+       if (!mlx5_vxlan_allowed(vxlan))
+               return;
+
+       /* Lockless since we are the only hash table consumers */
+       hash_for_each_safe(vxlan->htable, bkt, tmp, vxlanp, hlist) {
+               hash_del(&vxlanp->hlist);
+               mlx5_vxlan_core_del_port_cmd(vxlan->mdev, vxlanp->udp_port);
+               kfree(vxlanp);
+       }
+
+       kfree(vxlan);
+}
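
The new lib/vxlan.c keeps offloaded UDP ports in a refcounted hashtable: only the first add and the last delete of a given port reach the firmware, sync_lock serializes those HW commands, and the spinlock protects just the table. A compact userspace sketch of that first-add/last-del pattern (hw_add_port/hw_del_port and the fixed-size table are illustrative stand-ins for the firmware commands and the hashtable):

/* First add programs HW, last delete unprograms it; in between only the
 * refcount moves. */
#include <stdio.h>

struct port_entry { int port; int refcount; int in_use; };
static struct port_entry table[16];     /* toy stand-in for the hashtable */

static void hw_add_port(int port) { printf("HW: add %d\n", port); }
static void hw_del_port(int port) { printf("HW: del %d\n", port); }

static struct port_entry *lookup(int port)
{
        for (int i = 0; i < 16; i++)
                if (table[i].in_use && table[i].port == port)
                        return &table[i];
        return NULL;
}

static int add_port(int port)
{
        struct port_entry *e = lookup(port);

        if (e) {                        /* already offloaded: just take a ref */
                e->refcount++;
                return 0;
        }
        for (int i = 0; i < 16; i++)
                if (!table[i].in_use) {
                        hw_add_port(port);      /* only the first user hits HW */
                        table[i] = (struct port_entry){ port, 1, 1 };
                        return 0;
                }
        return -1;                      /* table full, like -ENOSPC */
}

static void del_port(int port)
{
        struct port_entry *e = lookup(port);

        if (e && --e->refcount == 0) {  /* last user unprograms HW */
                hw_del_port(port);
                e->in_use = 0;
        }
}

int main(void)
{
        add_port(4789); add_port(4789); /* second add: refcount only */
        del_port(4789); del_port(4789); /* second del: HW command */
        return 0;
}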
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.h
new file mode 100644 (file)
index 0000000..fd874a3
--- /dev/null
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies, Ltd.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __MLX5_VXLAN_H__
+#define __MLX5_VXLAN_H__
+
+#include <linux/mlx5/driver.h>
+
+struct mlx5_vxlan;
+struct mlx5_vxlan_port;
+
+#ifdef CONFIG_MLX5_CORE_EN
+
+static inline bool mlx5_vxlan_allowed(struct mlx5_vxlan *vxlan)
+{
+       /* The "not allowed" reason is encoded in the vxlan pointer as an
+        * error value by mlx5_vxlan_create().
+        */
+       return !IS_ERR_OR_NULL(vxlan);
+}
+
+struct mlx5_vxlan *mlx5_vxlan_create(struct mlx5_core_dev *mdev);
+void mlx5_vxlan_destroy(struct mlx5_vxlan *vxlan);
+int mlx5_vxlan_add_port(struct mlx5_vxlan *vxlan, u16 port);
+int mlx5_vxlan_del_port(struct mlx5_vxlan *vxlan, u16 port);
+struct mlx5_vxlan_port *mlx5_vxlan_lookup_port(struct mlx5_vxlan *vxlan, u16 port);
+
+#else
+
+static inline struct mlx5_vxlan*
+mlx5_vxlan_create(struct mlx5_core_dev *mdev) { return ERR_PTR(-ENOTSUPP); }
+static inline void mlx5_vxlan_destroy(struct mlx5_vxlan *vxlan) { return; }
+
+#endif
+
+#endif /* __MLX5_VXLAN_H__ */
index 615005e6381958c6f8289897993fda27a8693e25..03b9c6733eedffb34165c83e817f1ab23484b111 100644 (file)
 #include "accel/ipsec.h"
 #include "accel/tls.h"
 #include "lib/clock.h"
+#include "lib/vxlan.h"
+#include "diag/fw_tracer.h"
 
 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
-MODULE_DESCRIPTION("Mellanox Connect-IB, ConnectX-4 core driver");
+MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) core driver");
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_VERSION(DRIVER_VERSION);
 
@@ -960,6 +962,8 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
 
        mlx5_init_clock(dev);
 
+       dev->vxlan = mlx5_vxlan_create(dev);
+
        err = mlx5_init_rl_table(dev);
        if (err) {
                dev_err(&pdev->dev, "Failed to init rate limiting\n");
@@ -990,6 +994,8 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
                goto err_sriov_cleanup;
        }
 
+       dev->tracer = mlx5_fw_tracer_create(dev);
+
        return 0;
 
 err_sriov_cleanup:
@@ -1001,6 +1007,7 @@ err_mpfs_cleanup:
 err_rl_cleanup:
        mlx5_cleanup_rl_table(dev);
 err_tables_cleanup:
+       mlx5_vxlan_destroy(dev->vxlan);
        mlx5_cleanup_mkey_table(dev);
        mlx5_cleanup_srq_table(dev);
        mlx5_cleanup_qp_table(dev);
@@ -1015,11 +1022,13 @@ out:
 
 static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
 {
+       mlx5_fw_tracer_destroy(dev->tracer);
        mlx5_fpga_cleanup(dev);
        mlx5_sriov_cleanup(dev);
        mlx5_eswitch_cleanup(dev->priv.eswitch);
        mlx5_mpfs_cleanup(dev);
        mlx5_cleanup_rl_table(dev);
+       mlx5_vxlan_destroy(dev->vxlan);
        mlx5_cleanup_clock(dev);
        mlx5_cleanup_reserved_gids(dev);
        mlx5_cleanup_mkey_table(dev);
@@ -1167,10 +1176,16 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
                goto err_put_uars;
        }
 
+       err = mlx5_fw_tracer_init(dev->tracer);
+       if (err) {
+               dev_err(&pdev->dev, "Failed to init FW tracer\n");
+               goto err_fw_tracer;
+       }
+
        err = alloc_comp_eqs(dev);
        if (err) {
                dev_err(&pdev->dev, "Failed to alloc completion EQs\n");
-               goto err_stop_eqs;
+               goto err_comp_eqs;
        }
 
        err = mlx5_irq_set_affinity_hints(dev);
@@ -1252,7 +1267,10 @@ err_fpga_start:
 err_affinity_hints:
        free_comp_eqs(dev);
 
-err_stop_eqs:
+err_comp_eqs:
+       mlx5_fw_tracer_cleanup(dev->tracer);
+
+err_fw_tracer:
        mlx5_stop_eqs(dev);
 
 err_put_uars:
@@ -1320,6 +1338,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
        mlx5_fpga_device_stop(dev);
        mlx5_irq_clear_affinity_hints(dev);
        free_comp_eqs(dev);
+       mlx5_fw_tracer_cleanup(dev->tracer);
        mlx5_stop_eqs(dev);
        mlx5_put_uars_page(dev, priv->uar);
        mlx5_free_irq_vectors(dev);
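
Renaming err_stop_eqs to err_comp_eqs and adding err_fw_tracer keeps the unwind path a mirror image of the init path: the tracer is initialized after the EQs are started, so it must be cleaned up before they are stopped. A toy illustration of the goto-unwind idiom this preserves:

/* Kernel goto-unwind idiom: each init step gets a label, and failures
 * unwind in exact reverse order. */
#include <stdio.h>

static int step(const char *name, int fail)
{
        printf("init %s\n", name);
        return fail ? -1 : 0;
}

static int load_one(void)
{
        if (step("eqs", 0))
                goto err_eqs;
        if (step("fw_tracer", 0))
                goto err_fw_tracer;
        if (step("comp_eqs", 1))        /* simulate the failure */
                goto err_comp_eqs;
        return 0;

err_comp_eqs:
        printf("cleanup fw_tracer\n");  /* undo everything after eqs */
err_fw_tracer:
        printf("stop eqs\n");
err_eqs:
        return -1;
}

int main(void) { return load_one() ? 1 : 0; }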
index 023882d9a22e59a7b172aee6a3260606932f4a03..49955117ae365888bdab0464f281e664682e122d 100644 (file)
@@ -66,6 +66,12 @@ do {                                                                 \
                __func__, __LINE__, current->pid,       \
               ##__VA_ARGS__)
 
+#define mlx5_core_err_rl(__dev, format, ...)                           \
+       dev_err_ratelimited(&(__dev)->pdev->dev,                        \
+                          "%s:%d:(pid %d): " format,                   \
+                          __func__, __LINE__, current->pid,            \
+                          ##__VA_ARGS__)
+
 #define mlx5_core_warn(__dev, format, ...)                             \
        dev_warn(&(__dev)->pdev->dev, "%s:%d:(pid %d): " format,        \
                 __func__, __LINE__, current->pid,                      \
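
mlx5_core_err_rl() mirrors mlx5_core_err() but goes through dev_err_ratelimited(), so a flood of identical failures does not drown the log. A userspace sketch of a context-prefixed, ratelimited log macro in the same spirit (the one-per-interval limiter is a toy stand-in for the kernel's ratelimit state):

/* Variadic log macro with __func__/__LINE__ prefix and crude ratelimiting. */
#include <stdio.h>
#include <time.h>

#define LOG_INTERVAL 5 /* seconds */

#define log_err_rl(fmt, ...)                                            \
        do {                                                            \
                static time_t last;                                     \
                time_t now = time(NULL);                                \
                if (now - last >= LOG_INTERVAL) {                       \
                        last = now;                                     \
                        fprintf(stderr, "%s:%d: " fmt,                  \
                                __func__, __LINE__, ##__VA_ARGS__);     \
                }                                                       \
        } while (0)

int main(void)
{
        for (int i = 0; i < 3; i++)
                log_err_rl("command failed (%d)\n", i); /* prints once */
        return 0;
}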
index f4f02f775c93869ba978ec6dc9be8dd62d7d99a9..0670165afd5fda40aaf1f0a9dc54e570210f9739 100644 (file)
@@ -146,23 +146,6 @@ int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey,
 }
 EXPORT_SYMBOL(mlx5_core_query_mkey);
 
-int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *_mkey,
-                            u32 *mkey)
-{
-       u32 out[MLX5_ST_SZ_DW(query_special_contexts_out)] = {0};
-       u32 in[MLX5_ST_SZ_DW(query_special_contexts_in)]   = {0};
-       int err;
-
-       MLX5_SET(query_special_contexts_in, in, opcode,
-                MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS);
-       err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-       if (!err)
-               *mkey = MLX5_GET(query_special_contexts_out, out,
-                                dump_fill_mkey);
-       return err;
-}
-EXPORT_SYMBOL(mlx5_core_dump_fill_mkey);
-
 static inline u32 mlx5_get_psv(u32 *out, int psv_index)
 {
        switch (psv_index) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
deleted file mode 100644 (file)
index 2f74953..0000000
+++ /dev/null
@@ -1,190 +0,0 @@
-/*
- * Copyright (c) 2016, Mellanox Technologies, Ltd.  All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/mlx5/driver.h>
-#include "mlx5_core.h"
-#include "vxlan.h"
-
-void mlx5e_vxlan_init(struct mlx5e_priv *priv)
-{
-       struct mlx5e_vxlan_db *vxlan_db = &priv->vxlan;
-
-       spin_lock_init(&vxlan_db->lock);
-       INIT_RADIX_TREE(&vxlan_db->tree, GFP_ATOMIC);
-}
-
-static int mlx5e_vxlan_core_add_port_cmd(struct mlx5_core_dev *mdev, u16 port)
-{
-       u32 in[MLX5_ST_SZ_DW(add_vxlan_udp_dport_in)]   = {0};
-       u32 out[MLX5_ST_SZ_DW(add_vxlan_udp_dport_out)] = {0};
-
-       MLX5_SET(add_vxlan_udp_dport_in, in, opcode,
-                MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT);
-       MLX5_SET(add_vxlan_udp_dport_in, in, vxlan_udp_port, port);
-       return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
-}
-
-static int mlx5e_vxlan_core_del_port_cmd(struct mlx5_core_dev *mdev, u16 port)
-{
-       u32 in[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_in)]   = {0};
-       u32 out[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_out)] = {0};
-
-       MLX5_SET(delete_vxlan_udp_dport_in, in, opcode,
-                MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT);
-       MLX5_SET(delete_vxlan_udp_dport_in, in, vxlan_udp_port, port);
-       return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
-}
-
-struct mlx5e_vxlan *mlx5e_vxlan_lookup_port(struct mlx5e_priv *priv, u16 port)
-{
-       struct mlx5e_vxlan_db *vxlan_db = &priv->vxlan;
-       struct mlx5e_vxlan *vxlan;
-
-       spin_lock_bh(&vxlan_db->lock);
-       vxlan = radix_tree_lookup(&vxlan_db->tree, port);
-       spin_unlock_bh(&vxlan_db->lock);
-
-       return vxlan;
-}
-
-static void mlx5e_vxlan_add_port(struct work_struct *work)
-{
-       struct mlx5e_vxlan_work *vxlan_work =
-               container_of(work, struct mlx5e_vxlan_work, work);
-       struct mlx5e_priv *priv = vxlan_work->priv;
-       struct mlx5e_vxlan_db *vxlan_db = &priv->vxlan;
-       u16 port = vxlan_work->port;
-       struct mlx5e_vxlan *vxlan;
-       int err;
-
-       mutex_lock(&priv->state_lock);
-       vxlan = mlx5e_vxlan_lookup_port(priv, port);
-       if (vxlan) {
-               atomic_inc(&vxlan->refcount);
-               goto free_work;
-       }
-
-       if (mlx5e_vxlan_core_add_port_cmd(priv->mdev, port))
-               goto free_work;
-
-       vxlan = kzalloc(sizeof(*vxlan), GFP_KERNEL);
-       if (!vxlan)
-               goto err_delete_port;
-
-       vxlan->udp_port = port;
-       atomic_set(&vxlan->refcount, 1);
-
-       spin_lock_bh(&vxlan_db->lock);
-       err = radix_tree_insert(&vxlan_db->tree, vxlan->udp_port, vxlan);
-       spin_unlock_bh(&vxlan_db->lock);
-       if (err)
-               goto err_free;
-
-       goto free_work;
-
-err_free:
-       kfree(vxlan);
-err_delete_port:
-       mlx5e_vxlan_core_del_port_cmd(priv->mdev, port);
-free_work:
-       mutex_unlock(&priv->state_lock);
-       kfree(vxlan_work);
-}
-
-static void mlx5e_vxlan_del_port(struct work_struct *work)
-{
-       struct mlx5e_vxlan_work *vxlan_work =
-               container_of(work, struct mlx5e_vxlan_work, work);
-       struct mlx5e_priv *priv         = vxlan_work->priv;
-       struct mlx5e_vxlan_db *vxlan_db = &priv->vxlan;
-       u16 port = vxlan_work->port;
-       struct mlx5e_vxlan *vxlan;
-       bool remove = false;
-
-       mutex_lock(&priv->state_lock);
-       spin_lock_bh(&vxlan_db->lock);
-       vxlan = radix_tree_lookup(&vxlan_db->tree, port);
-       if (!vxlan)
-               goto out_unlock;
-
-       if (atomic_dec_and_test(&vxlan->refcount)) {
-               radix_tree_delete(&vxlan_db->tree, port);
-               remove = true;
-       }
-
-out_unlock:
-       spin_unlock_bh(&vxlan_db->lock);
-
-       if (remove) {
-               mlx5e_vxlan_core_del_port_cmd(priv->mdev, port);
-               kfree(vxlan);
-       }
-       mutex_unlock(&priv->state_lock);
-       kfree(vxlan_work);
-}
-
-void mlx5e_vxlan_queue_work(struct mlx5e_priv *priv, sa_family_t sa_family,
-                           u16 port, int add)
-{
-       struct mlx5e_vxlan_work *vxlan_work;
-
-       vxlan_work = kmalloc(sizeof(*vxlan_work), GFP_ATOMIC);
-       if (!vxlan_work)
-               return;
-
-       if (add)
-               INIT_WORK(&vxlan_work->work, mlx5e_vxlan_add_port);
-       else
-               INIT_WORK(&vxlan_work->work, mlx5e_vxlan_del_port);
-
-       vxlan_work->priv = priv;
-       vxlan_work->port = port;
-       vxlan_work->sa_family = sa_family;
-       queue_work(priv->wq, &vxlan_work->work);
-}
-
-void mlx5e_vxlan_cleanup(struct mlx5e_priv *priv)
-{
-       struct mlx5e_vxlan_db *vxlan_db = &priv->vxlan;
-       struct mlx5e_vxlan *vxlan;
-       unsigned int port = 0;
-
-       /* Lockless since we are the only radix-tree consumers, wq is disabled */
-       while (radix_tree_gang_lookup(&vxlan_db->tree, (void **)&vxlan, port, 1)) {
-               port = vxlan->udp_port;
-               radix_tree_delete(&vxlan_db->tree, port);
-               mlx5e_vxlan_core_del_port_cmd(priv->mdev, port);
-               kfree(vxlan);
-       }
-}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h
deleted file mode 100644 (file)
index 5ef6ae7..0000000
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2016, Mellanox Technologies, Ltd.  All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __MLX5_VXLAN_H__
-#define __MLX5_VXLAN_H__
-
-#include <linux/mlx5/driver.h>
-#include "en.h"
-
-struct mlx5e_vxlan {
-       atomic_t refcount;
-       u16 udp_port;
-};
-
-struct mlx5e_vxlan_work {
-       struct work_struct      work;
-       struct mlx5e_priv       *priv;
-       sa_family_t             sa_family;
-       u16                     port;
-};
-
-static inline bool mlx5e_vxlan_allowed(struct mlx5_core_dev *mdev)
-{
-       return (MLX5_CAP_ETH(mdev, tunnel_stateless_vxlan) &&
-               mlx5_core_is_pf(mdev));
-}
-
-void mlx5e_vxlan_init(struct mlx5e_priv *priv);
-void mlx5e_vxlan_cleanup(struct mlx5e_priv *priv);
-
-void mlx5e_vxlan_queue_work(struct mlx5e_priv *priv, sa_family_t sa_family,
-                           u16 port, int add);
-struct mlx5e_vxlan *mlx5e_vxlan_lookup_port(struct mlx5e_priv *priv, u16 port);
-
-#endif /* __MLX5_VXLAN_H__ */
index 0b47126815b636246009123d6b6dbf14133db28c..2bd4c3184eba21d866ea8df66699a41145e3ec10 100644 (file)
@@ -229,6 +229,11 @@ static inline int mlx5_wq_ll_is_empty(struct mlx5_wq_ll *wq)
        return !wq->cur_sz;
 }
 
+static inline int mlx5_wq_ll_missing(struct mlx5_wq_ll *wq)
+{
+       return wq->fbc.sz_m1 - wq->cur_sz;
+}
+
 static inline void *mlx5_wq_ll_get_wqe(struct mlx5_wq_ll *wq, u16 ix)
 {
        return mlx5_frag_buf_get_wqe(&wq->fbc, ix);
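
mlx5_wq_ll_missing() appears to report how many entries of the linked-list work queue are still free: fbc.sz_m1 is the queue size minus one (it doubles as an index mask elsewhere in the driver), so the free count comes out as sz_m1 - cur_sz. A sketch of that arithmetic, with illustrative names:

/* Ring accounting behind mlx5_wq_ll_missing(): sz_m1 is "size minus one",
 * cur_sz is the number of entries currently posted. */
#include <stdio.h>

struct wq {
        unsigned int sz_m1;   /* queue size - 1 (power-of-two sizes) */
        unsigned int cur_sz;  /* entries currently posted */
};

static unsigned int wq_missing(const struct wq *wq)
{
        return wq->sz_m1 - wq->cur_sz;
}

int main(void)
{
        struct wq wq = { .sz_m1 = 1023, .cur_sz = 1000 };

        printf("can still post %u WQEs\n", wq_missing(&wq)); /* 23 */
        return 0;
}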
index 82827a8d3d67cac73ac3f6c232e3f750553deddc..8a291eb36c64cfbe51da7138453ab0456a46e32a 100644 (file)
@@ -78,6 +78,7 @@ config MLXSW_SPECTRUM
        depends on IPV6 || IPV6=n
        depends on NET_IPGRE || NET_IPGRE=n
        depends on IPV6_GRE || IPV6_GRE=n
+       select GENERIC_ALLOCATOR
        select PARMAN
        select MLXFW
        default m
index 0cadcabfe86f10ba5132cad20cb3a3a3ce035b19..68fa44a41485319d894345db7e5e09efc05b7fbf 100644 (file)
@@ -15,11 +15,18 @@ mlxsw_switchx2-objs         := switchx2.o
 obj-$(CONFIG_MLXSW_SPECTRUM)   += mlxsw_spectrum.o
 mlxsw_spectrum-objs            := spectrum.o spectrum_buffers.o \
                                   spectrum_switchdev.o spectrum_router.o \
-                                  spectrum_kvdl.o spectrum_acl_tcam.o \
-                                  spectrum_acl.o spectrum_flower.o \
-                                  spectrum_cnt.o spectrum_fid.o \
-                                  spectrum_ipip.o spectrum_acl_flex_actions.o \
-                                  spectrum_mr.o spectrum_mr_tcam.o \
+                                  spectrum1_kvdl.o spectrum2_kvdl.o \
+                                  spectrum_kvdl.o \
+                                  spectrum_acl_tcam.o spectrum_acl_ctcam.o \
+                                  spectrum_acl_atcam.o spectrum_acl_erp.o \
+                                  spectrum1_acl_tcam.o spectrum2_acl_tcam.o \
+                                  spectrum_acl.o \
+                                  spectrum_flower.o spectrum_cnt.o \
+                                  spectrum_fid.o spectrum_ipip.o \
+                                  spectrum_acl_flex_actions.o \
+                                  spectrum_acl_flex_keys.o \
+                                  spectrum1_mr_tcam.o spectrum2_mr_tcam.o \
+                                  spectrum_mr_tcam.o spectrum_mr.o \
                                   spectrum_qdisc.o spectrum_span.o
 mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB)    += spectrum_dcb.o
 mlxsw_spectrum-$(CONFIG_NET_DEVLINK) += spectrum_dpipe.o
index f6f6a568d66a5a55cac67176c4c9a5ee01c11a90..66ea256fe56090c31857ed843b8abf4e42eb4a41 100644 (file)
@@ -355,9 +355,24 @@ struct mlxsw_afa_block *mlxsw_afa_block_create(struct mlxsw_afa *mlxsw_afa)
        block->first_set = mlxsw_afa_set_create(true);
        if (!block->first_set)
                goto err_first_set_create;
-       block->cur_set = block->first_set;
+
+       /* In case the user asked for a dummy first set, we leave it
+        * empty here and immediately create another, real set.
+        */
+       if (mlxsw_afa->ops->dummy_first_set) {
+               block->cur_set = mlxsw_afa_set_create(false);
+               if (!block->cur_set)
+                       goto err_second_set_create;
+               block->cur_set->prev = block->first_set;
+               block->first_set->next = block->cur_set;
+       } else {
+               block->cur_set = block->first_set;
+       }
+
        return block;
 
+err_second_set_create:
+       mlxsw_afa_set_destroy(block->first_set);
 err_first_set_create:
        kfree(block);
        return NULL;
@@ -419,11 +434,31 @@ char *mlxsw_afa_block_first_set(struct mlxsw_afa_block *block)
 }
 EXPORT_SYMBOL(mlxsw_afa_block_first_set);
 
-u32 mlxsw_afa_block_first_set_kvdl_index(struct mlxsw_afa_block *block)
+char *mlxsw_afa_block_cur_set(struct mlxsw_afa_block *block)
 {
-       return block->first_set->kvdl_index;
+       return block->cur_set->ht_key.enc_actions;
 }
-EXPORT_SYMBOL(mlxsw_afa_block_first_set_kvdl_index);
+EXPORT_SYMBOL(mlxsw_afa_block_cur_set);
+
+u32 mlxsw_afa_block_first_kvdl_index(struct mlxsw_afa_block *block)
+{
+       /* The first set is never in KVD linear, so the first set with
+        * a valid KVD linear index is always the second one.
+        */
+       if (WARN_ON(!block->first_set->next))
+               return 0;
+       return block->first_set->next->kvdl_index;
+}
+EXPORT_SYMBOL(mlxsw_afa_block_first_kvdl_index);
+
+int mlxsw_afa_block_activity_get(struct mlxsw_afa_block *block, bool *activity)
+{
+       u32 kvdl_index = mlxsw_afa_block_first_kvdl_index(block);
+
+       return block->afa->ops->kvdl_set_activity_get(block->afa->ops_priv,
+                                                     kvdl_index, activity);
+}
+EXPORT_SYMBOL(mlxsw_afa_block_activity_get);
 
 int mlxsw_afa_block_continue(struct mlxsw_afa_block *block)
 {
@@ -724,14 +759,17 @@ mlxsw_afa_vlan_pack(char *payload,
 }
 
 int mlxsw_afa_block_append_vlan_modify(struct mlxsw_afa_block *block,
-                                      u16 vid, u8 pcp, u8 et)
+                                      u16 vid, u8 pcp, u8 et,
+                                      struct netlink_ext_ack *extack)
 {
        char *act = mlxsw_afa_block_append_action(block,
                                                  MLXSW_AFA_VLAN_CODE,
                                                  MLXSW_AFA_VLAN_SIZE);
 
-       if (IS_ERR(act))
+       if (IS_ERR(act)) {
+               NL_SET_ERR_MSG_MOD(extack, "Cannot append vlan_modify action");
                return PTR_ERR(act);
+       }
        mlxsw_afa_vlan_pack(act, MLXSW_AFA_VLAN_VLAN_TAG_CMD_NOP,
                            MLXSW_AFA_VLAN_CMD_SET_OUTER, vid,
                            MLXSW_AFA_VLAN_CMD_SET_OUTER, pcp,
@@ -925,19 +963,23 @@ mlxsw_afa_block_append_allocated_mirror(struct mlxsw_afa_block *block,
 
 int
 mlxsw_afa_block_append_mirror(struct mlxsw_afa_block *block, u8 local_in_port,
-                             const struct net_device *out_dev, bool ingress)
+                             const struct net_device *out_dev, bool ingress,
+                             struct netlink_ext_ack *extack)
 {
        struct mlxsw_afa_mirror *mirror;
        int err;
 
        mirror = mlxsw_afa_mirror_create(block, local_in_port, out_dev,
                                         ingress);
-       if (IS_ERR(mirror))
+       if (IS_ERR(mirror)) {
+               NL_SET_ERR_MSG_MOD(extack, "Cannot create mirror action");
                return PTR_ERR(mirror);
-
+       }
        err = mlxsw_afa_block_append_allocated_mirror(block, mirror->span_id);
-       if (err)
+       if (err) {
+               NL_SET_ERR_MSG_MOD(extack, "Cannot append mirror action");
                goto err_append_allocated_mirror;
+       }
 
        return 0;
 
@@ -987,23 +1029,29 @@ mlxsw_afa_forward_pack(char *payload, enum mlxsw_afa_forward_type type,
 }
 
 int mlxsw_afa_block_append_fwd(struct mlxsw_afa_block *block,
-                              u8 local_port, bool in_port)
+                              u8 local_port, bool in_port,
+                              struct netlink_ext_ack *extack)
 {
        struct mlxsw_afa_fwd_entry_ref *fwd_entry_ref;
        u32 kvdl_index;
        char *act;
        int err;
 
-       if (in_port)
+       if (in_port) {
+               NL_SET_ERR_MSG_MOD(extack, "Forwarding to ingress port is not supported");
                return -EOPNOTSUPP;
+       }
        fwd_entry_ref = mlxsw_afa_fwd_entry_ref_create(block, local_port);
-       if (IS_ERR(fwd_entry_ref))
+       if (IS_ERR(fwd_entry_ref)) {
+               NL_SET_ERR_MSG_MOD(extack, "Cannot create forward action");
                return PTR_ERR(fwd_entry_ref);
+       }
        kvdl_index = fwd_entry_ref->fwd_entry->kvdl_index;
 
        act = mlxsw_afa_block_append_action(block, MLXSW_AFA_FORWARD_CODE,
                                            MLXSW_AFA_FORWARD_SIZE);
        if (IS_ERR(act)) {
+               NL_SET_ERR_MSG_MOD(extack, "Cannot append forward action");
                err = PTR_ERR(act);
                goto err_append_action;
        }
@@ -1068,21 +1116,25 @@ int mlxsw_afa_block_append_allocated_counter(struct mlxsw_afa_block *block,
 EXPORT_SYMBOL(mlxsw_afa_block_append_allocated_counter);
 
 int mlxsw_afa_block_append_counter(struct mlxsw_afa_block *block,
-                                  u32 *p_counter_index)
+                                  u32 *p_counter_index,
+                                  struct netlink_ext_ack *extack)
 {
        struct mlxsw_afa_counter *counter;
        u32 counter_index;
        int err;
 
        counter = mlxsw_afa_counter_create(block);
-       if (IS_ERR(counter))
+       if (IS_ERR(counter)) {
+               NL_SET_ERR_MSG_MOD(extack, "Cannot create count action");
                return PTR_ERR(counter);
+       }
        counter_index = counter->counter_index;
 
        err = mlxsw_afa_block_append_allocated_counter(block, counter_index);
-       if (err)
+       if (err) {
+               NL_SET_ERR_MSG_MOD(extack, "Cannot append count action");
                goto err_append_allocated_counter;
-
+       }
        if (p_counter_index)
                *p_counter_index = counter_index;
        return 0;
@@ -1125,13 +1177,16 @@ static inline void mlxsw_afa_virfwd_pack(char *payload,
        mlxsw_afa_virfwd_fid_set(payload, fid);
 }
 
-int mlxsw_afa_block_append_fid_set(struct mlxsw_afa_block *block, u16 fid)
+int mlxsw_afa_block_append_fid_set(struct mlxsw_afa_block *block, u16 fid,
+                                  struct netlink_ext_ack *extack)
 {
        char *act = mlxsw_afa_block_append_action(block,
                                                  MLXSW_AFA_VIRFWD_CODE,
                                                  MLXSW_AFA_VIRFWD_SIZE);
-       if (IS_ERR(act))
+       if (IS_ERR(act)) {
+               NL_SET_ERR_MSG_MOD(extack, "Cannot append fid_set action");
                return PTR_ERR(act);
+       }
        mlxsw_afa_virfwd_pack(act, MLXSW_AFA_VIRFWD_FID_CMD_SET, fid);
        return 0;
 }
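
The recurring change through these flex-actions hunks is threading struct netlink_ext_ack *extack into every append helper so each failure site can hand a human-readable reason back to the netlink caller via NL_SET_ERR_MSG_MOD(). A userspace sketch of the pattern, with ext_ack and SET_ERR_MSG as simplified stand-ins:

/* Thread a message sink through the call chain so the failure site can
 * say *why* it failed, not just return an errno. */
#include <errno.h>
#include <stdio.h>

struct ext_ack { const char *msg; };

#define SET_ERR_MSG(ack, m) do { if (ack) (ack)->msg = (m); } while (0)

static int append_action(int room, struct ext_ack *ack)
{
        if (!room) {
                SET_ERR_MSG(ack, "Cannot append action: set is full");
                return -ENOBUFS;
        }
        return 0;
}

int main(void)
{
        struct ext_ack ack = { NULL };

        if (append_action(0, &ack))
                fprintf(stderr, "error: %s\n", ack.msg);
        return 0;
}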
index 3a155d1043845208e0809f83cee4227fcfe964c1..a6ffadd308077a3e0997efa418439e85d249787c 100644 (file)
@@ -45,6 +45,8 @@ struct mlxsw_afa_ops {
        int (*kvdl_set_add)(void *priv, u32 *p_kvdl_index,
                            char *enc_actions, bool is_first);
        void (*kvdl_set_del)(void *priv, u32 kvdl_index, bool is_first);
+       int (*kvdl_set_activity_get)(void *priv, u32 kvdl_index,
+                                    bool *activity);
        int (*kvdl_fwd_entry_add)(void *priv, u32 *p_kvdl_index, u8 local_port);
        void (*kvdl_fwd_entry_del)(void *priv, u32 kvdl_index);
        int (*counter_index_get)(void *priv, unsigned int *p_counter_index);
@@ -54,6 +56,7 @@ struct mlxsw_afa_ops {
                          bool ingress, int *p_span_id);
        void (*mirror_del)(void *priv, u8 local_in_port, int span_id,
                           bool ingress);
+       bool dummy_first_set;
 };
 
 struct mlxsw_afa *mlxsw_afa_create(unsigned int max_acts_per_set,
@@ -64,7 +67,9 @@ struct mlxsw_afa_block *mlxsw_afa_block_create(struct mlxsw_afa *mlxsw_afa);
 void mlxsw_afa_block_destroy(struct mlxsw_afa_block *block);
 int mlxsw_afa_block_commit(struct mlxsw_afa_block *block);
 char *mlxsw_afa_block_first_set(struct mlxsw_afa_block *block);
-u32 mlxsw_afa_block_first_set_kvdl_index(struct mlxsw_afa_block *block);
+char *mlxsw_afa_block_cur_set(struct mlxsw_afa_block *block);
+u32 mlxsw_afa_block_first_kvdl_index(struct mlxsw_afa_block *block);
+int mlxsw_afa_block_activity_get(struct mlxsw_afa_block *block, bool *activity);
 int mlxsw_afa_block_continue(struct mlxsw_afa_block *block);
 int mlxsw_afa_block_jump(struct mlxsw_afa_block *block, u16 group_id);
 int mlxsw_afa_block_terminate(struct mlxsw_afa_block *block);
@@ -75,16 +80,21 @@ int mlxsw_afa_block_append_trap_and_forward(struct mlxsw_afa_block *block,
 int mlxsw_afa_block_append_mirror(struct mlxsw_afa_block *block,
                                  u8 local_in_port,
                                  const struct net_device *out_dev,
-                                 bool ingress);
+                                 bool ingress,
+                                 struct netlink_ext_ack *extack);
 int mlxsw_afa_block_append_fwd(struct mlxsw_afa_block *block,
-                              u8 local_port, bool in_port);
+                              u8 local_port, bool in_port,
+                              struct netlink_ext_ack *extack);
 int mlxsw_afa_block_append_vlan_modify(struct mlxsw_afa_block *block,
-                                      u16 vid, u8 pcp, u8 et);
+                                      u16 vid, u8 pcp, u8 et,
+                                      struct netlink_ext_ack *extack);
 int mlxsw_afa_block_append_allocated_counter(struct mlxsw_afa_block *block,
                                             u32 counter_index);
 int mlxsw_afa_block_append_counter(struct mlxsw_afa_block *block,
-                                  u32 *p_counter_index);
-int mlxsw_afa_block_append_fid_set(struct mlxsw_afa_block *block, u16 fid);
+                                  u32 *p_counter_index,
+                                  struct netlink_ext_ack *extack);
+int mlxsw_afa_block_append_fid_set(struct mlxsw_afa_block *block, u16 fid,
+                                  struct netlink_ext_ack *extack);
 int mlxsw_afa_block_append_mcrouter(struct mlxsw_afa_block *block,
                                    u16 expected_irif, u16 min_mtu,
                                    bool rmid_valid, u32 kvdl_index);
index b32a00972e836762bc8f07d50fb08223571bf109..9649b4d9349af578d597319301406195cc838072 100644 (file)
@@ -43,6 +43,7 @@
 struct mlxsw_afk {
        struct list_head key_info_list;
        unsigned int max_blocks;
+       const struct mlxsw_afk_ops *ops;
        const struct mlxsw_afk_block *blocks;
        unsigned int blocks_count;
 };
@@ -69,8 +70,7 @@ static bool mlxsw_afk_blocks_check(struct mlxsw_afk *mlxsw_afk)
 }
 
 struct mlxsw_afk *mlxsw_afk_create(unsigned int max_blocks,
-                                  const struct mlxsw_afk_block *blocks,
-                                  unsigned int blocks_count)
+                                  const struct mlxsw_afk_ops *ops)
 {
        struct mlxsw_afk *mlxsw_afk;
 
@@ -79,8 +79,9 @@ struct mlxsw_afk *mlxsw_afk_create(unsigned int max_blocks,
                return NULL;
        INIT_LIST_HEAD(&mlxsw_afk->key_info_list);
        mlxsw_afk->max_blocks = max_blocks;
-       mlxsw_afk->blocks = blocks;
-       mlxsw_afk->blocks_count = blocks_count;
+       mlxsw_afk->ops = ops;
+       mlxsw_afk->blocks = ops->blocks;
+       mlxsw_afk->blocks_count = ops->blocks_count;
        WARN_ON(!mlxsw_afk_blocks_check(mlxsw_afk));
        return mlxsw_afk;
 }
@@ -415,61 +416,76 @@ void mlxsw_afk_values_add_buf(struct mlxsw_afk_element_values *values,
 }
 EXPORT_SYMBOL(mlxsw_afk_values_add_buf);
 
-static void mlxsw_afk_encode_u32(const struct mlxsw_item *storage_item,
-                                const struct mlxsw_item *output_item,
-                                char *storage, char *output_indexed)
+static void mlxsw_sp_afk_encode_u32(const struct mlxsw_item *storage_item,
+                                   const struct mlxsw_item *output_item,
+                                   char *storage, char *output)
 {
        u32 value;
 
        value = __mlxsw_item_get32(storage, storage_item, 0);
-       __mlxsw_item_set32(output_indexed, output_item, 0, value);
+       __mlxsw_item_set32(output, output_item, 0, value);
 }
 
-static void mlxsw_afk_encode_buf(const struct mlxsw_item *storage_item,
-                                const struct mlxsw_item *output_item,
-                                char *storage, char *output_indexed)
+static void mlxsw_sp_afk_encode_buf(const struct mlxsw_item *storage_item,
+                                   const struct mlxsw_item *output_item,
+                                   char *storage, char *output)
 {
        char *storage_data = __mlxsw_item_data(storage, storage_item, 0);
-       char *output_data = __mlxsw_item_data(output_indexed, output_item, 0);
+       char *output_data = __mlxsw_item_data(output, output_item, 0);
        size_t len = output_item->size.bytes;
 
        memcpy(output_data, storage_data, len);
 }
 
-#define MLXSW_AFK_KEY_BLOCK_SIZE 16
-
-static void mlxsw_afk_encode_one(const struct mlxsw_afk_element_inst *elinst,
-                                int block_index, char *storage, char *output)
+static void
+mlxsw_sp_afk_encode_one(const struct mlxsw_afk_element_inst *elinst,
+                       char *output, char *storage)
 {
-       char *output_indexed = output + block_index * MLXSW_AFK_KEY_BLOCK_SIZE;
        const struct mlxsw_item *storage_item = &elinst->info->item;
        const struct mlxsw_item *output_item = &elinst->item;
 
        if (elinst->type == MLXSW_AFK_ELEMENT_TYPE_U32)
-               mlxsw_afk_encode_u32(storage_item, output_item,
-                                    storage, output_indexed);
+               mlxsw_sp_afk_encode_u32(storage_item, output_item,
+                                       storage, output);
        else if (elinst->type == MLXSW_AFK_ELEMENT_TYPE_BUF)
-               mlxsw_afk_encode_buf(storage_item, output_item,
-                                    storage, output_indexed);
+               mlxsw_sp_afk_encode_buf(storage_item, output_item,
+                                       storage, output);
 }
 
-void mlxsw_afk_encode(struct mlxsw_afk_key_info *key_info,
+#define MLXSW_SP_AFK_KEY_BLOCK_MAX_SIZE 16
+
+void mlxsw_afk_encode(struct mlxsw_afk *mlxsw_afk,
+                     struct mlxsw_afk_key_info *key_info,
                      struct mlxsw_afk_element_values *values,
-                     char *key, char *mask)
+                     char *key, char *mask, int block_start, int block_end)
 {
+       char block_mask[MLXSW_SP_AFK_KEY_BLOCK_MAX_SIZE];
+       char block_key[MLXSW_SP_AFK_KEY_BLOCK_MAX_SIZE];
        const struct mlxsw_afk_element_inst *elinst;
        enum mlxsw_afk_element element;
-       int block_index;
+       int block_index, i;
+
+       for (i = block_start; i <= block_end; i++) {
+               memset(block_key, 0, MLXSW_SP_AFK_KEY_BLOCK_MAX_SIZE);
+               memset(block_mask, 0, MLXSW_SP_AFK_KEY_BLOCK_MAX_SIZE);
+
+               mlxsw_afk_element_usage_for_each(element, &values->elusage) {
+                       elinst = mlxsw_afk_key_info_elinst_get(key_info,
+                                                              element,
+                                                              &block_index);
+                       if (!elinst || block_index != i)
+                               continue;
+
+                       mlxsw_sp_afk_encode_one(elinst, block_key,
+                                               values->storage.key);
+                       mlxsw_sp_afk_encode_one(elinst, block_mask,
+                                               values->storage.mask);
+               }
 
-       mlxsw_afk_element_usage_for_each(element, &values->elusage) {
-               elinst = mlxsw_afk_key_info_elinst_get(key_info, element,
-                                                      &block_index);
-               if (!elinst)
-                       continue;
-               mlxsw_afk_encode_one(elinst, block_index,
-                                    values->storage.key, key);
-               mlxsw_afk_encode_one(elinst, block_index,
-                                    values->storage.mask, mask);
+               if (key)
+                       mlxsw_afk->ops->encode_block(block_key, i, key);
+               if (mask)
+                       mlxsw_afk->ops->encode_block(block_mask, i, mask);
        }
 }
 EXPORT_SYMBOL(mlxsw_afk_encode);
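
mlxsw_afk_encode() now assembles each 16-byte key block in a scratch buffer and hands it to an ASIC-specific ops->encode_block() callback, instead of writing at a fixed block_index * 16 offset; that indirection is what lets Spectrum and Spectrum-2 lay key blocks out differently. A sketch of the scratch-block-plus-placement-callback structure (identity_place mimics the old fixed-offset layout; the element encoding is stubbed out):

/* Build each block in a scratch buffer, then let a per-ASIC callback
 * decide where it lands in the final key. */
#include <stdio.h>
#include <string.h>

#define BLOCK_SIZE 16

static void identity_place(const char *block, int index, char *output)
{
        memcpy(output + index * BLOCK_SIZE, block, BLOCK_SIZE);
}

static void encode(char *key, int nblocks,
                   void (*place)(const char *, int, char *))
{
        char scratch[BLOCK_SIZE];

        for (int i = 0; i < nblocks; i++) {
                memset(scratch, 0, sizeof(scratch));
                scratch[0] = (char)i;     /* stand-in for element encoding */
                place(scratch, i, key);   /* layout is the callback's job */
        }
}

int main(void)
{
        char key[3 * BLOCK_SIZE] = { 0 };

        encode(key, 3, identity_place);
        printf("block ids: %d %d %d\n", key[0], key[16], key[32]);
        return 0;
}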
index 122506daa586070321c079d53c9d9ff6a9037c45..18d9bfed6001c9d3f6404f3f0bb35dad6da3b8db 100644 (file)
 
 enum mlxsw_afk_element {
        MLXSW_AFK_ELEMENT_SRC_SYS_PORT,
-       MLXSW_AFK_ELEMENT_DMAC,
-       MLXSW_AFK_ELEMENT_SMAC,
+       MLXSW_AFK_ELEMENT_DMAC_32_47,
+       MLXSW_AFK_ELEMENT_DMAC_0_31,
+       MLXSW_AFK_ELEMENT_SMAC_32_47,
+       MLXSW_AFK_ELEMENT_SMAC_0_31,
        MLXSW_AFK_ELEMENT_ETHERTYPE,
        MLXSW_AFK_ELEMENT_IP_PROTO,
-       MLXSW_AFK_ELEMENT_SRC_IP4,
-       MLXSW_AFK_ELEMENT_DST_IP4,
-       MLXSW_AFK_ELEMENT_SRC_IP6_HI,
-       MLXSW_AFK_ELEMENT_SRC_IP6_LO,
-       MLXSW_AFK_ELEMENT_DST_IP6_HI,
-       MLXSW_AFK_ELEMENT_DST_IP6_LO,
+       MLXSW_AFK_ELEMENT_SRC_IP_96_127,
+       MLXSW_AFK_ELEMENT_SRC_IP_64_95,
+       MLXSW_AFK_ELEMENT_SRC_IP_32_63,
+       MLXSW_AFK_ELEMENT_SRC_IP_0_31,
+       MLXSW_AFK_ELEMENT_DST_IP_96_127,
+       MLXSW_AFK_ELEMENT_DST_IP_64_95,
+       MLXSW_AFK_ELEMENT_DST_IP_32_63,
+       MLXSW_AFK_ELEMENT_DST_IP_0_31,
        MLXSW_AFK_ELEMENT_DST_L4_PORT,
        MLXSW_AFK_ELEMENT_SRC_L4_PORT,
        MLXSW_AFK_ELEMENT_VID,
@@ -99,9 +103,11 @@ struct mlxsw_afk_element_info {
  * define an internal storage geometry.
  */
 static const struct mlxsw_afk_element_info mlxsw_afk_element_infos[] = {
-       MLXSW_AFK_ELEMENT_INFO_U32(SRC_SYS_PORT, 0x00, 16, 16),
-       MLXSW_AFK_ELEMENT_INFO_BUF(DMAC, 0x04, 6),
-       MLXSW_AFK_ELEMENT_INFO_BUF(SMAC, 0x0A, 6),
+       MLXSW_AFK_ELEMENT_INFO_U32(SRC_SYS_PORT, 0x00, 16, 8),
+       MLXSW_AFK_ELEMENT_INFO_BUF(DMAC_32_47, 0x04, 2),
+       MLXSW_AFK_ELEMENT_INFO_BUF(DMAC_0_31, 0x06, 4),
+       MLXSW_AFK_ELEMENT_INFO_BUF(SMAC_32_47, 0x0A, 2),
+       MLXSW_AFK_ELEMENT_INFO_BUF(SMAC_0_31, 0x0C, 4),
        MLXSW_AFK_ELEMENT_INFO_U32(ETHERTYPE, 0x00, 0, 16),
        MLXSW_AFK_ELEMENT_INFO_U32(IP_PROTO, 0x10, 0, 8),
        MLXSW_AFK_ELEMENT_INFO_U32(VID, 0x10, 8, 12),
@@ -112,12 +118,14 @@ static const struct mlxsw_afk_element_info mlxsw_afk_element_infos[] = {
        MLXSW_AFK_ELEMENT_INFO_U32(IP_TTL_, 0x18, 0, 8),
        MLXSW_AFK_ELEMENT_INFO_U32(IP_ECN, 0x18, 9, 2),
        MLXSW_AFK_ELEMENT_INFO_U32(IP_DSCP, 0x18, 11, 6),
-       MLXSW_AFK_ELEMENT_INFO_U32(SRC_IP4, 0x20, 0, 32),
-       MLXSW_AFK_ELEMENT_INFO_U32(DST_IP4, 0x24, 0, 32),
-       MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP6_HI, 0x20, 8),
-       MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP6_LO, 0x28, 8),
-       MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP6_HI, 0x30, 8),
-       MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP6_LO, 0x38, 8),
+       MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_96_127, 0x20, 4),
+       MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_64_95, 0x24, 4),
+       MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_32_63, 0x28, 4),
+       MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_0_31, 0x2C, 4),
+       MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP_96_127, 0x30, 4),
+       MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP_64_95, 0x34, 4),
+       MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP_32_63, 0x38, 4),
+       MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP_0_31, 0x3C, 4),
 };
 
 #define MLXSW_AFK_ELEMENT_STORAGE_SIZE 0x40
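
The element renames above split MAC and IP fields into explicit bit ranges (DMAC_32_47 plus DMAC_0_31, SRC_IP_96_127 down to SRC_IP_0_31) so each piece occupies its own slot in the internal storage and the pieces can land in different key blocks. A small sketch of slicing an IPv6 address into those four 32-bit chunks:

/* A 128-bit address carried as four 32-bit chunks, most significant
 * (bits 96..127) first, matching the SRC_IP_96_127..SRC_IP_0_31 naming. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
        /* 2001:db8::1 in network byte order */
        uint8_t addr[16] = { 0x20, 0x01, 0x0d, 0xb8, 0, 0, 0, 0,
                             0, 0, 0, 0, 0, 0, 0, 0x01 };
        uint8_t chunk[4][4];

        for (int i = 0; i < 4; i++)
                memcpy(chunk[i], addr + i * 4, 4);  /* 96_127 ... 0_31 */

        for (int i = 0; i < 4; i++)
                printf("SRC_IP_%d_%d: %02x%02x%02x%02x\n",
                       (3 - i) * 32, (3 - i) * 32 + 31,
                       chunk[i][0], chunk[i][1], chunk[i][2], chunk[i][3]);
        return 0;
}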
@@ -208,9 +216,14 @@ mlxsw_afk_element_usage_subset(struct mlxsw_afk_element_usage *elusage_small,
 
 struct mlxsw_afk;
 
+struct mlxsw_afk_ops {
+       const struct mlxsw_afk_block *blocks;
+       unsigned int blocks_count;
+       void (*encode_block)(char *block, int block_index, char *output);
+};
+
 struct mlxsw_afk *mlxsw_afk_create(unsigned int max_blocks,
-                                  const struct mlxsw_afk_block *blocks,
-                                  unsigned int blocks_count);
+                                  const struct mlxsw_afk_ops *ops);
 void mlxsw_afk_destroy(struct mlxsw_afk *mlxsw_afk);
 
 struct mlxsw_afk_key_info;
@@ -243,8 +256,9 @@ void mlxsw_afk_values_add_buf(struct mlxsw_afk_element_values *values,
                              enum mlxsw_afk_element element,
                              const char *key_value, const char *mask_value,
                              unsigned int len);
-void mlxsw_afk_encode(struct mlxsw_afk_key_info *key_info,
+void mlxsw_afk_encode(struct mlxsw_afk *mlxsw_afk,
+                     struct mlxsw_afk_key_info *key_info,
                      struct mlxsw_afk_element_values *values,
-                     char *key, char *mask);
+                     char *key, char *mask, int block_start, int block_end);
 
 #endif
index d65582325cd54e3334133f11fea13d500a8d711b..7461f8fe11335d12339eb4841e1652805cb2df91 100644 (file)
@@ -39,6 +39,7 @@
 
 #define PCI_DEVICE_ID_MELLANOX_SWITCHX2                0xc738
 #define PCI_DEVICE_ID_MELLANOX_SPECTRUM                0xcb84
+#define PCI_DEVICE_ID_MELLANOX_SPECTRUM2       0xcf6c
 #define PCI_DEVICE_ID_MELLANOX_SWITCHIB                0xcb20
 #define PCI_DEVICE_ID_MELLANOX_SWITCHIB2       0xcf08
 
index 1877d9f8a11a2b5d93fc2904fae773bb80fb630b..e52841627966396a2a027c761fcd8087a2085d0e 100644 (file)
@@ -39,6 +39,7 @@
 #ifndef _MLXSW_REG_H
 #define _MLXSW_REG_H
 
+#include <linux/kernel.h>
 #include <linux/string.h>
 #include <linux/bitops.h>
 #include <linux/if_vlan.h>
@@ -1943,6 +1944,28 @@ static inline void mlxsw_reg_cwtpm_pack(char *payload, u8 local_port,
        mlxsw_reg_cwtpm_ntcp_r_set(payload, profile);
 }
 
+/* PGCR - Policy-Engine General Configuration Register
+ * ---------------------------------------------------
+ * This register configures general Policy-Engine settings.
+ */
+#define MLXSW_REG_PGCR_ID 0x3001
+#define MLXSW_REG_PGCR_LEN 0x20
+
+MLXSW_REG_DEFINE(pgcr, MLXSW_REG_PGCR_ID, MLXSW_REG_PGCR_LEN);
+
+/* reg_pgcr_default_action_pointer_base
+ * Default action pointer base. Each region has a default action pointer
+ * which is equal to default_action_pointer_base + region_id.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, pgcr, default_action_pointer_base, 0x1C, 0, 24);
+
+static inline void mlxsw_reg_pgcr_pack(char *payload, u32 pointer_base)
+{
+       MLXSW_REG_ZERO(pgcr, payload);
+       mlxsw_reg_pgcr_default_action_pointer_base_set(payload, pointer_base);
+}
+
 /* PPBT - Policy-Engine Port Binding Table
  * ---------------------------------------
  * This register is used for configuration of the Port Binding Table.
@@ -2132,14 +2155,18 @@ MLXSW_ITEM32(reg, ptar, op, 0x00, 28, 4);
 
 /* reg_ptar_action_set_type
  * Type of action set to be used on this region.
- * For Spectrum, this is always type 2 - "flexible"
+ * For Spectrum and Spectrum-2, this is always type 2 - "flexible"
  * Access: WO
  */
 MLXSW_ITEM32(reg, ptar, action_set_type, 0x00, 16, 8);
 
+enum mlxsw_reg_ptar_key_type {
+       MLXSW_REG_PTAR_KEY_TYPE_FLEX = 0x50, /* Spectrum */
+       MLXSW_REG_PTAR_KEY_TYPE_FLEX2 = 0x51, /* Spectrum-2 */
+};
+
 /* reg_ptar_key_type
  * TCAM key type for the region.
- * For Spectrum, this is always type 0x50 - "FLEX_KEY"
  * Access: WO
  */
 MLXSW_ITEM32(reg, ptar, key_type, 0x00, 0, 8);
@@ -2182,13 +2209,14 @@ MLXSW_ITEM8_INDEXED(reg, ptar, flexible_key_id, 0x20, 0, 8,
                    MLXSW_REG_PTAR_KEY_ID_LEN, 0x00, false);
 
 static inline void mlxsw_reg_ptar_pack(char *payload, enum mlxsw_reg_ptar_op op,
+                                      enum mlxsw_reg_ptar_key_type key_type,
                                       u16 region_size, u16 region_id,
                                       const char *tcam_region_info)
 {
        MLXSW_REG_ZERO(ptar, payload);
        mlxsw_reg_ptar_op_set(payload, op);
        mlxsw_reg_ptar_action_set_type_set(payload, 2); /* "flexible" */
-       mlxsw_reg_ptar_key_type_set(payload, 0x50); /* "FLEX_KEY" */
+       mlxsw_reg_ptar_key_type_set(payload, key_type);
        mlxsw_reg_ptar_region_size_set(payload, region_size);
        mlxsw_reg_ptar_region_id_set(payload, region_id);
        mlxsw_reg_ptar_tcam_region_info_memcpy_to(payload, tcam_region_info);
@@ -2327,6 +2355,23 @@ MLXSW_REG_DEFINE(pefa, MLXSW_REG_PEFA_ID, MLXSW_REG_PEFA_LEN);
  */
 MLXSW_ITEM32(reg, pefa, index, 0x00, 0, 24);
 
+/* reg_pefa_a
+ * Activity bit.
+ * For a new entry: set if ca=0, clear if ca=1.
+ * Set when a packet lookup has hit the specific entry.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, pefa, a, 0x04, 29, 1);
+
+/* reg_pefa_ca
+ * Clear activity.
+ * On write: the activity bit is set according to this field.
+ * On read: the activity bit is cleared afterwards when ca is set.
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, pefa, ca, 0x04, 24, 1);
+
 #define MLXSW_REG_FLEX_ACTION_SET_LEN 0xA8
 
 /* reg_pefa_flex_action_set
@@ -2336,12 +2381,20 @@ MLXSW_ITEM32(reg, pefa, index, 0x00, 0, 24);
  */
 MLXSW_ITEM_BUF(reg, pefa, flex_action_set, 0x08, MLXSW_REG_FLEX_ACTION_SET_LEN);
 
-static inline void mlxsw_reg_pefa_pack(char *payload, u32 index,
+static inline void mlxsw_reg_pefa_pack(char *payload, u32 index, bool ca,
                                       const char *flex_action_set)
 {
        MLXSW_REG_ZERO(pefa, payload);
        mlxsw_reg_pefa_index_set(payload, index);
-       mlxsw_reg_pefa_flex_action_set_memcpy_to(payload, flex_action_set);
+       mlxsw_reg_pefa_ca_set(payload, ca);
+       if (flex_action_set)
+               mlxsw_reg_pefa_flex_action_set_memcpy_to(payload,
+                                                        flex_action_set);
+}
+
+static inline void mlxsw_reg_pefa_unpack(char *payload, bool *p_a)
+{
+       *p_a = mlxsw_reg_pefa_a_get(payload);
 }
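
The new pefa 'a' and 'ca' fields give action sets read-and-clear activity semantics: querying with ca=1 reports whether the entry was hit since the last query and rearms it, which is what the kvdl_set_activity_get callback added earlier builds on. A toy sketch of a read-and-clear activity flag:

/* Read-and-clear activity: the first query after a hit returns true and
 * consumes the bit, like reading PEFA with ca=1. */
#include <stdbool.h>
#include <stdio.h>

struct entry { bool activity; };

static void entry_hit(struct entry *e) { e->activity = true; }

static bool activity_get(struct entry *e, bool clear)
{
        bool a = e->activity;

        if (clear)
                e->activity = false;
        return a;
}

int main(void)
{
        struct entry e = { false };

        entry_hit(&e);
        printf("%d %d\n", activity_get(&e, true), activity_get(&e, true));
        /* prints "1 0": the first read consumes the activity bit */
        return 0;
}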
 
 /* PTCE-V2 - Policy-Engine TCAM Entry Register Version 2
@@ -2397,6 +2450,15 @@ MLXSW_ITEM32(reg, ptce2, op, 0x00, 20, 3);
  */
 MLXSW_ITEM32(reg, ptce2, offset, 0x00, 0, 16);
 
+/* reg_ptce2_priority
+ * Priority of the rule, higher values win. The range is 1..cap_kvd_size-1.
+ * Note: priority does not have to be unique per rule.
+ * Within a region, higher priority should have lower offset (no limitation
+ * between regions in a multi-region).
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ptce2, priority, 0x04, 0, 24);
+
 /* reg_ptce2_tcam_region_info
  * Opaque object that represents the TCAM region.
  * Access: Index
@@ -2404,14 +2466,14 @@ MLXSW_ITEM32(reg, ptce2, offset, 0x00, 0, 16);
 MLXSW_ITEM_BUF(reg, ptce2, tcam_region_info, 0x10,
               MLXSW_REG_PXXX_TCAM_REGION_INFO_LEN);
 
-#define MLXSW_REG_PTCE2_FLEX_KEY_BLOCKS_LEN 96
+#define MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN 96
 
 /* reg_ptce2_flex_key_blocks
  * ACL Key.
  * Access: RW
  */
 MLXSW_ITEM_BUF(reg, ptce2, flex_key_blocks, 0x20,
-              MLXSW_REG_PTCE2_FLEX_KEY_BLOCKS_LEN);
+              MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN);
 
 /* reg_ptce2_mask
  * mask- in the same size as key. A bit that is set directs the TCAM
@@ -2420,7 +2482,7 @@ MLXSW_ITEM_BUF(reg, ptce2, flex_key_blocks, 0x20,
  * Access: RW
  */
 MLXSW_ITEM_BUF(reg, ptce2, mask, 0x80,
-              MLXSW_REG_PTCE2_FLEX_KEY_BLOCKS_LEN);
+              MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN);
 
 /* reg_ptce2_flex_action_set
  * ACL action set.
@@ -2432,15 +2494,567 @@ MLXSW_ITEM_BUF(reg, ptce2, flex_action_set, 0xE0,
 static inline void mlxsw_reg_ptce2_pack(char *payload, bool valid,
                                        enum mlxsw_reg_ptce2_op op,
                                        const char *tcam_region_info,
-                                       u16 offset)
+                                       u16 offset, u32 priority)
 {
        MLXSW_REG_ZERO(ptce2, payload);
        mlxsw_reg_ptce2_v_set(payload, valid);
        mlxsw_reg_ptce2_op_set(payload, op);
        mlxsw_reg_ptce2_offset_set(payload, offset);
+       mlxsw_reg_ptce2_priority_set(payload, priority);
        mlxsw_reg_ptce2_tcam_region_info_memcpy_to(payload, tcam_region_info);
 }
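
A hedged sketch of writing a TCAM entry with the new priority argument
(region info, offset, priority, key, mask and action set are all
placeholders; the buffers are copied in before the write):

        char ptce2_pl[MLXSW_REG_PTCE2_LEN];
        int err;

        mlxsw_reg_ptce2_pack(ptce2_pl, true, MLXSW_REG_PTCE2_OP_WRITE_WRITE,
                             tcam_region_info, offset, priority);
        mlxsw_reg_ptce2_flex_key_blocks_memcpy_to(ptce2_pl, key);
        mlxsw_reg_ptce2_mask_memcpy_to(ptce2_pl, mask);
        mlxsw_reg_ptce2_flex_action_set_memcpy_to(ptce2_pl, action_set);
        err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(ptce2), ptce2_pl);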
 
+/* PERPT - Policy-Engine ERP Table Register
+ * ----------------------------------------
+ * This register adds and removes eRPs from the eRP table.
+ */
+#define MLXSW_REG_PERPT_ID 0x3021
+#define MLXSW_REG_PERPT_LEN 0x80
+
+MLXSW_REG_DEFINE(perpt, MLXSW_REG_PERPT_ID, MLXSW_REG_PERPT_LEN);
+
+/* reg_perpt_erpt_bank
+ * eRP table bank.
+ * Range 0 .. cap_max_erp_table_banks - 1
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, perpt, erpt_bank, 0x00, 16, 4);
+
+/* reg_perpt_erpt_index
+ * Index to eRP table within the eRP bank.
+ * Range is 0 .. cap_max_erp_table_bank_size - 1
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, perpt, erpt_index, 0x00, 0, 8);
+
+enum mlxsw_reg_perpt_key_size {
+       MLXSW_REG_PERPT_KEY_SIZE_2KB,
+       MLXSW_REG_PERPT_KEY_SIZE_4KB,
+       MLXSW_REG_PERPT_KEY_SIZE_8KB,
+       MLXSW_REG_PERPT_KEY_SIZE_12KB,
+};
+
+/* reg_perpt_key_size
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, perpt, key_size, 0x04, 0, 4);
+
+/* reg_perpt_bf_bypass
+ * 0 - The eRP is used only if bloom filter state is set for the given
+ * rule.
+ * 1 - The eRP is used regardless of bloom filter state.
+ * The bypass is an OR condition of region_id or eRP. See PERCR.bf_bypass.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, perpt, bf_bypass, 0x08, 8, 1);
+
+/* reg_perpt_erp_id
+ * eRP ID for use by the rules.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, perpt, erp_id, 0x08, 0, 4);
+
+/* reg_perpt_erpt_base_bank
+ * Base eRP table bank, points to head of erp_vector
+ * Range is 0 .. cap_max_erp_table_banks - 1
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, perpt, erpt_base_bank, 0x0C, 16, 4);
+
+/* reg_perpt_erpt_base_index
+ * Base index to eRP table within the eRP bank
+ * Range is 0 .. cap_max_erp_table_bank_size - 1
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, perpt, erpt_base_index, 0x0C, 0, 8);
+
+/* reg_perpt_erp_index_in_vector
+ * eRP index in the vector.
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, perpt, erp_index_in_vector, 0x10, 0, 4);
+
+/* reg_perpt_erp_vector
+ * eRP vector.
+ * Access: OP
+ */
+MLXSW_ITEM_BIT_ARRAY(reg, perpt, erp_vector, 0x14, 4, 1);
+
+/* reg_perpt_mask
+ * Mask
+ * 0 - A-TCAM will ignore the bit in key
+ * 1 - A-TCAM will compare the bit in key
+ * Access: RW
+ */
+MLXSW_ITEM_BUF(reg, perpt, mask, 0x20, MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN);
+
+static inline void mlxsw_reg_perpt_erp_vector_pack(char *payload,
+                                                  unsigned long *erp_vector,
+                                                  unsigned long size)
+{
+       unsigned long bit;
+
+       for_each_set_bit(bit, erp_vector, size)
+               mlxsw_reg_perpt_erp_vector_set(payload, bit, true);
+}
+
+static inline void
+mlxsw_reg_perpt_pack(char *payload, u8 erpt_bank, u8 erpt_index,
+                    enum mlxsw_reg_perpt_key_size key_size, u8 erp_id,
+                    u8 erpt_base_bank, u8 erpt_base_index, u8 erp_index,
+                    char *mask)
+{
+       MLXSW_REG_ZERO(perpt, payload);
+       mlxsw_reg_perpt_erpt_bank_set(payload, erpt_bank);
+       mlxsw_reg_perpt_erpt_index_set(payload, erpt_index);
+       mlxsw_reg_perpt_key_size_set(payload, key_size);
+       mlxsw_reg_perpt_bf_bypass_set(payload, true);
+       mlxsw_reg_perpt_erp_id_set(payload, erp_id);
+       mlxsw_reg_perpt_erpt_base_bank_set(payload, erpt_base_bank);
+       mlxsw_reg_perpt_erpt_base_index_set(payload, erpt_base_index);
+       mlxsw_reg_perpt_erp_index_in_vector_set(payload, erp_index);
+       mlxsw_reg_perpt_mask_memcpy_to(payload, mask);
+}
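
A sketch of programming one eRP table entry (bank, index, ID and mask are
placeholders; the bitmap width of 16 is illustrative only, the register
holds up to 32 vector bits):

        char perpt_pl[MLXSW_REG_PERPT_LEN];
        DECLARE_BITMAP(erp_vector, 16);
        int err;

        bitmap_zero(erp_vector, 16);
        __set_bit(erp_index, erp_vector);
        mlxsw_reg_perpt_pack(perpt_pl, erpt_bank, erpt_index,
                             MLXSW_REG_PERPT_KEY_SIZE_4KB, erp_id,
                             erpt_base_bank, erpt_base_index, erp_index,
                             mask);
        mlxsw_reg_perpt_erp_vector_pack(perpt_pl, erp_vector, 16);
        err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(perpt), perpt_pl);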
+
+/* PERAR - Policy-Engine Region Association Register
+ * -------------------------------------------------
+ * This register associates a hw region with a region_id. Changing the
+ * association on the fly is supported by the device.
+ */
+#define MLXSW_REG_PERAR_ID 0x3026
+#define MLXSW_REG_PERAR_LEN 0x08
+
+MLXSW_REG_DEFINE(perar, MLXSW_REG_PERAR_ID, MLXSW_REG_PERAR_LEN);
+
+/* reg_perar_region_id
+ * Region identifier
+ * Range 0 .. cap_max_regions-1
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, perar, region_id, 0x00, 0, 16);
+
+static inline unsigned int
+mlxsw_reg_perar_hw_regions_needed(unsigned int block_num)
+{
+       return DIV_ROUND_UP(block_num, 4);
+}
+
+/* reg_perar_hw_region
+ * HW Region
+ * Range 0 .. cap_max_regions-1
+ * Default: hw_region = region_id
+ * For a 8 key block region, 2 consecutive regions are used
+ * For a 12 key block region, 3 consecutive regions are used
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, perar, hw_region, 0x04, 0, 16);
+
+static inline void mlxsw_reg_perar_pack(char *payload, u16 region_id,
+                                       u16 hw_region)
+{
+       MLXSW_REG_ZERO(perar, payload);
+       mlxsw_reg_perar_region_id_set(payload, region_id);
+       mlxsw_reg_perar_hw_region_set(payload, hw_region);
+}
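
A sketch tying a wide region to its hw regions (region_id and hw_region
are placeholders); the helper above gives the count:

        /* a 12 key block region spans DIV_ROUND_UP(12, 4) == 3 hw regions */
        unsigned int num = mlxsw_reg_perar_hw_regions_needed(12);
        char perar_pl[MLXSW_REG_PERAR_LEN];
        int err;

        mlxsw_reg_perar_pack(perar_pl, region_id, hw_region);
        err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(perar), perar_pl);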
+
+/* PTCE-V3 - Policy-Engine TCAM Entry Register Version 3
+ * -----------------------------------------------------
+ * This register is a new version of PTCE-V2 that adds support for the
+ * A-TCAM. It is not supported by SwitchX/-2 and Spectrum.
+ */
+#define MLXSW_REG_PTCE3_ID 0x3027
+#define MLXSW_REG_PTCE3_LEN 0xF0
+
+MLXSW_REG_DEFINE(ptce3, MLXSW_REG_PTCE3_ID, MLXSW_REG_PTCE3_LEN);
+
+/* reg_ptce3_v
+ * Valid.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ptce3, v, 0x00, 31, 1);
+
+enum mlxsw_reg_ptce3_op {
+       /* Write operation. Used to write a new entry to the table.
+        * All R/W fields are relevant for new entry. Activity bit is set
+        * for new entries. Write with v = 0 will delete the entry. Must
+        * not be used if an entry exists.
+        */
+       MLXSW_REG_PTCE3_OP_WRITE_WRITE = 0,
+       /* Update operation */
+       MLXSW_REG_PTCE3_OP_WRITE_UPDATE = 1,
+       /* Read operation */
+       MLXSW_REG_PTCE3_OP_QUERY_READ = 0,
+};
+
+/* reg_ptce3_op
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, ptce3, op, 0x00, 20, 3);
+
+/* reg_ptce3_priority
+ * Priority of the rule. Higher values win.
+ * For Spectrum-2 range is 1..cap_kvd_size - 1
+ * Note: Priority does not have to be unique per rule.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ptce3, priority, 0x04, 0, 24);
+
+/* reg_ptce3_tcam_region_info
+ * Opaque object that represents the TCAM region.
+ * Access: Index
+ */
+MLXSW_ITEM_BUF(reg, ptce3, tcam_region_info, 0x10,
+              MLXSW_REG_PXXX_TCAM_REGION_INFO_LEN);
+
+/* reg_ptce3_flex2_key_blocks
+ * ACL key. The key must be masked according to eRP (if exists) or
+ * according to master mask.
+ * Access: Index
+ */
+MLXSW_ITEM_BUF(reg, ptce3, flex2_key_blocks, 0x20,
+              MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN);
+
+/* reg_ptce3_erp_id
+ * eRP ID.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, ptce3, erp_id, 0x80, 0, 4);
+
+/* reg_ptce3_delta_start
+ * Start point of delta_value and delta_mask, in bits. Must not exceed
+ * num_key_blocks * 36 - 8. Reserved when delta_mask = 0.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, ptce3, delta_start, 0x84, 0, 10);
+
+/* reg_ptce3_delta_mask
+ * Delta mask.
+ * 0 - Ignore relevant bit in delta_value
+ * 1 - Compare relevant bit in delta_value
+ * Delta mask must not be set for reserved fields in the key blocks.
+ * Note: No delta when no eRPs. Thus, for regions with
+ * PERERP.erpt_pointer_valid = 0 the delta mask must be 0.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, ptce3, delta_mask, 0x88, 16, 8);
+
+/* reg_ptce3_delta_value
+ * Delta value.
+ * Bits which are masked by delta_mask must be 0.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, ptce3, delta_value, 0x88, 0, 8);
+
+/* reg_ptce3_prune_vector
+ * Pruning vector relative to the PERPT.erp_id.
+ * Used for reducing lookups.
+ * 0 - NEED: Do a lookup using the eRP.
+ * 1 - PRUNE: Do not perform a lookup using the eRP.
+ * May be modified by PEAPBL and PEAPBM.
+ * Note: In Spectrum-2, a region of 8 key blocks must be set to either
+ * all 1's or all 0's.
+ * Access: RW
+ */
+MLXSW_ITEM_BIT_ARRAY(reg, ptce3, prune_vector, 0x90, 4, 1);
+
+/* reg_ptce3_prune_ctcam
+ * Pruning on C-TCAM. Used for reducing lookups.
+ * 0 - NEED: Do a lookup in the C-TCAM.
+ * 1 - PRUNE: Do not perform a lookup in the C-TCAM.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ptce3, prune_ctcam, 0x94, 31, 1);
+
+/* reg_ptce3_large_exists
+ * Large entry key ID exists.
+ * Within the region:
+ * 0 - SINGLE: The large_entry_key_id is not currently in use.
+ * For rule insert: The MSB of the key (blocks 6..11) will be added.
+ * For rule delete: The MSB of the key will be removed.
+ * 1 - NON_SINGLE: The large_entry_key_id is currently in use.
+ * For rule insert: The MSB of the key (blocks 6..11) will not be added.
+ * For rule delete: The MSB of the key will not be removed.
+ * Access: WO
+ */
+MLXSW_ITEM32(reg, ptce3, large_exists, 0x98, 31, 1);
+
+/* reg_ptce3_large_entry_key_id
+ * Large entry key ID.
+ * A key for 12 key block rules. Reserved when the region has fewer than 12
+ * key blocks. Must be different for different keys which have the same common
+ * 6 key blocks (MSB, blocks 6..11) key within a region.
+ * Range is 0..cap_max_pe_large_key_id - 1
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ptce3, large_entry_key_id, 0x98, 0, 24);
+
+/* reg_ptce3_action_pointer
+ * Pointer to action.
+ * Range is 0..cap_max_kvd_action_sets - 1
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ptce3, action_pointer, 0xA0, 0, 24);
+
+static inline void mlxsw_reg_ptce3_pack(char *payload, bool valid,
+                                       enum mlxsw_reg_ptce3_op op,
+                                       u32 priority,
+                                       const char *tcam_region_info,
+                                       const char *key, u8 erp_id,
+                                       bool large_exists, u32 lkey_id,
+                                       u32 action_pointer)
+{
+       MLXSW_REG_ZERO(ptce3, payload);
+       mlxsw_reg_ptce3_v_set(payload, valid);
+       mlxsw_reg_ptce3_op_set(payload, op);
+       mlxsw_reg_ptce3_priority_set(payload, priority);
+       mlxsw_reg_ptce3_tcam_region_info_memcpy_to(payload, tcam_region_info);
+       mlxsw_reg_ptce3_flex2_key_blocks_memcpy_to(payload, key);
+       mlxsw_reg_ptce3_erp_id_set(payload, erp_id);
+       mlxsw_reg_ptce3_large_exists_set(payload, large_exists);
+       mlxsw_reg_ptce3_large_entry_key_id_set(payload, lkey_id);
+       mlxsw_reg_ptce3_action_pointer_set(payload, action_pointer);
+}
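
A sketch of inserting an A-TCAM rule (all arguments are placeholders; per
the field descriptions above, the key must already be masked according to
the eRP or the master mask):

        char ptce3_pl[MLXSW_REG_PTCE3_LEN];
        int err;

        mlxsw_reg_ptce3_pack(ptce3_pl, true, MLXSW_REG_PTCE3_OP_WRITE_WRITE,
                             priority, tcam_region_info, masked_key, erp_id,
                             large_exists, lkey_id, action_pointer);
        err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(ptce3), ptce3_pl);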
+
+/* PERCR - Policy-Engine Region Configuration Register
+ * ---------------------------------------------------
+ * This register configures the region parameters. The region_id must be
+ * allocated.
+ */
+#define MLXSW_REG_PERCR_ID 0x302A
+#define MLXSW_REG_PERCR_LEN 0x80
+
+MLXSW_REG_DEFINE(percr, MLXSW_REG_PERCR_ID, MLXSW_REG_PERCR_LEN);
+
+/* reg_percr_region_id
+ * Region identifier.
+ * Range 0..cap_max_regions-1
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, percr, region_id, 0x00, 0, 16);
+
+/* reg_percr_atcam_ignore_prune
+ * Ignore the prune_vector set by other A-TCAM rules, e.g. for a new rule.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, percr, atcam_ignore_prune, 0x04, 25, 1);
+
+/* reg_percr_ctcam_ignore_prune
+ * Ignore the prune_ctcam set by other A-TCAM rules, e.g. for a new rule.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, percr, ctcam_ignore_prune, 0x04, 24, 1);
+
+/* reg_percr_bf_bypass
+ * Bloom filter bypass.
+ * 0 - Bloom filter is used (default)
+ * 1 - Bloom filter is bypassed. The bypass is an OR condition of
+ * region_id or eRP. See PERPT.bf_bypass.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, percr, bf_bypass, 0x04, 16, 1);
+
+/* reg_percr_master_mask
+ * Master mask. Logical OR mask of all masks of all rules of a region
+ * (both A-TCAM and C-TCAM). When there are no eRPs
+ * (erpt_pointer_valid = 0), this provides the mask.
+ * Access: RW
+ */
+MLXSW_ITEM_BUF(reg, percr, master_mask, 0x20, 96);
+
+static inline void mlxsw_reg_percr_pack(char *payload, u16 region_id)
+{
+       MLXSW_REG_ZERO(percr, payload);
+       mlxsw_reg_percr_region_id_set(payload, region_id);
+       mlxsw_reg_percr_atcam_ignore_prune_set(payload, false);
+       mlxsw_reg_percr_ctcam_ignore_prune_set(payload, false);
+       mlxsw_reg_percr_bf_bypass_set(payload, true);
+}
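
Note that the pack helper leaves master_mask zeroed; a caller maintaining
the region's mask might follow up with (master_mask being a placeholder
96-byte buffer):

        char percr_pl[MLXSW_REG_PERCR_LEN];
        int err;

        mlxsw_reg_percr_pack(percr_pl, region_id);
        /* logical OR of all rule masks in the region */
        mlxsw_reg_percr_master_mask_memcpy_to(percr_pl, master_mask);
        err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(percr), percr_pl);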
+
+/* PERERP - Policy-Engine Region eRP Register
+ * ------------------------------------------
+ * This register configures the region eRP. The region_id must be
+ * allocated.
+ */
+#define MLXSW_REG_PERERP_ID 0x302B
+#define MLXSW_REG_PERERP_LEN 0x1C
+
+MLXSW_REG_DEFINE(pererp, MLXSW_REG_PERERP_ID, MLXSW_REG_PERERP_LEN);
+
+/* reg_pererp_region_id
+ * Region identifier.
+ * Range 0..cap_max_regions-1
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, pererp, region_id, 0x00, 0, 16);
+
+/* reg_pererp_ctcam_le
+ * C-TCAM lookup enable. Reserved when erpt_pointer_valid = 0.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, pererp, ctcam_le, 0x04, 28, 1);
+
+/* reg_pererp_erpt_pointer_valid
+ * erpt_pointer is valid.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, pererp, erpt_pointer_valid, 0x10, 31, 1);
+
+/* reg_pererp_erpt_bank_pointer
+ * Pointer to eRP table bank. May be modified at any time.
+ * Range 0..cap_max_erp_table_banks-1
+ * Reserved when erpt_pointer_valid = 0
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, pererp, erpt_bank_pointer, 0x10, 16, 4);
+
+/* reg_pererp_erpt_pointer
+ * Pointer to eRP table within the eRP bank. Can be changed for an
+ * existing region.
+ * Range 0..cap_max_erp_table_size-1
+ * Reserved when erpt_pointer_valid = 0
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, pererp, erpt_pointer, 0x10, 0, 8);
+
+/* reg_pererp_erpt_vector
+ * Vector of allowed eRP indexes starting from erpt_pointer within the
+ * erpt_bank_pointer. Subsequent entries continue in the next bank.
+ * Note that eRP index is used and not eRP ID.
+ * Reserved when erpt_pointer_valid = 0
+ * Access: RW
+ */
+MLXSW_ITEM_BIT_ARRAY(reg, pererp, erpt_vector, 0x14, 4, 1);
+
+/* reg_pererp_master_rp_id
+ * Master RP ID. When there are no eRPs, then this provides the eRP ID
+ * for the lookup. Can be changed for an existing region.
+ * Reserved when erpt_pointer_valid = 1
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, pererp, master_rp_id, 0x18, 0, 4);
+
+static inline void mlxsw_reg_pererp_erp_vector_pack(char *payload,
+                                                   unsigned long *erp_vector,
+                                                   unsigned long size)
+{
+       unsigned long bit;
+
+       for_each_set_bit(bit, erp_vector, size)
+               mlxsw_reg_pererp_erpt_vector_set(payload, bit, true);
+}
+
+static inline void mlxsw_reg_pererp_pack(char *payload, u16 region_id,
+                                        bool ctcam_le, bool erpt_pointer_valid,
+                                        u8 erpt_bank_pointer, u8 erpt_pointer,
+                                        u8 master_rp_id)
+{
+       MLXSW_REG_ZERO(pererp, payload);
+       mlxsw_reg_pererp_region_id_set(payload, region_id);
+       mlxsw_reg_pererp_ctcam_le_set(payload, ctcam_le);
+       mlxsw_reg_pererp_erpt_pointer_valid_set(payload, erpt_pointer_valid);
+       mlxsw_reg_pererp_erpt_bank_pointer_set(payload, erpt_bank_pointer);
+       mlxsw_reg_pererp_erpt_pointer_set(payload, erpt_pointer);
+       mlxsw_reg_pererp_master_rp_id_set(payload, master_rp_id);
+}
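
A sketch of pointing a region at a valid eRP table (placeholders
throughout); with erpt_pointer_valid set, master_rp_id is reserved and
left at zero:

        char pererp_pl[MLXSW_REG_PERERP_LEN];
        int err;

        mlxsw_reg_pererp_pack(pererp_pl, region_id, ctcam_le, true,
                              erpt_bank_pointer, erpt_pointer, 0);
        mlxsw_reg_pererp_erp_vector_pack(pererp_pl, erp_vector, size);
        err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(pererp), pererp_pl);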
+
+/* IEDR - Infrastructure Entry Delete Register
+ * -------------------------------------------
+ * This register is used for deleting entries from the entry tables.
+ * It is legitimate to attempt to delete a nonexisting entry (the device will
+ * respond as a good flow).
+ */
+#define MLXSW_REG_IEDR_ID 0x3804
+#define MLXSW_REG_IEDR_BASE_LEN 0x10 /* base length, without records */
+#define MLXSW_REG_IEDR_REC_LEN 0x8 /* record length */
+#define MLXSW_REG_IEDR_REC_MAX_COUNT 64
+#define MLXSW_REG_IEDR_LEN (MLXSW_REG_IEDR_BASE_LEN +  \
+                           MLXSW_REG_IEDR_REC_LEN *    \
+                           MLXSW_REG_IEDR_REC_MAX_COUNT)
+
+MLXSW_REG_DEFINE(iedr, MLXSW_REG_IEDR_ID, MLXSW_REG_IEDR_LEN);
+
+/* reg_iedr_num_rec
+ * Number of records.
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, iedr, num_rec, 0x00, 0, 8);
+
+/* reg_iedr_rec_type
+ * Resource type.
+ * Access: OP
+ */
+MLXSW_ITEM32_INDEXED(reg, iedr, rec_type, MLXSW_REG_IEDR_BASE_LEN, 24, 8,
+                    MLXSW_REG_IEDR_REC_LEN, 0x00, false);
+
+/* reg_iedr_rec_size
+ * Size of entries to be deleted. The unit is 1 entry, regardless of entry type.
+ * Access: OP
+ */
+MLXSW_ITEM32_INDEXED(reg, iedr, rec_size, MLXSW_REG_IEDR_BASE_LEN, 0, 11,
+                    MLXSW_REG_IEDR_REC_LEN, 0x00, false);
+
+/* reg_iedr_rec_index_start
+ * Resource index start.
+ * Access: OP
+ */
+MLXSW_ITEM32_INDEXED(reg, iedr, rec_index_start, MLXSW_REG_IEDR_BASE_LEN, 0, 24,
+                    MLXSW_REG_IEDR_REC_LEN, 0x04, false);
+
+static inline void mlxsw_reg_iedr_pack(char *payload)
+{
+       MLXSW_REG_ZERO(iedr, payload);
+}
+
+static inline void mlxsw_reg_iedr_rec_pack(char *payload, int rec_index,
+                                          u8 rec_type, u16 rec_size,
+                                          u32 rec_index_start)
+{
+       u8 num_rec = mlxsw_reg_iedr_num_rec_get(payload);
+
+       if (rec_index >= num_rec)
+               mlxsw_reg_iedr_num_rec_set(payload, rec_index + 1);
+       mlxsw_reg_iedr_rec_type_set(payload, rec_index, rec_type);
+       mlxsw_reg_iedr_rec_size_set(payload, rec_index, rec_size);
+       mlxsw_reg_iedr_rec_index_start_set(payload, rec_index, rec_index_start);
+}
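
A sketch batching two delete records in a single transaction (record
types, sizes and start indexes are placeholders); rec_pack bumps num_rec
as needed:

        char iedr_pl[MLXSW_REG_IEDR_LEN];
        int err;

        mlxsw_reg_iedr_pack(iedr_pl);
        mlxsw_reg_iedr_rec_pack(iedr_pl, 0, rec_type_a, 1, index_a);
        mlxsw_reg_iedr_rec_pack(iedr_pl, 1, rec_type_b, 4, index_b);
        err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(iedr), iedr_pl);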
+
+/* QPTS - QoS Priority Trust State Register
+ * ----------------------------------------
+ * This register controls the port policy to calculate the switch priority and
+ * packet color based on incoming packet fields.
+ */
+#define MLXSW_REG_QPTS_ID 0x4002
+#define MLXSW_REG_QPTS_LEN 0x8
+
+MLXSW_REG_DEFINE(qpts, MLXSW_REG_QPTS_ID, MLXSW_REG_QPTS_LEN);
+
+/* reg_qpts_local_port
+ * Local port number.
+ * Access: Index
+ *
+ * Note: CPU port is supported.
+ */
+MLXSW_ITEM32(reg, qpts, local_port, 0x00, 16, 8);
+
+enum mlxsw_reg_qpts_trust_state {
+       MLXSW_REG_QPTS_TRUST_STATE_PCP = 1,
+       MLXSW_REG_QPTS_TRUST_STATE_DSCP = 2, /* For MPLS, trust EXP. */
+};
+
+/* reg_qpts_trust_state
+ * Trust state for a given port.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qpts, trust_state, 0x04, 0, 3);
+
+static inline void mlxsw_reg_qpts_pack(char *payload, u8 local_port,
+                                      enum mlxsw_reg_qpts_trust_state ts)
+{
+       MLXSW_REG_ZERO(qpts, payload);
+
+       mlxsw_reg_qpts_local_port_set(payload, local_port);
+       mlxsw_reg_qpts_trust_state_set(payload, ts);
+}
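
A sketch switching a port to DSCP trust (local_port is a placeholder):

        char qpts_pl[MLXSW_REG_QPTS_LEN];
        int err;

        mlxsw_reg_qpts_pack(qpts_pl, local_port,
                            MLXSW_REG_QPTS_TRUST_STATE_DSCP);
        err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(qpts), qpts_pl);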
+
 /* QPCR - QoS Policer Configuration Register
  * -----------------------------------------
  * The QPCR register is used to create policers - that limit
@@ -2753,6 +3367,183 @@ static inline void mlxsw_reg_qeec_pack(char *payload, u8 local_port,
        mlxsw_reg_qeec_next_element_index_set(payload, next_index);
 }
 
+/* QRWE - QoS ReWrite Enable
+ * -------------------------
+ * This register configures the rewrite enable per receive port.
+ */
+#define MLXSW_REG_QRWE_ID 0x400F
+#define MLXSW_REG_QRWE_LEN 0x08
+
+MLXSW_REG_DEFINE(qrwe, MLXSW_REG_QRWE_ID, MLXSW_REG_QRWE_LEN);
+
+/* reg_qrwe_local_port
+ * Local port number.
+ * Access: Index
+ *
+ * Note: CPU port is supported. No support for router port.
+ */
+MLXSW_ITEM32(reg, qrwe, local_port, 0x00, 16, 8);
+
+/* reg_qrwe_dscp
+ * Whether to enable DSCP rewrite (default is 0, don't rewrite).
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qrwe, dscp, 0x04, 1, 1);
+
+/* reg_qrwe_pcp
+ * Whether to enable PCP and DEI rewrite (default is 0, don't rewrite).
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qrwe, pcp, 0x04, 0, 1);
+
+static inline void mlxsw_reg_qrwe_pack(char *payload, u8 local_port,
+                                      bool rewrite_pcp, bool rewrite_dscp)
+{
+       MLXSW_REG_ZERO(qrwe, payload);
+       mlxsw_reg_qrwe_local_port_set(payload, local_port);
+       mlxsw_reg_qrwe_pcp_set(payload, rewrite_pcp);
+       mlxsw_reg_qrwe_dscp_set(payload, rewrite_dscp);
+}
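
A sketch enabling DSCP rewrite while leaving PCP/DEI untouched
(local_port is a placeholder):

        char qrwe_pl[MLXSW_REG_QRWE_LEN];
        int err;

        /* rewrite_pcp = false, rewrite_dscp = true */
        mlxsw_reg_qrwe_pack(qrwe_pl, local_port, false, true);
        err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(qrwe), qrwe_pl);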
+
+/* QPDSM - QoS Priority to DSCP Mapping
+ * ------------------------------------
+ * QoS Priority to DSCP Mapping Register
+ */
+#define MLXSW_REG_QPDSM_ID 0x4011
+#define MLXSW_REG_QPDSM_BASE_LEN 0x04 /* base length, without records */
+#define MLXSW_REG_QPDSM_PRIO_ENTRY_REC_LEN 0x4 /* record length */
+#define MLXSW_REG_QPDSM_PRIO_ENTRY_REC_MAX_COUNT 16
+#define MLXSW_REG_QPDSM_LEN (MLXSW_REG_QPDSM_BASE_LEN +                        \
+                            MLXSW_REG_QPDSM_PRIO_ENTRY_REC_LEN *       \
+                            MLXSW_REG_QPDSM_PRIO_ENTRY_REC_MAX_COUNT)
+
+MLXSW_REG_DEFINE(qpdsm, MLXSW_REG_QPDSM_ID, MLXSW_REG_QPDSM_LEN);
+
+/* reg_qpdsm_local_port
+ * Local Port. Supported for data packets from CPU port.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, qpdsm, local_port, 0x00, 16, 8);
+
+/* reg_qpdsm_prio_entry_color0_e
+ * Enable update of the entry for color 0 and a given port.
+ * Access: WO
+ */
+MLXSW_ITEM32_INDEXED(reg, qpdsm, prio_entry_color0_e,
+                    MLXSW_REG_QPDSM_BASE_LEN, 31, 1,
+                    MLXSW_REG_QPDSM_PRIO_ENTRY_REC_LEN, 0x00, false);
+
+/* reg_qpdsm_prio_entry_color0_dscp
+ * DSCP field in the outer label of the packet for color 0 and a given port.
+ * Reserved when e=0.
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, qpdsm, prio_entry_color0_dscp,
+                    MLXSW_REG_QPDSM_BASE_LEN, 24, 6,
+                    MLXSW_REG_QPDSM_PRIO_ENTRY_REC_LEN, 0x00, false);
+
+/* reg_qpdsm_prio_entry_color1_e
+ * Enable update of the entry for color 1 and a given port.
+ * Access: WO
+ */
+MLXSW_ITEM32_INDEXED(reg, qpdsm, prio_entry_color1_e,
+                    MLXSW_REG_QPDSM_BASE_LEN, 23, 1,
+                    MLXSW_REG_QPDSM_PRIO_ENTRY_REC_LEN, 0x00, false);
+
+/* reg_qpdsm_prio_entry_color1_dscp
+ * DSCP field in the outer label of the packet for color 1 and a given port.
+ * Reserved when e=0.
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, qpdsm, prio_entry_color1_dscp,
+                    MLXSW_REG_QPDSM_BASE_LEN, 16, 6,
+                    MLXSW_REG_QPDSM_PRIO_ENTRY_REC_LEN, 0x00, false);
+
+/* reg_qpdsm_prio_entry_color2_e
+ * Enable update of the entry for color 2 and a given port.
+ * Access: WO
+ */
+MLXSW_ITEM32_INDEXED(reg, qpdsm, prio_entry_color2_e,
+                    MLXSW_REG_QPDSM_BASE_LEN, 15, 1,
+                    MLXSW_REG_QPDSM_PRIO_ENTRY_REC_LEN, 0x00, false);
+
+/* reg_qpdsm_prio_entry_color2_dscp
+ * DSCP field in the outer label of the packet for color 2 and a given port.
+ * Reserved when e=0.
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, qpdsm, prio_entry_color2_dscp,
+                    MLXSW_REG_QPDSM_BASE_LEN, 8, 6,
+                    MLXSW_REG_QPDSM_PRIO_ENTRY_REC_LEN, 0x00, false);
+
+static inline void mlxsw_reg_qpdsm_pack(char *payload, u8 local_port)
+{
+       MLXSW_REG_ZERO(qpdsm, payload);
+       mlxsw_reg_qpdsm_local_port_set(payload, local_port);
+}
+
+static inline void
+mlxsw_reg_qpdsm_prio_pack(char *payload, unsigned short prio, u8 dscp)
+{
+       mlxsw_reg_qpdsm_prio_entry_color0_e_set(payload, prio, 1);
+       mlxsw_reg_qpdsm_prio_entry_color0_dscp_set(payload, prio, dscp);
+       mlxsw_reg_qpdsm_prio_entry_color1_e_set(payload, prio, 1);
+       mlxsw_reg_qpdsm_prio_entry_color1_dscp_set(payload, prio, dscp);
+       mlxsw_reg_qpdsm_prio_entry_color2_e_set(payload, prio, 1);
+       mlxsw_reg_qpdsm_prio_entry_color2_dscp_set(payload, prio, dscp);
+}
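
A sketch programming the full priority-to-DSCP map for one port
(prio_to_dscp[] is a hypothetical caller-provided table):

        char qpdsm_pl[MLXSW_REG_QPDSM_LEN];
        int i, err;

        mlxsw_reg_qpdsm_pack(qpdsm_pl, local_port);
        for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
                mlxsw_reg_qpdsm_prio_pack(qpdsm_pl, i, prio_to_dscp[i]);
        err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(qpdsm), qpdsm_pl);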
+
+/* QPDPM - QoS Port DSCP to Priority Mapping Register
+ * --------------------------------------------------
+ * This register controls the mapping from DSCP field to
+ * Switch Priority for IP packets.
+ */
+#define MLXSW_REG_QPDPM_ID 0x4013
+#define MLXSW_REG_QPDPM_BASE_LEN 0x4 /* base length, without records */
+#define MLXSW_REG_QPDPM_DSCP_ENTRY_REC_LEN 0x2 /* record length */
+#define MLXSW_REG_QPDPM_DSCP_ENTRY_REC_MAX_COUNT 64
+#define MLXSW_REG_QPDPM_LEN (MLXSW_REG_QPDPM_BASE_LEN +                        \
+                            MLXSW_REG_QPDPM_DSCP_ENTRY_REC_LEN *       \
+                            MLXSW_REG_QPDPM_DSCP_ENTRY_REC_MAX_COUNT)
+
+MLXSW_REG_DEFINE(qpdpm, MLXSW_REG_QPDPM_ID, MLXSW_REG_QPDPM_LEN);
+
+/* reg_qpdpm_local_port
+ * Local Port. Supported for data packets from CPU port.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, qpdpm, local_port, 0x00, 16, 8);
+
+/* reg_qpdpm_dscp_e
+ * Enable update of the specific entry. When cleared, the switch_prio and color
+ * fields are ignored and the previous switch_prio and color values are
+ * preserved.
+ * Access: WO
+ */
+MLXSW_ITEM16_INDEXED(reg, qpdpm, dscp_entry_e, MLXSW_REG_QPDPM_BASE_LEN, 15, 1,
+                    MLXSW_REG_QPDPM_DSCP_ENTRY_REC_LEN, 0x00, false);
+
+/* reg_qpdpm_dscp_prio
+ * The new Switch Priority value for the relevant DSCP value.
+ * Access: RW
+ */
+MLXSW_ITEM16_INDEXED(reg, qpdpm, dscp_entry_prio,
+                    MLXSW_REG_QPDPM_BASE_LEN, 0, 4,
+                    MLXSW_REG_QPDPM_DSCP_ENTRY_REC_LEN, 0x00, false);
+
+static inline void mlxsw_reg_qpdpm_pack(char *payload, u8 local_port)
+{
+       MLXSW_REG_ZERO(qpdpm, payload);
+       mlxsw_reg_qpdpm_local_port_set(payload, local_port);
+}
+
+static inline void
+mlxsw_reg_qpdpm_dscp_pack(char *payload, unsigned short dscp, u8 prio)
+{
+       mlxsw_reg_qpdpm_dscp_entry_e_set(payload, dscp, 1);
+       mlxsw_reg_qpdpm_dscp_entry_prio_set(payload, dscp, prio);
+}
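
And the mirror-image sketch for the DSCP-to-priority direction
(dscp_to_prio[] is a hypothetical 64-entry table):

        char qpdpm_pl[MLXSW_REG_QPDPM_LEN];
        int dscp, err;

        mlxsw_reg_qpdpm_pack(qpdpm_pl, local_port);
        for (dscp = 0; dscp < 64; dscp++)
                mlxsw_reg_qpdpm_dscp_pack(qpdpm_pl, dscp, dscp_to_prio[dscp]);
        err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(qpdpm), qpdpm_pl);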
+
 /* PMLP - Ports Module to Local Port Register
  * ------------------------------------------
  * Configures the assignment of modules to local ports.
@@ -3350,6 +4141,7 @@ MLXSW_ITEM32(reg, ppcnt, pnat, 0x00, 14, 2);
 
 enum mlxsw_reg_ppcnt_grp {
        MLXSW_REG_PPCNT_IEEE_8023_CNT = 0x0,
+       MLXSW_REG_PPCNT_RFC_2819_CNT = 0x2,
        MLXSW_REG_PPCNT_EXT_CNT = 0x5,
        MLXSW_REG_PPCNT_PRIO_CNT = 0x10,
        MLXSW_REG_PPCNT_TC_CNT = 0x11,
@@ -3508,6 +4300,68 @@ MLXSW_ITEM64(reg, ppcnt, a_pause_mac_ctrl_frames_received,
 MLXSW_ITEM64(reg, ppcnt, a_pause_mac_ctrl_frames_transmitted,
             MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x90, 0, 64);
 
+/* Ethernet RFC 2819 Counter Group */
+
+/* reg_ppcnt_ether_stats_pkts64octets
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, ether_stats_pkts64octets,
+            MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x58, 0, 64);
+
+/* reg_ppcnt_ether_stats_pkts65to127octets
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, ether_stats_pkts65to127octets,
+            MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x60, 0, 64);
+
+/* reg_ppcnt_ether_stats_pkts128to255octets
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, ether_stats_pkts128to255octets,
+            MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x68, 0, 64);
+
+/* reg_ppcnt_ether_stats_pkts256to511octets
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, ether_stats_pkts256to511octets,
+            MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x70, 0, 64);
+
+/* reg_ppcnt_ether_stats_pkts512to1023octets
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, ether_stats_pkts512to1023octets,
+            MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x78, 0, 64);
+
+/* reg_ppcnt_ether_stats_pkts1024to1518octets
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, ether_stats_pkts1024to1518octets,
+            MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x80, 0, 64);
+
+/* reg_ppcnt_ether_stats_pkts1519to2047octets
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, ether_stats_pkts1519to2047octets,
+            MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x88, 0, 64);
+
+/* reg_ppcnt_ether_stats_pkts2048to4095octets
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, ether_stats_pkts2048to4095octets,
+            MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x90, 0, 64);
+
+/* reg_ppcnt_ether_stats_pkts4096to8191octets
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, ether_stats_pkts4096to8191octets,
+            MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x98, 0, 64);
+
+/* reg_ppcnt_ether_stats_pkts8192to10239octets
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, ether_stats_pkts8192to10239octets,
+            MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0xA0, 0, 64);
+
 /* Ethernet Extended Counter Group Counters */
 
 /* reg_ppcnt_ecn_marked
@@ -4338,6 +5192,20 @@ MLXSW_ITEM32(reg, ritr, if_swid, 0x08, 24, 8);
  */
 MLXSW_ITEM_BUF(reg, ritr, if_mac, 0x12, 6);
 
+/* reg_ritr_if_vrrp_id_ipv6
+ * VRRP ID for IPv6
+ * Note: Reserved for RIF types other than VLAN, FID and Sub-port.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, if_vrrp_id_ipv6, 0x1C, 8, 8);
+
+/* reg_ritr_if_vrrp_id_ipv4
+ * VRRP ID for IPv4
+ * Note: Reserved for RIF types other than VLAN, FID and Sub-port.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, if_vrrp_id_ipv4, 0x1C, 0, 8);
+
 /* VLAN Interface */
 
 /* reg_ritr_vlan_if_vid
@@ -7871,6 +8739,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = {
        MLXSW_REG(spvmlr),
        MLXSW_REG(cwtp),
        MLXSW_REG(cwtpm),
+       MLXSW_REG(pgcr),
        MLXSW_REG(ppbt),
        MLXSW_REG(pacl),
        MLXSW_REG(pagt),
@@ -7879,9 +8748,19 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = {
        MLXSW_REG(prcr),
        MLXSW_REG(pefa),
        MLXSW_REG(ptce2),
+       MLXSW_REG(perpt),
+       MLXSW_REG(perar),
+       MLXSW_REG(ptce3),
+       MLXSW_REG(percr),
+       MLXSW_REG(pererp),
+       MLXSW_REG(iedr),
+       MLXSW_REG(qpts),
        MLXSW_REG(qpcr),
        MLXSW_REG(qtct),
        MLXSW_REG(qeec),
+       MLXSW_REG(qrwe),
+       MLXSW_REG(qpdsm),
+       MLXSW_REG(qpdpm),
        MLXSW_REG(pmlp),
        MLXSW_REG(pmtu),
        MLXSW_REG(ptys),
index fd9299ccec7212d896846a747a996a35af388403..bf650f2cd5afbf904744c00ba0ee73a89878cc6b 100644 (file)
@@ -42,6 +42,8 @@ enum mlxsw_res_id {
        MLXSW_RES_ID_KVD_SIZE,
        MLXSW_RES_ID_KVD_SINGLE_MIN_SIZE,
        MLXSW_RES_ID_KVD_DOUBLE_MIN_SIZE,
+       MLXSW_RES_ID_MAX_KVD_LINEAR_RANGE,
+       MLXSW_RES_ID_MAX_KVD_ACTION_SETS,
        MLXSW_RES_ID_MAX_TRAP_GROUPS,
        MLXSW_RES_ID_CQE_V0,
        MLXSW_RES_ID_CQE_V1,
@@ -63,6 +65,13 @@ enum mlxsw_res_id {
        MLXSW_RES_ID_ACL_FLEX_KEYS,
        MLXSW_RES_ID_ACL_MAX_ACTION_PER_RULE,
        MLXSW_RES_ID_ACL_ACTIONS_PER_SET,
+       MLXSW_RES_ID_ACL_MAX_ERPT_BANKS,
+       MLXSW_RES_ID_ACL_MAX_ERPT_BANK_SIZE,
+       MLXSW_RES_ID_ACL_MAX_LARGE_KEY_ID,
+       MLXSW_RES_ID_ACL_ERPT_ENTRIES_2KB,
+       MLXSW_RES_ID_ACL_ERPT_ENTRIES_4KB,
+       MLXSW_RES_ID_ACL_ERPT_ENTRIES_8KB,
+       MLXSW_RES_ID_ACL_ERPT_ENTRIES_12KB,
        MLXSW_RES_ID_MAX_CPU_POLICERS,
        MLXSW_RES_ID_MAX_VRS,
        MLXSW_RES_ID_MAX_RIFS,
@@ -83,6 +92,8 @@ static u16 mlxsw_res_ids[] = {
        [MLXSW_RES_ID_KVD_SIZE] = 0x1001,
        [MLXSW_RES_ID_KVD_SINGLE_MIN_SIZE] = 0x1002,
        [MLXSW_RES_ID_KVD_DOUBLE_MIN_SIZE] = 0x1003,
+       [MLXSW_RES_ID_MAX_KVD_LINEAR_RANGE] = 0x1005,
+       [MLXSW_RES_ID_MAX_KVD_ACTION_SETS] = 0x1007,
        [MLXSW_RES_ID_MAX_TRAP_GROUPS] = 0x2201,
        [MLXSW_RES_ID_CQE_V0] = 0x2210,
        [MLXSW_RES_ID_CQE_V1] = 0x2211,
@@ -104,6 +115,13 @@ static u16 mlxsw_res_ids[] = {
        [MLXSW_RES_ID_ACL_FLEX_KEYS] = 0x2910,
        [MLXSW_RES_ID_ACL_MAX_ACTION_PER_RULE] = 0x2911,
        [MLXSW_RES_ID_ACL_ACTIONS_PER_SET] = 0x2912,
+       [MLXSW_RES_ID_ACL_MAX_ERPT_BANKS] = 0x2940,
+       [MLXSW_RES_ID_ACL_MAX_ERPT_BANK_SIZE] = 0x2941,
+       [MLXSW_RES_ID_ACL_MAX_LARGE_KEY_ID] = 0x2942,
+       [MLXSW_RES_ID_ACL_ERPT_ENTRIES_2KB] = 0x2950,
+       [MLXSW_RES_ID_ACL_ERPT_ENTRIES_4KB] = 0x2951,
+       [MLXSW_RES_ID_ACL_ERPT_ENTRIES_8KB] = 0x2952,
+       [MLXSW_RES_ID_ACL_ERPT_ENTRIES_12KB] = 0x2953,
        [MLXSW_RES_ID_MAX_CPU_POLICERS] = 0x2A13,
        [MLXSW_RES_ID_MAX_VRS] = 0x2C01,
        [MLXSW_RES_ID_MAX_RIFS] = 0x2C02,
index 968b88af2ef5ea93077186ff72d41d95aae60803..039228525fb10bdf57a6354af02ad33c6961fe7c 100644 (file)
 #include "spectrum_span.h"
 #include "../mlxfw/mlxfw.h"
 
-#define MLXSW_FWREV_MAJOR 13
-#define MLXSW_FWREV_MINOR 1620
-#define MLXSW_FWREV_SUBMINOR 192
-#define MLXSW_FWREV_MINOR_TO_BRANCH(minor) ((minor) / 100)
+#define MLXSW_SP_FWREV_MINOR_TO_BRANCH(minor) ((minor) / 100)
 
-#define MLXSW_SP_FW_FILENAME \
-       "mellanox/mlxsw_spectrum-" __stringify(MLXSW_FWREV_MAJOR) \
-       "." __stringify(MLXSW_FWREV_MINOR) \
-       "." __stringify(MLXSW_FWREV_SUBMINOR) ".mfa2"
+#define MLXSW_SP1_FWREV_MAJOR 13
+#define MLXSW_SP1_FWREV_MINOR 1620
+#define MLXSW_SP1_FWREV_SUBMINOR 192
 
-static const char mlxsw_sp_driver_name[] = "mlxsw_spectrum";
+static const struct mlxsw_fw_rev mlxsw_sp1_fw_rev = {
+       .major = MLXSW_SP1_FWREV_MAJOR,
+       .minor = MLXSW_SP1_FWREV_MINOR,
+       .subminor = MLXSW_SP1_FWREV_SUBMINOR,
+};
+
+#define MLXSW_SP1_FW_FILENAME \
+       "mellanox/mlxsw_spectrum-" __stringify(MLXSW_SP1_FWREV_MAJOR) \
+       "." __stringify(MLXSW_SP1_FWREV_MINOR) \
+       "." __stringify(MLXSW_SP1_FWREV_SUBMINOR) ".mfa2"
+
+static const char mlxsw_sp1_driver_name[] = "mlxsw_spectrum";
+static const char mlxsw_sp2_driver_name[] = "mlxsw_spectrum2";
 static const char mlxsw_sp_driver_version[] = "1.0";
 
 /* tx_hdr_version
@@ -338,29 +346,35 @@ static int mlxsw_sp_firmware_flash(struct mlxsw_sp *mlxsw_sp,
 static int mlxsw_sp_fw_rev_validate(struct mlxsw_sp *mlxsw_sp)
 {
        const struct mlxsw_fw_rev *rev = &mlxsw_sp->bus_info->fw_rev;
+       const struct mlxsw_fw_rev *req_rev = mlxsw_sp->req_rev;
+       const char *fw_filename = mlxsw_sp->fw_filename;
        const struct firmware *firmware;
        int err;
 
+       /* Don't check if the driver does not require it */
+       if (!req_rev || !fw_filename)
+               return 0;
+
        /* Validate driver & FW are compatible */
-       if (rev->major != MLXSW_FWREV_MAJOR) {
+       if (rev->major != req_rev->major) {
                WARN(1, "Mismatch in major FW version [%d:%d] is never expected; Please contact support\n",
-                    rev->major, MLXSW_FWREV_MAJOR);
+                    rev->major, req_rev->major);
                return -EINVAL;
        }
-       if (MLXSW_FWREV_MINOR_TO_BRANCH(rev->minor) ==
-           MLXSW_FWREV_MINOR_TO_BRANCH(MLXSW_FWREV_MINOR))
+       if (MLXSW_SP_FWREV_MINOR_TO_BRANCH(rev->minor) ==
+           MLXSW_SP_FWREV_MINOR_TO_BRANCH(req_rev->minor))
                return 0;
 
        dev_info(mlxsw_sp->bus_info->dev, "The firmware version %d.%d.%d is incompatible with the driver\n",
                 rev->major, rev->minor, rev->subminor);
        dev_info(mlxsw_sp->bus_info->dev, "Flashing firmware using file %s\n",
-                MLXSW_SP_FW_FILENAME);
+                fw_filename);
 
-       err = request_firmware_direct(&firmware, MLXSW_SP_FW_FILENAME,
+       err = request_firmware_direct(&firmware, fw_filename,
                                      mlxsw_sp->bus_info->dev);
        if (err) {
                dev_err(mlxsw_sp->bus_info->dev, "Could not request firmware file %s\n",
-                       MLXSW_SP_FW_FILENAME);
+                       fw_filename);
                return err;
        }
 
@@ -1441,6 +1455,11 @@ mlxsw_sp_setup_tc_cls_flower(struct mlxsw_sp_acl_block *acl_block,
                return 0;
        case TC_CLSFLOWER_STATS:
                return mlxsw_sp_flower_stats(mlxsw_sp, acl_block, f);
+       case TC_CLSFLOWER_TMPLT_CREATE:
+               return mlxsw_sp_flower_tmplt_create(mlxsw_sp, acl_block, f);
+       case TC_CLSFLOWER_TMPLT_DESTROY:
+               mlxsw_sp_flower_tmplt_destroy(mlxsw_sp, acl_block, f);
+               return 0;
        default:
                return -EOPNOTSUPP;
        }
@@ -1503,7 +1522,8 @@ static int mlxsw_sp_setup_tc_block_cb_flower(enum tc_setup_type type,
 
 static int
 mlxsw_sp_setup_tc_block_flower_bind(struct mlxsw_sp_port *mlxsw_sp_port,
-                                   struct tcf_block *block, bool ingress)
+                                   struct tcf_block *block, bool ingress,
+                                   struct netlink_ext_ack *extack)
 {
        struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
        struct mlxsw_sp_acl_block *acl_block;
@@ -1518,7 +1538,7 @@ mlxsw_sp_setup_tc_block_flower_bind(struct mlxsw_sp_port *mlxsw_sp_port,
                        return -ENOMEM;
                block_cb = __tcf_block_cb_register(block,
                                                   mlxsw_sp_setup_tc_block_cb_flower,
-                                                  mlxsw_sp, acl_block);
+                                                  mlxsw_sp, acl_block, extack);
                if (IS_ERR(block_cb)) {
                        err = PTR_ERR(block_cb);
                        goto err_cb_register;
@@ -1541,7 +1561,7 @@ mlxsw_sp_setup_tc_block_flower_bind(struct mlxsw_sp_port *mlxsw_sp_port,
 
 err_block_bind:
        if (!tcf_block_cb_decref(block_cb)) {
-               __tcf_block_cb_unregister(block_cb);
+               __tcf_block_cb_unregister(block, block_cb);
 err_cb_register:
                mlxsw_sp_acl_block_destroy(acl_block);
        }
@@ -1571,7 +1591,7 @@ mlxsw_sp_setup_tc_block_flower_unbind(struct mlxsw_sp_port *mlxsw_sp_port,
        err = mlxsw_sp_acl_block_unbind(mlxsw_sp, acl_block,
                                        mlxsw_sp_port, ingress);
        if (!err && !tcf_block_cb_decref(block_cb)) {
-               __tcf_block_cb_unregister(block_cb);
+               __tcf_block_cb_unregister(block, block_cb);
                mlxsw_sp_acl_block_destroy(acl_block);
        }
 }
@@ -1596,11 +1616,12 @@ static int mlxsw_sp_setup_tc_block(struct mlxsw_sp_port *mlxsw_sp_port,
        switch (f->command) {
        case TC_BLOCK_BIND:
                err = tcf_block_cb_register(f->block, cb, mlxsw_sp_port,
-                                           mlxsw_sp_port);
+                                           mlxsw_sp_port, f->extack);
                if (err)
                        return err;
                err = mlxsw_sp_setup_tc_block_flower_bind(mlxsw_sp_port,
-                                                         f->block, ingress);
+                                                         f->block, ingress,
+                                                         f->extack);
                if (err) {
                        tcf_block_cb_unregister(f->block, cb, mlxsw_sp_port);
                        return err;
@@ -1712,7 +1733,8 @@ static void mlxsw_sp_port_get_drvinfo(struct net_device *dev,
        struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
        struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
 
-       strlcpy(drvinfo->driver, mlxsw_sp_driver_name, sizeof(drvinfo->driver));
+       strlcpy(drvinfo->driver, mlxsw_sp->bus_info->device_kind,
+               sizeof(drvinfo->driver));
        strlcpy(drvinfo->version, mlxsw_sp_driver_version,
                sizeof(drvinfo->version));
        snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
@@ -1873,6 +1895,52 @@ static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_stats[] = {
 
 #define MLXSW_SP_PORT_HW_STATS_LEN ARRAY_SIZE(mlxsw_sp_port_hw_stats)
 
+static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_rfc_2819_stats[] = {
+       {
+               .str = "ether_pkts64octets",
+               .getter = mlxsw_reg_ppcnt_ether_stats_pkts64octets_get,
+       },
+       {
+               .str = "ether_pkts65to127octets",
+               .getter = mlxsw_reg_ppcnt_ether_stats_pkts65to127octets_get,
+       },
+       {
+               .str = "ether_pkts128to255octets",
+               .getter = mlxsw_reg_ppcnt_ether_stats_pkts128to255octets_get,
+       },
+       {
+               .str = "ether_pkts256to511octets",
+               .getter = mlxsw_reg_ppcnt_ether_stats_pkts256to511octets_get,
+       },
+       {
+               .str = "ether_pkts512to1023octets",
+               .getter = mlxsw_reg_ppcnt_ether_stats_pkts512to1023octets_get,
+       },
+       {
+               .str = "ether_pkts1024to1518octets",
+               .getter = mlxsw_reg_ppcnt_ether_stats_pkts1024to1518octets_get,
+       },
+       {
+               .str = "ether_pkts1519to2047octets",
+               .getter = mlxsw_reg_ppcnt_ether_stats_pkts1519to2047octets_get,
+       },
+       {
+               .str = "ether_pkts2048to4095octets",
+               .getter = mlxsw_reg_ppcnt_ether_stats_pkts2048to4095octets_get,
+       },
+       {
+               .str = "ether_pkts4096to8191octets",
+               .getter = mlxsw_reg_ppcnt_ether_stats_pkts4096to8191octets_get,
+       },
+       {
+               .str = "ether_pkts8192to10239octets",
+               .getter = mlxsw_reg_ppcnt_ether_stats_pkts8192to10239octets_get,
+       },
+};
+
+#define MLXSW_SP_PORT_HW_RFC_2819_STATS_LEN \
+       ARRAY_SIZE(mlxsw_sp_port_hw_rfc_2819_stats)
+
 static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_prio_stats[] = {
        {
                .str = "rx_octets_prio",
@@ -1964,6 +2032,11 @@ static void mlxsw_sp_port_get_strings(struct net_device *dev,
                               ETH_GSTRING_LEN);
                        p += ETH_GSTRING_LEN;
                }
+               for (i = 0; i < MLXSW_SP_PORT_HW_RFC_2819_STATS_LEN; i++) {
+                       memcpy(p, mlxsw_sp_port_hw_rfc_2819_stats[i].str,
+                              ETH_GSTRING_LEN);
+                       p += ETH_GSTRING_LEN;
+               }
 
                for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
                        mlxsw_sp_port_get_prio_strings(&p, i);
@@ -2003,10 +2076,14 @@ mlxsw_sp_get_hw_stats_by_group(struct mlxsw_sp_port_hw_stats **p_hw_stats,
                               int *p_len, enum mlxsw_reg_ppcnt_grp grp)
 {
        switch (grp) {
-       case  MLXSW_REG_PPCNT_IEEE_8023_CNT:
+       case MLXSW_REG_PPCNT_IEEE_8023_CNT:
                *p_hw_stats = mlxsw_sp_port_hw_stats;
                *p_len = MLXSW_SP_PORT_HW_STATS_LEN;
                break;
+       case MLXSW_REG_PPCNT_RFC_2819_CNT:
+               *p_hw_stats = mlxsw_sp_port_hw_rfc_2819_stats;
+               *p_len = MLXSW_SP_PORT_HW_RFC_2819_STATS_LEN;
+               break;
        case MLXSW_REG_PPCNT_PRIO_CNT:
                *p_hw_stats = mlxsw_sp_port_hw_prio_stats;
                *p_len = MLXSW_SP_PORT_HW_PRIO_STATS_LEN;
@@ -2056,6 +2133,11 @@ static void mlxsw_sp_port_get_stats(struct net_device *dev,
                                  data, data_index);
        data_index = MLXSW_SP_PORT_HW_STATS_LEN;
 
+       /* RFC 2819 Counters */
+       __mlxsw_sp_port_get_stats(dev, MLXSW_REG_PPCNT_RFC_2819_CNT, 0,
+                                 data, data_index);
+       data_index += MLXSW_SP_PORT_HW_RFC_2819_STATS_LEN;
+
        /* Per-Priority Counters */
        for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
                __mlxsw_sp_port_get_stats(dev, MLXSW_REG_PPCNT_PRIO_CNT, i,
@@ -3371,6 +3453,8 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = {
        MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV4, TRAP_TO_CPU, ROUTER_EXP, false),
        MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV6, TRAP_TO_CPU, ROUTER_EXP, false),
        MLXSW_SP_RXL_MARK(IPIP_DECAP_ERROR, TRAP_TO_CPU, ROUTER_EXP, false),
+       MLXSW_SP_RXL_MARK(IPV4_VRRP, TRAP_TO_CPU, ROUTER_EXP, false),
+       MLXSW_SP_RXL_MARK(IPV6_VRRP, TRAP_TO_CPU, ROUTER_EXP, false),
        /* PKT Sample trap */
        MLXSW_RXL(mlxsw_sp_rx_listener_sample_func, PKT_SAMPLE, MIRROR_TO_CPU,
                  false, SP_IP2ME, DISCARD),
@@ -3757,6 +3841,36 @@ err_fids_init:
        return err;
 }
 
+static int mlxsw_sp1_init(struct mlxsw_core *mlxsw_core,
+                         const struct mlxsw_bus_info *mlxsw_bus_info)
+{
+       struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
+
+       mlxsw_sp->req_rev = &mlxsw_sp1_fw_rev;
+       mlxsw_sp->fw_filename = MLXSW_SP1_FW_FILENAME;
+       mlxsw_sp->kvdl_ops = &mlxsw_sp1_kvdl_ops;
+       mlxsw_sp->afa_ops = &mlxsw_sp1_act_afa_ops;
+       mlxsw_sp->afk_ops = &mlxsw_sp1_afk_ops;
+       mlxsw_sp->mr_tcam_ops = &mlxsw_sp1_mr_tcam_ops;
+       mlxsw_sp->acl_tcam_ops = &mlxsw_sp1_acl_tcam_ops;
+
+       return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info);
+}
+
+static int mlxsw_sp2_init(struct mlxsw_core *mlxsw_core,
+                         const struct mlxsw_bus_info *mlxsw_bus_info)
+{
+       struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
+
+       mlxsw_sp->kvdl_ops = &mlxsw_sp2_kvdl_ops;
+       mlxsw_sp->afa_ops = &mlxsw_sp2_act_afa_ops;
+       mlxsw_sp->afk_ops = &mlxsw_sp2_afk_ops;
+       mlxsw_sp->mr_tcam_ops = &mlxsw_sp2_mr_tcam_ops;
+       mlxsw_sp->acl_tcam_ops = &mlxsw_sp2_acl_tcam_ops;
+
+       return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info);
+}
+
 static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core)
 {
        struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
@@ -3777,7 +3891,7 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core)
        mlxsw_sp_kvdl_fini(mlxsw_sp);
 }
 
-static const struct mlxsw_config_profile mlxsw_sp_config_profile = {
+static const struct mlxsw_config_profile mlxsw_sp1_config_profile = {
        .used_max_mid                   = 1,
        .max_mid                        = MLXSW_SP_MID_MAX,
        .used_flood_tables              = 1,
@@ -3803,6 +3917,28 @@ static const struct mlxsw_config_profile mlxsw_sp_config_profile = {
        },
 };
 
+static const struct mlxsw_config_profile mlxsw_sp2_config_profile = {
+       .used_max_mid                   = 1,
+       .max_mid                        = MLXSW_SP_MID_MAX,
+       .used_flood_tables              = 1,
+       .used_flood_mode                = 1,
+       .flood_mode                     = 3,
+       .max_fid_offset_flood_tables    = 3,
+       .fid_offset_flood_table_size    = VLAN_N_VID - 1,
+       .max_fid_flood_tables           = 3,
+       .fid_flood_table_size           = MLXSW_SP_FID_8021D_MAX,
+       .used_max_ib_mc                 = 1,
+       .max_ib_mc                      = 0,
+       .used_max_pkey                  = 1,
+       .max_pkey                       = 0,
+       .swid_config                    = {
+               {
+                       .used_type      = 1,
+                       .type           = MLXSW_PORT_SWID_TYPE_ETH,
+               }
+       },
+};
+
 static void
 mlxsw_sp_resource_size_params_prepare(struct mlxsw_core *mlxsw_core,
                                      struct devlink_resource_size_params *kvd_size_params,
@@ -3839,7 +3975,7 @@ mlxsw_sp_resource_size_params_prepare(struct mlxsw_core *mlxsw_core,
                                          DEVLINK_RESOURCE_UNIT_ENTRY);
 }
 
-static int mlxsw_sp_resources_register(struct mlxsw_core *mlxsw_core)
+static int mlxsw_sp1_resources_kvd_register(struct mlxsw_core *mlxsw_core)
 {
        struct devlink *devlink = priv_to_devlink(mlxsw_core);
        struct devlink_resource_size_params hash_single_size_params;
@@ -3850,7 +3986,7 @@ static int mlxsw_sp_resources_register(struct mlxsw_core *mlxsw_core)
        const struct mlxsw_config_profile *profile;
        int err;
 
-       profile = &mlxsw_sp_config_profile;
+       profile = &mlxsw_sp1_config_profile;
        if (!MLXSW_CORE_RES_VALID(mlxsw_core, KVD_SIZE))
                return -EIO;
 
@@ -3876,7 +4012,7 @@ static int mlxsw_sp_resources_register(struct mlxsw_core *mlxsw_core)
        if (err)
                return err;
 
-       err = mlxsw_sp_kvdl_resources_register(mlxsw_core);
+       err = mlxsw_sp1_kvdl_resources_register(mlxsw_core);
        if  (err)
                return err;
 
@@ -3905,6 +4041,16 @@ static int mlxsw_sp_resources_register(struct mlxsw_core *mlxsw_core)
        return 0;
 }
 
+static int mlxsw_sp1_resources_register(struct mlxsw_core *mlxsw_core)
+{
+       return mlxsw_sp1_resources_kvd_register(mlxsw_core);
+}
+
+static int mlxsw_sp2_resources_register(struct mlxsw_core *mlxsw_core)
+{
+       return 0;
+}
+
 static int mlxsw_sp_kvd_sizes_get(struct mlxsw_core *mlxsw_core,
                                  const struct mlxsw_config_profile *profile,
                                  u64 *p_single_size, u64 *p_double_size,
@@ -3960,10 +4106,10 @@ static int mlxsw_sp_kvd_sizes_get(struct mlxsw_core *mlxsw_core,
        return 0;
 }
 
-static struct mlxsw_driver mlxsw_sp_driver = {
-       .kind                           = mlxsw_sp_driver_name,
+static struct mlxsw_driver mlxsw_sp1_driver = {
+       .kind                           = mlxsw_sp1_driver_name,
        .priv_size                      = sizeof(struct mlxsw_sp),
-       .init                           = mlxsw_sp_init,
+       .init                           = mlxsw_sp1_init,
        .fini                           = mlxsw_sp_fini,
        .basic_trap_groups_set          = mlxsw_sp_basic_trap_groups_set,
        .port_split                     = mlxsw_sp_port_split,
@@ -3979,10 +4125,35 @@ static struct mlxsw_driver mlxsw_sp_driver = {
        .sb_occ_port_pool_get           = mlxsw_sp_sb_occ_port_pool_get,
        .sb_occ_tc_port_bind_get        = mlxsw_sp_sb_occ_tc_port_bind_get,
        .txhdr_construct                = mlxsw_sp_txhdr_construct,
-       .resources_register             = mlxsw_sp_resources_register,
+       .resources_register             = mlxsw_sp1_resources_register,
        .kvd_sizes_get                  = mlxsw_sp_kvd_sizes_get,
        .txhdr_len                      = MLXSW_TXHDR_LEN,
-       .profile                        = &mlxsw_sp_config_profile,
+       .profile                        = &mlxsw_sp1_config_profile,
+       .res_query_enabled              = true,
+};
+
+static struct mlxsw_driver mlxsw_sp2_driver = {
+       .kind                           = mlxsw_sp2_driver_name,
+       .priv_size                      = sizeof(struct mlxsw_sp),
+       .init                           = mlxsw_sp2_init,
+       .fini                           = mlxsw_sp_fini,
+       .basic_trap_groups_set          = mlxsw_sp_basic_trap_groups_set,
+       .port_split                     = mlxsw_sp_port_split,
+       .port_unsplit                   = mlxsw_sp_port_unsplit,
+       .sb_pool_get                    = mlxsw_sp_sb_pool_get,
+       .sb_pool_set                    = mlxsw_sp_sb_pool_set,
+       .sb_port_pool_get               = mlxsw_sp_sb_port_pool_get,
+       .sb_port_pool_set               = mlxsw_sp_sb_port_pool_set,
+       .sb_tc_pool_bind_get            = mlxsw_sp_sb_tc_pool_bind_get,
+       .sb_tc_pool_bind_set            = mlxsw_sp_sb_tc_pool_bind_set,
+       .sb_occ_snapshot                = mlxsw_sp_sb_occ_snapshot,
+       .sb_occ_max_clear               = mlxsw_sp_sb_occ_max_clear,
+       .sb_occ_port_pool_get           = mlxsw_sp_sb_occ_port_pool_get,
+       .sb_occ_tc_port_bind_get        = mlxsw_sp_sb_occ_tc_port_bind_get,
+       .txhdr_construct                = mlxsw_sp_txhdr_construct,
+       .resources_register             = mlxsw_sp2_resources_register,
+       .txhdr_len                      = MLXSW_TXHDR_LEN,
+       .profile                        = &mlxsw_sp2_config_profile,
        .res_query_enabled              = true,
 };
 
@@ -4397,7 +4568,8 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev,
                if (!is_vlan_dev(upper_dev) &&
                    !netif_is_lag_master(upper_dev) &&
                    !netif_is_bridge_master(upper_dev) &&
-                   !netif_is_ovs_master(upper_dev)) {
+                   !netif_is_ovs_master(upper_dev) &&
+                   !netif_is_macvlan(upper_dev)) {
                        NL_SET_ERR_MSG_MOD(extack, "Unknown upper device type");
                        return -EINVAL;
                }
@@ -4423,6 +4595,11 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev,
                        NL_SET_ERR_MSG_MOD(extack, "Can not put a VLAN on a LAG port");
                        return -EINVAL;
                }
+               if (netif_is_macvlan(upper_dev) &&
+                   !mlxsw_sp_rif_find_by_dev(mlxsw_sp, lower_dev)) {
+                       NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces");
+                       return -EOPNOTSUPP;
+               }
                if (netif_is_ovs_master(upper_dev) && vlan_uses_dev(dev)) {
                        NL_SET_ERR_MSG_MOD(extack, "Master device is an OVS master and this device has a VLAN");
                        return -EINVAL;
@@ -4461,6 +4638,9 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev,
                                err = mlxsw_sp_port_ovs_join(mlxsw_sp_port);
                        else
                                mlxsw_sp_port_ovs_leave(mlxsw_sp_port);
+               } else if (netif_is_macvlan(upper_dev)) {
+                       if (!info->linking)
+                               mlxsw_sp_rif_macvlan_del(mlxsw_sp, upper_dev);
                }
                break;
        }
@@ -4545,8 +4725,9 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev,
        switch (event) {
        case NETDEV_PRECHANGEUPPER:
                upper_dev = info->upper_dev;
-               if (!netif_is_bridge_master(upper_dev)) {
-                       NL_SET_ERR_MSG_MOD(extack, "VLAN devices only support bridge and VRF uppers");
+               if (!netif_is_bridge_master(upper_dev) &&
+                   !netif_is_macvlan(upper_dev)) {
+                       NL_SET_ERR_MSG_MOD(extack, "Unknown upper device type");
                        return -EINVAL;
                }
                if (!info->linking)
@@ -4558,6 +4739,11 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev,
                        NL_SET_ERR_MSG_MOD(extack, "Enslaving a port to a device that already has an upper device is not supported");
                        return -EINVAL;
                }
+               if (netif_is_macvlan(upper_dev) &&
+                   !mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan_dev)) {
+                       NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces");
+                       return -EOPNOTSUPP;
+               }
                break;
        case NETDEV_CHANGEUPPER:
                upper_dev = info->upper_dev;
@@ -4571,6 +4757,9 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev,
                                mlxsw_sp_port_bridge_leave(mlxsw_sp_port,
                                                           vlan_dev,
                                                           upper_dev);
+               } else if (netif_is_macvlan(upper_dev)) {
+                       if (!info->linking)
+                               mlxsw_sp_rif_macvlan_del(mlxsw_sp, upper_dev);
                } else {
                        err = -EINVAL;
                        WARN_ON(1);
@@ -4620,6 +4809,64 @@ static int mlxsw_sp_netdevice_vlan_event(struct net_device *vlan_dev,
        return 0;
 }
 
+static int mlxsw_sp_netdevice_bridge_event(struct net_device *br_dev,
+                                          unsigned long event, void *ptr)
+{
+       struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(br_dev);
+       struct netdev_notifier_changeupper_info *info = ptr;
+       struct netlink_ext_ack *extack;
+       struct net_device *upper_dev;
+
+       if (!mlxsw_sp)
+               return 0;
+
+       extack = netdev_notifier_info_to_extack(&info->info);
+
+       switch (event) {
+       case NETDEV_PRECHANGEUPPER:
+               upper_dev = info->upper_dev;
+               if (!is_vlan_dev(upper_dev) && !netif_is_macvlan(upper_dev)) {
+                       NL_SET_ERR_MSG_MOD(extack, "Unknown upper device type");
+                       return -EOPNOTSUPP;
+               }
+               if (!info->linking)
+                       break;
+               if (netif_is_macvlan(upper_dev) &&
+                   !mlxsw_sp_rif_find_by_dev(mlxsw_sp, br_dev)) {
+                       NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces");
+                       return -EOPNOTSUPP;
+               }
+               break;
+       case NETDEV_CHANGEUPPER:
+               upper_dev = info->upper_dev;
+               if (info->linking)
+                       break;
+               if (netif_is_macvlan(upper_dev))
+                       mlxsw_sp_rif_macvlan_del(mlxsw_sp, upper_dev);
+               break;
+       }
+
+       return 0;
+}
+
+static int mlxsw_sp_netdevice_macvlan_event(struct net_device *macvlan_dev,
+                                           unsigned long event, void *ptr)
+{
+       struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(macvlan_dev);
+       struct netdev_notifier_changeupper_info *info = ptr;
+       struct netlink_ext_ack *extack;
+
+       if (!mlxsw_sp || event != NETDEV_PRECHANGEUPPER)
+               return 0;
+
+       extack = netdev_notifier_info_to_extack(&info->info);
+
+       /* VRF enslavement is handled in mlxsw_sp_netdevice_vrf_event() */
+       NL_SET_ERR_MSG_MOD(extack, "Unknown upper device type");
+
+       return -EOPNOTSUPP;
+}
+
 static bool mlxsw_sp_is_vrf_event(unsigned long event, void *ptr)
 {
        struct netdev_notifier_changeupper_info *info = ptr;
@@ -4661,6 +4908,10 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *nb,
                err = mlxsw_sp_netdevice_lag_event(dev, event, ptr);
        else if (is_vlan_dev(dev))
                err = mlxsw_sp_netdevice_vlan_event(dev, event, ptr);
+       else if (netif_is_bridge_master(dev))
+               err = mlxsw_sp_netdevice_bridge_event(dev, event, ptr);
+       else if (netif_is_macvlan(dev))
+               err = mlxsw_sp_netdevice_macvlan_event(dev, event, ptr);
 
        return notifier_from_errno(err);
 }
@@ -4681,14 +4932,24 @@ static struct notifier_block mlxsw_sp_inet6addr_nb __read_mostly = {
        .notifier_call = mlxsw_sp_inet6addr_event,
 };
 
-static const struct pci_device_id mlxsw_sp_pci_id_table[] = {
+static const struct pci_device_id mlxsw_sp1_pci_id_table[] = {
        {PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_SPECTRUM), 0},
        {0, },
 };
 
-static struct pci_driver mlxsw_sp_pci_driver = {
-       .name = mlxsw_sp_driver_name,
-       .id_table = mlxsw_sp_pci_id_table,
+static struct pci_driver mlxsw_sp1_pci_driver = {
+       .name = mlxsw_sp1_driver_name,
+       .id_table = mlxsw_sp1_pci_id_table,
+};
+
+static const struct pci_device_id mlxsw_sp2_pci_id_table[] = {
+       {PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_SPECTRUM2), 0},
+       {0, },
+};
+
+static struct pci_driver mlxsw_sp2_pci_driver = {
+       .name = mlxsw_sp2_driver_name,
+       .id_table = mlxsw_sp2_pci_id_table,
 };
 
 static int __init mlxsw_sp_module_init(void)
@@ -4700,19 +4961,31 @@ static int __init mlxsw_sp_module_init(void)
        register_inet6addr_validator_notifier(&mlxsw_sp_inet6addr_valid_nb);
        register_inet6addr_notifier(&mlxsw_sp_inet6addr_nb);
 
-       err = mlxsw_core_driver_register(&mlxsw_sp_driver);
+       err = mlxsw_core_driver_register(&mlxsw_sp1_driver);
+       if (err)
+               goto err_sp1_core_driver_register;
+
+       err = mlxsw_core_driver_register(&mlxsw_sp2_driver);
+       if (err)
+               goto err_sp2_core_driver_register;
+
+       err = mlxsw_pci_driver_register(&mlxsw_sp1_pci_driver);
        if (err)
-               goto err_core_driver_register;
+               goto err_sp1_pci_driver_register;
 
-       err = mlxsw_pci_driver_register(&mlxsw_sp_pci_driver);
+       err = mlxsw_pci_driver_register(&mlxsw_sp2_pci_driver);
        if (err)
-               goto err_pci_driver_register;
+               goto err_sp2_pci_driver_register;
 
        return 0;
 
-err_pci_driver_register:
-       mlxsw_core_driver_unregister(&mlxsw_sp_driver);
-err_core_driver_register:
+err_sp2_pci_driver_register:
+       mlxsw_pci_driver_unregister(&mlxsw_sp1_pci_driver);
+err_sp1_pci_driver_register:
+       mlxsw_core_driver_unregister(&mlxsw_sp2_driver);
+err_sp2_core_driver_register:
+       mlxsw_core_driver_unregister(&mlxsw_sp1_driver);
+err_sp1_core_driver_register:
        unregister_inet6addr_notifier(&mlxsw_sp_inet6addr_nb);
        unregister_inet6addr_validator_notifier(&mlxsw_sp_inet6addr_valid_nb);
        unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb);
@@ -4722,8 +4995,10 @@ err_core_driver_register:
 
 static void __exit mlxsw_sp_module_exit(void)
 {
-       mlxsw_pci_driver_unregister(&mlxsw_sp_pci_driver);
-       mlxsw_core_driver_unregister(&mlxsw_sp_driver);
+       mlxsw_pci_driver_unregister(&mlxsw_sp2_pci_driver);
+       mlxsw_pci_driver_unregister(&mlxsw_sp1_pci_driver);
+       mlxsw_core_driver_unregister(&mlxsw_sp2_driver);
+       mlxsw_core_driver_unregister(&mlxsw_sp1_driver);
        unregister_inet6addr_notifier(&mlxsw_sp_inet6addr_nb);
        unregister_inet6addr_validator_notifier(&mlxsw_sp_inet6addr_valid_nb);
        unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb);
@@ -4736,5 +5011,6 @@ module_exit(mlxsw_sp_module_exit);
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_AUTHOR("Jiri Pirko <jiri@mellanox.com>");
 MODULE_DESCRIPTION("Mellanox Spectrum driver");
-MODULE_DEVICE_TABLE(pci, mlxsw_sp_pci_id_table);
-MODULE_FIRMWARE(MLXSW_SP_FW_FILENAME);
+MODULE_DEVICE_TABLE(pci, mlxsw_sp1_pci_id_table);
+MODULE_DEVICE_TABLE(pci, mlxsw_sp2_pci_id_table);
+MODULE_FIRMWARE(MLXSW_SP1_FW_FILENAME);
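The init path now registers two core drivers and two PCI drivers from a single module, and the error labels unwind strictly in reverse: each label undoes only the registrations that had already succeeded when the jump was taken, which is why err_sp2_pci_driver_register unregisters the sp1 PCI driver (the failed sp2 registration left nothing to undo). A minimal sketch of this goto-based LIFO unwind, with hypothetical register_a()/register_b() standing in for the paired mlxsw_core_driver_register()/mlxsw_pci_driver_register() calls:

        /* Sketch only: register_a()/register_b() are hypothetical
         * stand-ins; each error label undoes exactly what succeeded
         * before the failing step.
         */
        static int register_a(void) { return 0; }
        static void unregister_a(void) { }
        static int register_b(void) { return 0; }

        static int example_init(void)
        {
                int err;

                err = register_a();
                if (err)
                        goto err_a;
                err = register_b();
                if (err)
                        goto err_b;
                return 0;

        err_b:
                unregister_a(); /* b failed, so only a needs undoing */
        err_a:
                return err;
        }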
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 4a519d8edec8fe0410e5548182a389848011b2d2..13eca1a79d52544a75ef0b69854d814000becd83 100644
@@ -1,6 +1,6 @@
 /*
  * drivers/net/ethernet/mellanox/mlxsw/spectrum.h
- * Copyright (c) 2015-2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved.
  * Copyright (c) 2015-2017 Jiri Pirko <jiri@mellanox.com>
  * Copyright (c) 2015 Ido Schimmel <idosch@mellanox.com>
  * Copyright (c) 2015 Elad Raz <eladr@mellanox.com>
@@ -54,6 +54,7 @@
 #include "core.h"
 #include "core_acl_flex_keys.h"
 #include "core_acl_flex_actions.h"
+#include "reg.h"
 
 #define MLXSW_SP_FID_8021D_MAX 1024
 
@@ -145,6 +146,9 @@ struct mlxsw_sp_acl;
 struct mlxsw_sp_counter_pool;
 struct mlxsw_sp_fid_core;
 struct mlxsw_sp_kvdl;
+struct mlxsw_sp_kvdl_ops;
+struct mlxsw_sp_mr_tcam_ops;
+struct mlxsw_sp_acl_tcam_ops;
 
 struct mlxsw_sp {
        struct mlxsw_sp_port **ports;
@@ -168,6 +172,13 @@ struct mlxsw_sp {
                struct mlxsw_sp_span_entry *entries;
                int entries_count;
        } span;
+       const struct mlxsw_fw_rev *req_rev;
+       const char *fw_filename;
+       const struct mlxsw_sp_kvdl_ops *kvdl_ops;
+       const struct mlxsw_afa_ops *afa_ops;
+       const struct mlxsw_afk_ops *afk_ops;
+       const struct mlxsw_sp_mr_tcam_ops *mr_tcam_ops;
+       const struct mlxsw_sp_acl_tcam_ops *acl_tcam_ops;
 };
 
 static inline struct mlxsw_sp_upper *
@@ -233,6 +244,7 @@ struct mlxsw_sp_port {
                struct ieee_ets *ets;
                struct ieee_maxrate *maxrate;
                struct ieee_pfc *pfc;
+               enum mlxsw_reg_qpts_trust_state trust_state;
        } dcb;
        struct {
                u8 module;
@@ -407,6 +419,8 @@ static inline void mlxsw_sp_port_dcb_fini(struct mlxsw_sp_port *mlxsw_sp_port)
 int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp);
 void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp);
 int mlxsw_sp_netdevice_router_port_event(struct net_device *dev);
+void mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp,
+                             const struct net_device *macvlan_dev);
 int mlxsw_sp_inetaddr_event(struct notifier_block *unused,
                            unsigned long event, void *ptr);
 int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused,
@@ -435,15 +449,62 @@ mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan);
 void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif);
 
 /* spectrum_kvdl.c */
+enum mlxsw_sp_kvdl_entry_type {
+       MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
+       MLXSW_SP_KVDL_ENTRY_TYPE_ACTSET,
+       MLXSW_SP_KVDL_ENTRY_TYPE_PBS,
+       MLXSW_SP_KVDL_ENTRY_TYPE_MCRIGR,
+};
+
+static inline unsigned int
+mlxsw_sp_kvdl_entry_size(enum mlxsw_sp_kvdl_entry_type type)
+{
+       switch (type) {
+       case MLXSW_SP_KVDL_ENTRY_TYPE_ADJ: /* fall through */
+       case MLXSW_SP_KVDL_ENTRY_TYPE_ACTSET: /* fall through */
+       case MLXSW_SP_KVDL_ENTRY_TYPE_PBS: /* fall through */
+       case MLXSW_SP_KVDL_ENTRY_TYPE_MCRIGR: /* fall through */
+       default:
+               return 1;
+       }
+}
+
+struct mlxsw_sp_kvdl_ops {
+       size_t priv_size;
+       int (*init)(struct mlxsw_sp *mlxsw_sp, void *priv);
+       void (*fini)(struct mlxsw_sp *mlxsw_sp, void *priv);
+       int (*alloc)(struct mlxsw_sp *mlxsw_sp, void *priv,
+                    enum mlxsw_sp_kvdl_entry_type type,
+                    unsigned int entry_count, u32 *p_entry_index);
+       void (*free)(struct mlxsw_sp *mlxsw_sp, void *priv,
+                    enum mlxsw_sp_kvdl_entry_type type,
+                    unsigned int entry_count, int entry_index);
+       int (*alloc_size_query)(struct mlxsw_sp *mlxsw_sp, void *priv,
+                               enum mlxsw_sp_kvdl_entry_type type,
+                               unsigned int entry_count,
+                               unsigned int *p_alloc_count);
+       int (*resources_register)(struct mlxsw_sp *mlxsw_sp, void *priv);
+};
+
 int mlxsw_sp_kvdl_init(struct mlxsw_sp *mlxsw_sp);
 void mlxsw_sp_kvdl_fini(struct mlxsw_sp *mlxsw_sp);
-int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count,
-                       u32 *p_entry_index);
-void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, int entry_index);
-int mlxsw_sp_kvdl_alloc_size_query(struct mlxsw_sp *mlxsw_sp,
-                                  unsigned int entry_count,
-                                  unsigned int *p_alloc_size);
-int mlxsw_sp_kvdl_resources_register(struct mlxsw_core *mlxsw_core);
+int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp,
+                       enum mlxsw_sp_kvdl_entry_type type,
+                       unsigned int entry_count, u32 *p_entry_index);
+void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp,
+                       enum mlxsw_sp_kvdl_entry_type type,
+                       unsigned int entry_count, int entry_index);
+int mlxsw_sp_kvdl_alloc_count_query(struct mlxsw_sp *mlxsw_sp,
+                                   enum mlxsw_sp_kvdl_entry_type type,
+                                   unsigned int entry_count,
+                                   unsigned int *p_alloc_count);
+
+/* spectrum1_kvdl.c */
+extern const struct mlxsw_sp_kvdl_ops mlxsw_sp1_kvdl_ops;
+int mlxsw_sp1_kvdl_resources_register(struct mlxsw_core *mlxsw_core);
+
+/* spectrum2_kvdl.c */
+extern const struct mlxsw_sp_kvdl_ops mlxsw_sp2_kvdl_ops;
 
 struct mlxsw_sp_acl_rule_info {
        unsigned int priority;
@@ -452,44 +513,14 @@ struct mlxsw_sp_acl_rule_info {
        unsigned int counter_index;
 };
 
-enum mlxsw_sp_acl_profile {
-       MLXSW_SP_ACL_PROFILE_FLOWER,
-};
-
-struct mlxsw_sp_acl_profile_ops {
-       size_t ruleset_priv_size;
-       int (*ruleset_add)(struct mlxsw_sp *mlxsw_sp,
-                          void *priv, void *ruleset_priv);
-       void (*ruleset_del)(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv);
-       int (*ruleset_bind)(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv,
-                           struct mlxsw_sp_port *mlxsw_sp_port,
-                           bool ingress);
-       void (*ruleset_unbind)(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv,
-                              struct mlxsw_sp_port *mlxsw_sp_port,
-                              bool ingress);
-       u16 (*ruleset_group_id)(void *ruleset_priv);
-       size_t rule_priv_size;
-       int (*rule_add)(struct mlxsw_sp *mlxsw_sp,
-                       void *ruleset_priv, void *rule_priv,
-                       struct mlxsw_sp_acl_rule_info *rulei);
-       void (*rule_del)(struct mlxsw_sp *mlxsw_sp, void *rule_priv);
-       int (*rule_activity_get)(struct mlxsw_sp *mlxsw_sp, void *rule_priv,
-                                bool *activity);
-};
-
-struct mlxsw_sp_acl_ops {
-       size_t priv_size;
-       int (*init)(struct mlxsw_sp *mlxsw_sp, void *priv);
-       void (*fini)(struct mlxsw_sp *mlxsw_sp, void *priv);
-       const struct mlxsw_sp_acl_profile_ops *
-                       (*profile_ops)(struct mlxsw_sp *mlxsw_sp,
-                                      enum mlxsw_sp_acl_profile profile);
-};
-
 struct mlxsw_sp_acl_block;
 struct mlxsw_sp_acl_ruleset;
 
 /* spectrum_acl.c */
+enum mlxsw_sp_acl_profile {
+       MLXSW_SP_ACL_PROFILE_FLOWER,
+};
+
 struct mlxsw_afk *mlxsw_sp_acl_afk(struct mlxsw_sp_acl *acl);
 struct mlxsw_sp *mlxsw_sp_acl_block_mlxsw_sp(struct mlxsw_sp_acl_block *block);
 unsigned int mlxsw_sp_acl_block_rule_count(struct mlxsw_sp_acl_block *block);
@@ -514,7 +545,8 @@ mlxsw_sp_acl_ruleset_lookup(struct mlxsw_sp *mlxsw_sp,
 struct mlxsw_sp_acl_ruleset *
 mlxsw_sp_acl_ruleset_get(struct mlxsw_sp *mlxsw_sp,
                         struct mlxsw_sp_acl_block *block, u32 chain_index,
-                        enum mlxsw_sp_acl_profile profile);
+                        enum mlxsw_sp_acl_profile profile,
+                        struct mlxsw_afk_element_usage *tmplt_elusage);
 void mlxsw_sp_acl_ruleset_put(struct mlxsw_sp *mlxsw_sp,
                              struct mlxsw_sp_acl_ruleset *ruleset);
 u16 mlxsw_sp_acl_ruleset_group_id(struct mlxsw_sp_acl_ruleset *ruleset);
@@ -541,25 +573,30 @@ int mlxsw_sp_acl_rulei_act_trap(struct mlxsw_sp_acl_rule_info *rulei);
 int mlxsw_sp_acl_rulei_act_mirror(struct mlxsw_sp *mlxsw_sp,
                                  struct mlxsw_sp_acl_rule_info *rulei,
                                  struct mlxsw_sp_acl_block *block,
-                                 struct net_device *out_dev);
+                                 struct net_device *out_dev,
+                                 struct netlink_ext_ack *extack);
 int mlxsw_sp_acl_rulei_act_fwd(struct mlxsw_sp *mlxsw_sp,
                               struct mlxsw_sp_acl_rule_info *rulei,
-                              struct net_device *out_dev);
+                              struct net_device *out_dev,
+                              struct netlink_ext_ack *extack);
 int mlxsw_sp_acl_rulei_act_vlan(struct mlxsw_sp *mlxsw_sp,
                                struct mlxsw_sp_acl_rule_info *rulei,
-                               u32 action, u16 vid, u16 proto, u8 prio);
+                               u32 action, u16 vid, u16 proto, u8 prio,
+                               struct netlink_ext_ack *extack);
 int mlxsw_sp_acl_rulei_act_count(struct mlxsw_sp *mlxsw_sp,
-                                struct mlxsw_sp_acl_rule_info *rulei);
+                                struct mlxsw_sp_acl_rule_info *rulei,
+                                struct netlink_ext_ack *extack);
 int mlxsw_sp_acl_rulei_act_fid_set(struct mlxsw_sp *mlxsw_sp,
                                   struct mlxsw_sp_acl_rule_info *rulei,
-                                  u16 fid);
+                                  u16 fid, struct netlink_ext_ack *extack);
 
 struct mlxsw_sp_acl_rule;
 
 struct mlxsw_sp_acl_rule *
 mlxsw_sp_acl_rule_create(struct mlxsw_sp *mlxsw_sp,
                         struct mlxsw_sp_acl_ruleset *ruleset,
-                        unsigned long cookie);
+                        unsigned long cookie,
+                        struct netlink_ext_ack *extack);
 void mlxsw_sp_acl_rule_destroy(struct mlxsw_sp *mlxsw_sp,
                               struct mlxsw_sp_acl_rule *rule);
 int mlxsw_sp_acl_rule_add(struct mlxsw_sp *mlxsw_sp,
@@ -582,7 +619,52 @@ int mlxsw_sp_acl_init(struct mlxsw_sp *mlxsw_sp);
 void mlxsw_sp_acl_fini(struct mlxsw_sp *mlxsw_sp);
 
 /* spectrum_acl_tcam.c */
-extern const struct mlxsw_sp_acl_ops mlxsw_sp_acl_tcam_ops;
+struct mlxsw_sp_acl_tcam;
+struct mlxsw_sp_acl_tcam_region;
+
+struct mlxsw_sp_acl_tcam_ops {
+       enum mlxsw_reg_ptar_key_type key_type;
+       size_t priv_size;
+       int (*init)(struct mlxsw_sp *mlxsw_sp, void *priv,
+                   struct mlxsw_sp_acl_tcam *tcam);
+       void (*fini)(struct mlxsw_sp *mlxsw_sp, void *priv);
+       size_t region_priv_size;
+       int (*region_init)(struct mlxsw_sp *mlxsw_sp, void *region_priv,
+                          void *tcam_priv,
+                          struct mlxsw_sp_acl_tcam_region *region);
+       void (*region_fini)(struct mlxsw_sp *mlxsw_sp, void *region_priv);
+       int (*region_associate)(struct mlxsw_sp *mlxsw_sp,
+                               struct mlxsw_sp_acl_tcam_region *region);
+       size_t chunk_priv_size;
+       void (*chunk_init)(void *region_priv, void *chunk_priv,
+                          unsigned int priority);
+       void (*chunk_fini)(void *chunk_priv);
+       size_t entry_priv_size;
+       int (*entry_add)(struct mlxsw_sp *mlxsw_sp,
+                        void *region_priv, void *chunk_priv,
+                        void *entry_priv,
+                        struct mlxsw_sp_acl_rule_info *rulei);
+       void (*entry_del)(struct mlxsw_sp *mlxsw_sp,
+                         void *region_priv, void *chunk_priv,
+                         void *entry_priv);
+       int (*entry_activity_get)(struct mlxsw_sp *mlxsw_sp,
+                                 void *region_priv, void *entry_priv,
+                                 bool *activity);
+};
+
+/* spectrum1_acl_tcam.c */
+extern const struct mlxsw_sp_acl_tcam_ops mlxsw_sp1_acl_tcam_ops;
+
+/* spectrum2_acl_tcam.c */
+extern const struct mlxsw_sp_acl_tcam_ops mlxsw_sp2_acl_tcam_ops;
+
+/* spectrum_acl_flex_actions.c */
+extern const struct mlxsw_afa_ops mlxsw_sp1_act_afa_ops;
+extern const struct mlxsw_afa_ops mlxsw_sp2_act_afa_ops;
+
+/* spectrum_acl_flex_keys.c */
+extern const struct mlxsw_afk_ops mlxsw_sp1_afk_ops;
+extern const struct mlxsw_afk_ops mlxsw_sp2_afk_ops;
 
 /* spectrum_flower.c */
 int mlxsw_sp_flower_replace(struct mlxsw_sp *mlxsw_sp,
@@ -594,6 +676,12 @@ void mlxsw_sp_flower_destroy(struct mlxsw_sp *mlxsw_sp,
 int mlxsw_sp_flower_stats(struct mlxsw_sp *mlxsw_sp,
                          struct mlxsw_sp_acl_block *block,
                          struct tc_cls_flower_offload *f);
+int mlxsw_sp_flower_tmplt_create(struct mlxsw_sp *mlxsw_sp,
+                                struct mlxsw_sp_acl_block *block,
+                                struct tc_cls_flower_offload *f);
+void mlxsw_sp_flower_tmplt_destroy(struct mlxsw_sp *mlxsw_sp,
+                                  struct mlxsw_sp_acl_block *block,
+                                  struct tc_cls_flower_offload *f);
 
 /* spectrum_qdisc.c */
 int mlxsw_sp_tc_qdisc_init(struct mlxsw_sp_port *mlxsw_sp_port);
@@ -631,4 +719,40 @@ void mlxsw_sp_port_fids_fini(struct mlxsw_sp_port *mlxsw_sp_port);
 int mlxsw_sp_fids_init(struct mlxsw_sp *mlxsw_sp);
 void mlxsw_sp_fids_fini(struct mlxsw_sp *mlxsw_sp);
 
+/* spectrum_mr.c */
+enum mlxsw_sp_mr_route_prio {
+       MLXSW_SP_MR_ROUTE_PRIO_SG,
+       MLXSW_SP_MR_ROUTE_PRIO_STARG,
+       MLXSW_SP_MR_ROUTE_PRIO_CATCHALL,
+       __MLXSW_SP_MR_ROUTE_PRIO_MAX
+};
+
+#define MLXSW_SP_MR_ROUTE_PRIO_MAX (__MLXSW_SP_MR_ROUTE_PRIO_MAX - 1)
+
+struct mlxsw_sp_mr_route_key;
+
+struct mlxsw_sp_mr_tcam_ops {
+       size_t priv_size;
+       int (*init)(struct mlxsw_sp *mlxsw_sp, void *priv);
+       void (*fini)(void *priv);
+       size_t route_priv_size;
+       int (*route_create)(struct mlxsw_sp *mlxsw_sp, void *priv,
+                           void *route_priv,
+                           struct mlxsw_sp_mr_route_key *key,
+                           struct mlxsw_afa_block *afa_block,
+                           enum mlxsw_sp_mr_route_prio prio);
+       void (*route_destroy)(struct mlxsw_sp *mlxsw_sp, void *priv,
+                             void *route_priv,
+                             struct mlxsw_sp_mr_route_key *key);
+       int (*route_update)(struct mlxsw_sp *mlxsw_sp, void *route_priv,
+                           struct mlxsw_sp_mr_route_key *key,
+                           struct mlxsw_afa_block *afa_block);
+};
+
+/* spectrum1_mr_tcam.c */
+extern const struct mlxsw_sp_mr_tcam_ops mlxsw_sp1_mr_tcam_ops;
+
+/* spectrum2_mr_tcam.c */
+extern const struct mlxsw_sp_mr_tcam_ops mlxsw_sp2_mr_tcam_ops;
+
 #endif
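This header swaps the single mlxsw_sp_acl_ops for a family of per-ASIC ops tables (kvdl_ops, afa_ops, afk_ops, mr_tcam_ops, acl_tcam_ops) hung off struct mlxsw_sp, so the common code dispatches through function pointers while the spectrum1_* and spectrum2_* files supply the implementations. A minimal sketch of that dispatch pattern; struct chip, chip1_ops and chip_start() are hypothetical stand-ins:

        /* Sketch of the per-ASIC ops dispatch this header sets up;
         * the real driver selects the sp1 or sp2 tables once at
         * probe time in much the same way.
         */
        struct chip_ops {
                int (*init)(void *priv);
                void (*fini)(void *priv);
        };

        static int chip1_init(void *priv) { return 0; }
        static void chip1_fini(void *priv) { }

        static const struct chip_ops chip1_ops = {
                .init = chip1_init,
                .fini = chip1_fini,
        };

        struct chip {
                const struct chip_ops *ops; /* picked once at probe */
                void *priv;
        };

        static int chip_start(struct chip *chip)
        {
                chip->ops = &chip1_ops;         /* probe selects table */
                return chip->ops->init(chip->priv); /* common dispatch */
        }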
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum1_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum1_acl_tcam.c
new file mode 100644
index 0000000..5c89565
--- /dev/null
@@ -0,0 +1,275 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum1_acl_tcam.c
+ * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017-2018 Jiri Pirko <jiri@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+
+#include "reg.h"
+#include "core.h"
+#include "spectrum.h"
+#include "spectrum_acl_tcam.h"
+
+struct mlxsw_sp1_acl_tcam_region {
+       struct mlxsw_sp_acl_ctcam_region cregion;
+       struct mlxsw_sp_acl_tcam_region *region;
+       struct {
+               struct mlxsw_sp_acl_ctcam_chunk cchunk;
+               struct mlxsw_sp_acl_ctcam_entry centry;
+               struct mlxsw_sp_acl_rule_info *rulei;
+       } catchall;
+};
+
+struct mlxsw_sp1_acl_tcam_chunk {
+       struct mlxsw_sp_acl_ctcam_chunk cchunk;
+};
+
+struct mlxsw_sp1_acl_tcam_entry {
+       struct mlxsw_sp_acl_ctcam_entry centry;
+};
+
+static int
+mlxsw_sp1_acl_ctcam_region_entry_insert(struct mlxsw_sp_acl_ctcam_region *cregion,
+                                       struct mlxsw_sp_acl_ctcam_entry *centry,
+                                       const char *mask)
+{
+       return 0;
+}
+
+static void
+mlxsw_sp1_acl_ctcam_region_entry_remove(struct mlxsw_sp_acl_ctcam_region *cregion,
+                                       struct mlxsw_sp_acl_ctcam_entry *centry)
+{
+}
+
+static const struct mlxsw_sp_acl_ctcam_region_ops
+mlxsw_sp1_acl_ctcam_region_ops = {
+       .entry_insert = mlxsw_sp1_acl_ctcam_region_entry_insert,
+       .entry_remove = mlxsw_sp1_acl_ctcam_region_entry_remove,
+};
+
+static int mlxsw_sp1_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, void *priv,
+                                  struct mlxsw_sp_acl_tcam *tcam)
+{
+       return 0;
+}
+
+static void mlxsw_sp1_acl_tcam_fini(struct mlxsw_sp *mlxsw_sp, void *priv)
+{
+}
+
+static int
+mlxsw_sp1_acl_ctcam_region_catchall_add(struct mlxsw_sp *mlxsw_sp,
+                                       struct mlxsw_sp1_acl_tcam_region *region)
+{
+       struct mlxsw_sp_acl_rule_info *rulei;
+       int err;
+
+       mlxsw_sp_acl_ctcam_chunk_init(&region->cregion,
+                                     &region->catchall.cchunk,
+                                     MLXSW_SP_ACL_TCAM_CATCHALL_PRIO);
+       rulei = mlxsw_sp_acl_rulei_create(mlxsw_sp->acl);
+       if (IS_ERR(rulei)) {
+               err = PTR_ERR(rulei);
+               goto err_rulei_create;
+       }
+       err = mlxsw_sp_acl_rulei_act_continue(rulei);
+       if (WARN_ON(err))
+               goto err_rulei_act_continue;
+       err = mlxsw_sp_acl_rulei_commit(rulei);
+       if (err)
+               goto err_rulei_commit;
+       err = mlxsw_sp_acl_ctcam_entry_add(mlxsw_sp, &region->cregion,
+                                          &region->catchall.cchunk,
+                                          &region->catchall.centry,
+                                          rulei, false);
+       if (err)
+               goto err_entry_add;
+       region->catchall.rulei = rulei;
+       return 0;
+
+err_entry_add:
+err_rulei_commit:
+err_rulei_act_continue:
+       mlxsw_sp_acl_rulei_destroy(rulei);
+err_rulei_create:
+       mlxsw_sp_acl_ctcam_chunk_fini(&region->catchall.cchunk);
+       return err;
+}
+
+static void
+mlxsw_sp1_acl_ctcam_region_catchall_del(struct mlxsw_sp *mlxsw_sp,
+                                       struct mlxsw_sp1_acl_tcam_region *region)
+{
+       struct mlxsw_sp_acl_rule_info *rulei = region->catchall.rulei;
+
+       mlxsw_sp_acl_ctcam_entry_del(mlxsw_sp, &region->cregion,
+                                    &region->catchall.cchunk,
+                                    &region->catchall.centry);
+       mlxsw_sp_acl_rulei_destroy(rulei);
+       mlxsw_sp_acl_ctcam_chunk_fini(&region->catchall.cchunk);
+}
+
+static int
+mlxsw_sp1_acl_tcam_region_init(struct mlxsw_sp *mlxsw_sp, void *region_priv,
+                              void *tcam_priv,
+                              struct mlxsw_sp_acl_tcam_region *_region)
+{
+       struct mlxsw_sp1_acl_tcam_region *region = region_priv;
+       int err;
+
+       err = mlxsw_sp_acl_ctcam_region_init(mlxsw_sp, &region->cregion,
+                                            _region,
+                                            &mlxsw_sp1_acl_ctcam_region_ops);
+       if (err)
+               return err;
+       err = mlxsw_sp1_acl_ctcam_region_catchall_add(mlxsw_sp, region);
+       if (err)
+               goto err_catchall_add;
+       region->region = _region;
+       return 0;
+
+err_catchall_add:
+       mlxsw_sp_acl_ctcam_region_fini(&region->cregion);
+       return err;
+}
+
+static void
+mlxsw_sp1_acl_tcam_region_fini(struct mlxsw_sp *mlxsw_sp, void *region_priv)
+{
+       struct mlxsw_sp1_acl_tcam_region *region = region_priv;
+
+       mlxsw_sp1_acl_ctcam_region_catchall_del(mlxsw_sp, region);
+       mlxsw_sp_acl_ctcam_region_fini(&region->cregion);
+}
+
+static int
+mlxsw_sp1_acl_tcam_region_associate(struct mlxsw_sp *mlxsw_sp,
+                                   struct mlxsw_sp_acl_tcam_region *region)
+{
+       return 0;
+}
+
+static void mlxsw_sp1_acl_tcam_chunk_init(void *region_priv, void *chunk_priv,
+                                         unsigned int priority)
+{
+       struct mlxsw_sp1_acl_tcam_region *region = region_priv;
+       struct mlxsw_sp1_acl_tcam_chunk *chunk = chunk_priv;
+
+       mlxsw_sp_acl_ctcam_chunk_init(&region->cregion, &chunk->cchunk,
+                                     priority);
+}
+
+static void mlxsw_sp1_acl_tcam_chunk_fini(void *chunk_priv)
+{
+       struct mlxsw_sp1_acl_tcam_chunk *chunk = chunk_priv;
+
+       mlxsw_sp_acl_ctcam_chunk_fini(&chunk->cchunk);
+}
+
+static int mlxsw_sp1_acl_tcam_entry_add(struct mlxsw_sp *mlxsw_sp,
+                                       void *region_priv, void *chunk_priv,
+                                       void *entry_priv,
+                                       struct mlxsw_sp_acl_rule_info *rulei)
+{
+       struct mlxsw_sp1_acl_tcam_region *region = region_priv;
+       struct mlxsw_sp1_acl_tcam_chunk *chunk = chunk_priv;
+       struct mlxsw_sp1_acl_tcam_entry *entry = entry_priv;
+
+       return mlxsw_sp_acl_ctcam_entry_add(mlxsw_sp, &region->cregion,
+                                           &chunk->cchunk, &entry->centry,
+                                           rulei, false);
+}
+
+static void mlxsw_sp1_acl_tcam_entry_del(struct mlxsw_sp *mlxsw_sp,
+                                        void *region_priv, void *chunk_priv,
+                                        void *entry_priv)
+{
+       struct mlxsw_sp1_acl_tcam_region *region = region_priv;
+       struct mlxsw_sp1_acl_tcam_chunk *chunk = chunk_priv;
+       struct mlxsw_sp1_acl_tcam_entry *entry = entry_priv;
+
+       mlxsw_sp_acl_ctcam_entry_del(mlxsw_sp, &region->cregion,
+                                    &chunk->cchunk, &entry->centry);
+}
+
+static int
+mlxsw_sp1_acl_tcam_region_entry_activity_get(struct mlxsw_sp *mlxsw_sp,
+                                            struct mlxsw_sp_acl_tcam_region *_region,
+                                            unsigned int offset,
+                                            bool *activity)
+{
+       char ptce2_pl[MLXSW_REG_PTCE2_LEN];
+       int err;
+
+       mlxsw_reg_ptce2_pack(ptce2_pl, true, MLXSW_REG_PTCE2_OP_QUERY_CLEAR_ON_READ,
+                            _region->tcam_region_info, offset, 0);
+       err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptce2), ptce2_pl);
+       if (err)
+               return err;
+       *activity = mlxsw_reg_ptce2_a_get(ptce2_pl);
+       return 0;
+}
+
+static int
+mlxsw_sp1_acl_tcam_entry_activity_get(struct mlxsw_sp *mlxsw_sp,
+                                     void *region_priv, void *entry_priv,
+                                     bool *activity)
+{
+       struct mlxsw_sp1_acl_tcam_region *region = region_priv;
+       struct mlxsw_sp1_acl_tcam_entry *entry = entry_priv;
+       unsigned int offset;
+
+       offset = mlxsw_sp_acl_ctcam_entry_offset(&entry->centry);
+       return mlxsw_sp1_acl_tcam_region_entry_activity_get(mlxsw_sp,
+                                                           region->region,
+                                                           offset, activity);
+}
+
+const struct mlxsw_sp_acl_tcam_ops mlxsw_sp1_acl_tcam_ops = {
+       .key_type               = MLXSW_REG_PTAR_KEY_TYPE_FLEX,
+       .priv_size              = 0,
+       .init                   = mlxsw_sp1_acl_tcam_init,
+       .fini                   = mlxsw_sp1_acl_tcam_fini,
+       .region_priv_size       = sizeof(struct mlxsw_sp1_acl_tcam_region),
+       .region_init            = mlxsw_sp1_acl_tcam_region_init,
+       .region_fini            = mlxsw_sp1_acl_tcam_region_fini,
+       .region_associate       = mlxsw_sp1_acl_tcam_region_associate,
+       .chunk_priv_size        = sizeof(struct mlxsw_sp1_acl_tcam_chunk),
+       .chunk_init             = mlxsw_sp1_acl_tcam_chunk_init,
+       .chunk_fini             = mlxsw_sp1_acl_tcam_chunk_fini,
+       .entry_priv_size        = sizeof(struct mlxsw_sp1_acl_tcam_entry),
+       .entry_add              = mlxsw_sp1_acl_tcam_entry_add,
+       .entry_del              = mlxsw_sp1_acl_tcam_entry_del,
+       .entry_activity_get     = mlxsw_sp1_acl_tcam_entry_activity_get,
+};
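Each level of the ops table above (priv_size, region_priv_size, chunk_priv_size, entry_priv_size) follows the same convention: the common TCAM code allocates an opaque private blob of the advertised size per object and hands it back to the callbacks as a void *, which they cast to their own type. A standalone sketch of the convention, assuming hypothetical struct ops, struct sp1_priv and object_create():

        /* Sketch of the *_priv_size convention: common code sizes
         * the blob from the ops table; the callback casts it back.
         */
        #include <stdlib.h>

        struct ops {
                size_t priv_size;
                int (*init)(void *priv);
        };

        struct sp1_priv {
                int region_count;
        };

        static int sp1_init(void *priv)
        {
                struct sp1_priv *p = priv; /* cast back to real type */

                p->region_count = 0;
                return 0;
        }

        static const struct ops sp1_ops = {
                .priv_size = sizeof(struct sp1_priv),
                .init = sp1_init,
        };

        static void *object_create(const struct ops *ops)
        {
                void *priv = calloc(1, ops->priv_size);

                if (!priv)
                        return NULL;
                if (ops->init(priv)) {
                        free(priv);
                        return NULL;
                }
                return priv;
        }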
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum1_kvdl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum1_kvdl.c
new file mode 100644
index 0000000..0d45838
--- /dev/null
@@ -0,0 +1,459 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum1_kvdl.c
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2018 Jiri Pirko <jiri@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/bitops.h>
+
+#include "spectrum.h"
+
+#define MLXSW_SP1_KVDL_SINGLE_BASE 0
+#define MLXSW_SP1_KVDL_SINGLE_SIZE 16384
+#define MLXSW_SP1_KVDL_SINGLE_END \
+       (MLXSW_SP1_KVDL_SINGLE_SIZE + MLXSW_SP1_KVDL_SINGLE_BASE - 1)
+
+#define MLXSW_SP1_KVDL_CHUNKS_BASE \
+       (MLXSW_SP1_KVDL_SINGLE_BASE + MLXSW_SP1_KVDL_SINGLE_SIZE)
+#define MLXSW_SP1_KVDL_CHUNKS_SIZE 49152
+#define MLXSW_SP1_KVDL_CHUNKS_END \
+       (MLXSW_SP1_KVDL_CHUNKS_SIZE + MLXSW_SP1_KVDL_CHUNKS_BASE - 1)
+
+#define MLXSW_SP1_KVDL_LARGE_CHUNKS_BASE \
+       (MLXSW_SP1_KVDL_CHUNKS_BASE + MLXSW_SP1_KVDL_CHUNKS_SIZE)
+#define MLXSW_SP1_KVDL_LARGE_CHUNKS_SIZE \
+       (MLXSW_SP_KVD_LINEAR_SIZE - MLXSW_SP1_KVDL_LARGE_CHUNKS_BASE)
+#define MLXSW_SP1_KVDL_LARGE_CHUNKS_END \
+       (MLXSW_SP1_KVDL_LARGE_CHUNKS_SIZE + MLXSW_SP1_KVDL_LARGE_CHUNKS_BASE - 1)
+
+#define MLXSW_SP1_KVDL_SINGLE_ALLOC_SIZE 1
+#define MLXSW_SP1_KVDL_CHUNKS_ALLOC_SIZE 32
+#define MLXSW_SP1_KVDL_LARGE_CHUNKS_ALLOC_SIZE 512
+
+struct mlxsw_sp1_kvdl_part_info {
+       unsigned int part_index;
+       unsigned int start_index;
+       unsigned int end_index;
+       unsigned int alloc_size;
+       enum mlxsw_sp_resource_id resource_id;
+};
+
+enum mlxsw_sp1_kvdl_part_id {
+       MLXSW_SP1_KVDL_PART_ID_SINGLE,
+       MLXSW_SP1_KVDL_PART_ID_CHUNKS,
+       MLXSW_SP1_KVDL_PART_ID_LARGE_CHUNKS,
+};
+
+#define MLXSW_SP1_KVDL_PART_INFO(id)                           \
+[MLXSW_SP1_KVDL_PART_ID_##id] = {                              \
+       .start_index = MLXSW_SP1_KVDL_##id##_BASE,              \
+       .end_index = MLXSW_SP1_KVDL_##id##_END,                 \
+       .alloc_size = MLXSW_SP1_KVDL_##id##_ALLOC_SIZE,         \
+       .resource_id = MLXSW_SP_RESOURCE_KVD_LINEAR_##id,       \
+}
+
+static const struct mlxsw_sp1_kvdl_part_info mlxsw_sp1_kvdl_parts_info[] = {
+       MLXSW_SP1_KVDL_PART_INFO(SINGLE),
+       MLXSW_SP1_KVDL_PART_INFO(CHUNKS),
+       MLXSW_SP1_KVDL_PART_INFO(LARGE_CHUNKS),
+};
+
+#define MLXSW_SP1_KVDL_PARTS_INFO_LEN ARRAY_SIZE(mlxsw_sp1_kvdl_parts_info)
+
+struct mlxsw_sp1_kvdl_part {
+       struct mlxsw_sp1_kvdl_part_info info;
+       unsigned long usage[0]; /* Entries */
+};
+
+struct mlxsw_sp1_kvdl {
+       struct mlxsw_sp1_kvdl_part *parts[MLXSW_SP1_KVDL_PARTS_INFO_LEN];
+};
+
+static struct mlxsw_sp1_kvdl_part *
+mlxsw_sp1_kvdl_alloc_size_part(struct mlxsw_sp1_kvdl *kvdl,
+                              unsigned int alloc_size)
+{
+       struct mlxsw_sp1_kvdl_part *part, *min_part = NULL;
+       int i;
+
+       for (i = 0; i < MLXSW_SP1_KVDL_PARTS_INFO_LEN; i++) {
+               part = kvdl->parts[i];
+               if (alloc_size <= part->info.alloc_size &&
+                   (!min_part ||
+                    part->info.alloc_size <= min_part->info.alloc_size))
+                       min_part = part;
+       }
+
+       return min_part ?: ERR_PTR(-ENOBUFS);
+}
+
+static struct mlxsw_sp1_kvdl_part *
+mlxsw_sp1_kvdl_index_part(struct mlxsw_sp1_kvdl *kvdl, u32 kvdl_index)
+{
+       struct mlxsw_sp1_kvdl_part *part;
+       int i;
+
+       for (i = 0; i < MLXSW_SP1_KVDL_PARTS_INFO_LEN; i++) {
+               part = kvdl->parts[i];
+               if (kvdl_index >= part->info.start_index &&
+                   kvdl_index <= part->info.end_index)
+                       return part;
+       }
+
+       return ERR_PTR(-EINVAL);
+}
+
+static u32
+mlxsw_sp1_kvdl_to_kvdl_index(const struct mlxsw_sp1_kvdl_part_info *info,
+                            unsigned int entry_index)
+{
+       return info->start_index + entry_index * info->alloc_size;
+}
+
+static unsigned int
+mlxsw_sp1_kvdl_to_entry_index(const struct mlxsw_sp1_kvdl_part_info *info,
+                             u32 kvdl_index)
+{
+       return (kvdl_index - info->start_index) / info->alloc_size;
+}
+
+static int mlxsw_sp1_kvdl_part_alloc(struct mlxsw_sp1_kvdl_part *part,
+                                    u32 *p_kvdl_index)
+{
+       const struct mlxsw_sp1_kvdl_part_info *info = &part->info;
+       unsigned int entry_index, nr_entries;
+
+       nr_entries = (info->end_index - info->start_index + 1) /
+                    info->alloc_size;
+       entry_index = find_first_zero_bit(part->usage, nr_entries);
+       if (entry_index == nr_entries)
+               return -ENOBUFS;
+       __set_bit(entry_index, part->usage);
+
+       *p_kvdl_index = mlxsw_sp1_kvdl_to_kvdl_index(info, entry_index);
+
+       return 0;
+}
+
+static void mlxsw_sp1_kvdl_part_free(struct mlxsw_sp1_kvdl_part *part,
+                                    u32 kvdl_index)
+{
+       const struct mlxsw_sp1_kvdl_part_info *info = &part->info;
+       unsigned int entry_index;
+
+       entry_index = mlxsw_sp1_kvdl_to_entry_index(info, kvdl_index);
+       __clear_bit(entry_index, part->usage);
+}
+
+static int mlxsw_sp1_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, void *priv,
+                               enum mlxsw_sp_kvdl_entry_type type,
+                               unsigned int entry_count,
+                               u32 *p_entry_index)
+{
+       struct mlxsw_sp1_kvdl *kvdl = priv;
+       struct mlxsw_sp1_kvdl_part *part;
+
+       /* Find partition with smallest allocation size satisfying the
+        * requested size.
+        */
+       part = mlxsw_sp1_kvdl_alloc_size_part(kvdl, entry_count);
+       if (IS_ERR(part))
+               return PTR_ERR(part);
+
+       return mlxsw_sp1_kvdl_part_alloc(part, p_entry_index);
+}
+
+static void mlxsw_sp1_kvdl_free(struct mlxsw_sp *mlxsw_sp, void *priv,
+                               enum mlxsw_sp_kvdl_entry_type type,
+                               unsigned int entry_count, int entry_index)
+{
+       struct mlxsw_sp1_kvdl *kvdl = priv;
+       struct mlxsw_sp1_kvdl_part *part;
+
+       part = mlxsw_sp1_kvdl_index_part(kvdl, entry_index);
+       if (IS_ERR(part))
+               return;
+       mlxsw_sp1_kvdl_part_free(part, entry_index);
+}
+
+static int mlxsw_sp1_kvdl_alloc_size_query(struct mlxsw_sp *mlxsw_sp,
+                                          void *priv,
+                                          enum mlxsw_sp_kvdl_entry_type type,
+                                          unsigned int entry_count,
+                                          unsigned int *p_alloc_size)
+{
+       struct mlxsw_sp1_kvdl *kvdl = priv;
+       struct mlxsw_sp1_kvdl_part *part;
+
+       part = mlxsw_sp1_kvdl_alloc_size_part(kvdl, entry_count);
+       if (IS_ERR(part))
+               return PTR_ERR(part);
+
+       *p_alloc_size = part->info.alloc_size;
+
+       return 0;
+}
+
+static void mlxsw_sp1_kvdl_part_update(struct mlxsw_sp1_kvdl_part *part,
+                                      struct mlxsw_sp1_kvdl_part *part_prev,
+                                      unsigned int size)
+{
+       if (!part_prev) {
+               part->info.end_index = size - 1;
+       } else {
+               part->info.start_index = part_prev->info.end_index + 1;
+               part->info.end_index = part->info.start_index + size - 1;
+       }
+}
+
+static struct mlxsw_sp1_kvdl_part *
+mlxsw_sp1_kvdl_part_init(struct mlxsw_sp *mlxsw_sp,
+                        const struct mlxsw_sp1_kvdl_part_info *info,
+                        struct mlxsw_sp1_kvdl_part *part_prev)
+{
+       struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
+       struct mlxsw_sp1_kvdl_part *part;
+       bool need_update = true;
+       unsigned int nr_entries;
+       size_t usage_size;
+       u64 resource_size;
+       int err;
+
+       err = devlink_resource_size_get(devlink, info->resource_id,
+                                       &resource_size);
+       if (err) {
+               need_update = false;
+               resource_size = info->end_index - info->start_index + 1;
+       }
+
+       nr_entries = div_u64(resource_size, info->alloc_size);
+       usage_size = BITS_TO_LONGS(nr_entries) * sizeof(unsigned long);
+       part = kzalloc(sizeof(*part) + usage_size, GFP_KERNEL);
+       if (!part)
+               return ERR_PTR(-ENOMEM);
+
+       memcpy(&part->info, info, sizeof(part->info));
+
+       if (need_update)
+               mlxsw_sp1_kvdl_part_update(part, part_prev, resource_size);
+       return part;
+}
+
+static void mlxsw_sp1_kvdl_part_fini(struct mlxsw_sp1_kvdl_part *part)
+{
+       kfree(part);
+}
+
+static int mlxsw_sp1_kvdl_parts_init(struct mlxsw_sp *mlxsw_sp,
+                                    struct mlxsw_sp1_kvdl *kvdl)
+{
+       const struct mlxsw_sp1_kvdl_part_info *info;
+       struct mlxsw_sp1_kvdl_part *part_prev = NULL;
+       int err, i;
+
+       for (i = 0; i < MLXSW_SP1_KVDL_PARTS_INFO_LEN; i++) {
+               info = &mlxsw_sp1_kvdl_parts_info[i];
+               kvdl->parts[i] = mlxsw_sp1_kvdl_part_init(mlxsw_sp, info,
+                                                         part_prev);
+               if (IS_ERR(kvdl->parts[i])) {
+                       err = PTR_ERR(kvdl->parts[i]);
+                       goto err_kvdl_part_init;
+               }
+               part_prev = kvdl->parts[i];
+       }
+       return 0;
+
+err_kvdl_part_init:
+       for (i--; i >= 0; i--)
+               mlxsw_sp1_kvdl_part_fini(kvdl->parts[i]);
+       return err;
+}
+
+static void mlxsw_sp1_kvdl_parts_fini(struct mlxsw_sp1_kvdl *kvdl)
+{
+       int i;
+
+       for (i = 0; i < MLXSW_SP1_KVDL_PARTS_INFO_LEN; i++)
+               mlxsw_sp1_kvdl_part_fini(kvdl->parts[i]);
+}
+
+static u64 mlxsw_sp1_kvdl_part_occ(struct mlxsw_sp1_kvdl_part *part)
+{
+       const struct mlxsw_sp1_kvdl_part_info *info = &part->info;
+       unsigned int nr_entries;
+       int bit = -1;
+       u64 occ = 0;
+
+       nr_entries = (info->end_index -
+                     info->start_index + 1) /
+                     info->alloc_size;
+       while ((bit = find_next_bit(part->usage, nr_entries, bit + 1))
+               < nr_entries)
+               occ += info->alloc_size;
+       return occ;
+}
+
+static u64 mlxsw_sp1_kvdl_occ_get(void *priv)
+{
+       const struct mlxsw_sp1_kvdl *kvdl = priv;
+       u64 occ = 0;
+       int i;
+
+       for (i = 0; i < MLXSW_SP1_KVDL_PARTS_INFO_LEN; i++)
+               occ += mlxsw_sp1_kvdl_part_occ(kvdl->parts[i]);
+
+       return occ;
+}
+
+static u64 mlxsw_sp1_kvdl_single_occ_get(void *priv)
+{
+       const struct mlxsw_sp1_kvdl *kvdl = priv;
+       struct mlxsw_sp1_kvdl_part *part;
+
+       part = kvdl->parts[MLXSW_SP1_KVDL_PART_ID_SINGLE];
+       return mlxsw_sp1_kvdl_part_occ(part);
+}
+
+static u64 mlxsw_sp1_kvdl_chunks_occ_get(void *priv)
+{
+       const struct mlxsw_sp1_kvdl *kvdl = priv;
+       struct mlxsw_sp1_kvdl_part *part;
+
+       part = kvdl->parts[MLXSW_SP1_KVDL_PART_ID_CHUNKS];
+       return mlxsw_sp1_kvdl_part_occ(part);
+}
+
+static u64 mlxsw_sp1_kvdl_large_chunks_occ_get(void *priv)
+{
+       const struct mlxsw_sp1_kvdl *kvdl = priv;
+       struct mlxsw_sp1_kvdl_part *part;
+
+       part = kvdl->parts[MLXSW_SP1_KVDL_PART_ID_LARGE_CHUNKS];
+       return mlxsw_sp1_kvdl_part_occ(part);
+}
+
+static int mlxsw_sp1_kvdl_init(struct mlxsw_sp *mlxsw_sp, void *priv)
+{
+       struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
+       struct mlxsw_sp1_kvdl *kvdl = priv;
+       int err;
+
+       err = mlxsw_sp1_kvdl_parts_init(mlxsw_sp, kvdl);
+       if (err)
+               return err;
+       devlink_resource_occ_get_register(devlink,
+                                         MLXSW_SP_RESOURCE_KVD_LINEAR,
+                                         mlxsw_sp1_kvdl_occ_get,
+                                         kvdl);
+       devlink_resource_occ_get_register(devlink,
+                                         MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE,
+                                         mlxsw_sp1_kvdl_single_occ_get,
+                                         kvdl);
+       devlink_resource_occ_get_register(devlink,
+                                         MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS,
+                                         mlxsw_sp1_kvdl_chunks_occ_get,
+                                         kvdl);
+       devlink_resource_occ_get_register(devlink,
+                                         MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS,
+                                         mlxsw_sp1_kvdl_large_chunks_occ_get,
+                                         kvdl);
+       return 0;
+}
+
+static void mlxsw_sp1_kvdl_fini(struct mlxsw_sp *mlxsw_sp, void *priv)
+{
+       struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
+       struct mlxsw_sp1_kvdl *kvdl = priv;
+
+       devlink_resource_occ_get_unregister(devlink,
+                                           MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS);
+       devlink_resource_occ_get_unregister(devlink,
+                                           MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS);
+       devlink_resource_occ_get_unregister(devlink,
+                                           MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE);
+       devlink_resource_occ_get_unregister(devlink,
+                                           MLXSW_SP_RESOURCE_KVD_LINEAR);
+       mlxsw_sp1_kvdl_parts_fini(kvdl);
+}
+
+const struct mlxsw_sp_kvdl_ops mlxsw_sp1_kvdl_ops = {
+       .priv_size = sizeof(struct mlxsw_sp1_kvdl),
+       .init = mlxsw_sp1_kvdl_init,
+       .fini = mlxsw_sp1_kvdl_fini,
+       .alloc = mlxsw_sp1_kvdl_alloc,
+       .free = mlxsw_sp1_kvdl_free,
+       .alloc_size_query = mlxsw_sp1_kvdl_alloc_size_query,
+};
+
+int mlxsw_sp1_kvdl_resources_register(struct mlxsw_core *mlxsw_core)
+{
+       struct devlink *devlink = priv_to_devlink(mlxsw_core);
+       static struct devlink_resource_size_params size_params;
+       u32 kvdl_max_size;
+       int err;
+
+       kvdl_max_size = MLXSW_CORE_RES_GET(mlxsw_core, KVD_SIZE) -
+                       MLXSW_CORE_RES_GET(mlxsw_core, KVD_SINGLE_MIN_SIZE) -
+                       MLXSW_CORE_RES_GET(mlxsw_core, KVD_DOUBLE_MIN_SIZE);
+
+       devlink_resource_size_params_init(&size_params, 0, kvdl_max_size,
+                                         MLXSW_SP1_KVDL_SINGLE_ALLOC_SIZE,
+                                         DEVLINK_RESOURCE_UNIT_ENTRY);
+       err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_SINGLES,
+                                       MLXSW_SP1_KVDL_SINGLE_SIZE,
+                                       MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE,
+                                       MLXSW_SP_RESOURCE_KVD_LINEAR,
+                                       &size_params);
+       if (err)
+               return err;
+
+       devlink_resource_size_params_init(&size_params, 0, kvdl_max_size,
+                                         MLXSW_SP1_KVDL_CHUNKS_ALLOC_SIZE,
+                                         DEVLINK_RESOURCE_UNIT_ENTRY);
+       err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_CHUNKS,
+                                       MLXSW_SP1_KVDL_CHUNKS_SIZE,
+                                       MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS,
+                                       MLXSW_SP_RESOURCE_KVD_LINEAR,
+                                       &size_params);
+       if (err)
+               return err;
+
+       devlink_resource_size_params_init(&size_params, 0, kvdl_max_size,
+                                         MLXSW_SP1_KVDL_LARGE_CHUNKS_ALLOC_SIZE,
+                                         DEVLINK_RESOURCE_UNIT_ENTRY);
+       err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_LARGE_CHUNKS,
+                                       MLXSW_SP1_KVDL_LARGE_CHUNKS_SIZE,
+                                       MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS,
+                                       MLXSW_SP_RESOURCE_KVD_LINEAR,
+                                       &size_params);
+       return err;
+}
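The Spectrum-1 KVDL above carves the linear KVD into three fixed partitions (singles, 32-entry chunks, 512-entry large chunks), each tracked by a bitmap with one bit per alloc_size-sized slot; allocation is a first-fit scan and occupancy walks the set bits. A standalone sketch of the slot arithmetic, with the kernel bitmap helpers open-coded and NR_SLOTS/part_alloc() as hypothetical names:

        /* One bit per alloc_size-sized slot; the KVDL index is
         * base + slot * alloc_size.
         */
        #include <limits.h>

        #define NR_SLOTS        64
        #define BITS_PER_WORD   (sizeof(unsigned long) * CHAR_BIT)

        static unsigned long usage[(NR_SLOTS + BITS_PER_WORD - 1) /
                                   BITS_PER_WORD];

        static int part_alloc(unsigned int base, unsigned int alloc_size,
                              unsigned int *p_kvdl_index)
        {
                unsigned int slot;

                for (slot = 0; slot < NR_SLOTS; slot++) {
                        unsigned long *word = &usage[slot / BITS_PER_WORD];
                        unsigned long bit = 1UL << (slot % BITS_PER_WORD);

                        if (!(*word & bit)) {
                                *word |= bit; /* __set_bit() in the driver */
                                *p_kvdl_index = base + slot * alloc_size;
                                return 0;
                        }
                }
                return -1; /* -ENOBUFS in the driver */
        }

        static void part_free(unsigned int base, unsigned int alloc_size,
                              unsigned int kvdl_index)
        {
                unsigned int slot = (kvdl_index - base) / alloc_size;

                usage[slot / BITS_PER_WORD] &=
                        ~(1UL << (slot % BITS_PER_WORD));
        }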
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum1_mr_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum1_mr_tcam.c
new file mode 100644
index 0000000..fc649fe
--- /dev/null
@@ -0,0 +1,374 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum1_mr_tcam.c
+ * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com>
+ * Copyright (c) 2018 Jiri Pirko <jiri@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/parman.h>
+
+#include "reg.h"
+#include "spectrum.h"
+#include "core_acl_flex_actions.h"
+#include "spectrum_mr.h"
+
+struct mlxsw_sp1_mr_tcam_region {
+       struct mlxsw_sp *mlxsw_sp;
+       enum mlxsw_reg_rtar_key_type rtar_key_type;
+       struct parman *parman;
+       struct parman_prio *parman_prios;
+};
+
+struct mlxsw_sp1_mr_tcam {
+       struct mlxsw_sp1_mr_tcam_region tcam_regions[MLXSW_SP_L3_PROTO_MAX];
+};
+
+struct mlxsw_sp1_mr_tcam_route {
+       struct parman_item parman_item;
+       struct parman_prio *parman_prio;
+};
+
+static int mlxsw_sp1_mr_tcam_route_replace(struct mlxsw_sp *mlxsw_sp,
+                                          struct parman_item *parman_item,
+                                          struct mlxsw_sp_mr_route_key *key,
+                                          struct mlxsw_afa_block *afa_block)
+{
+       char rmft2_pl[MLXSW_REG_RMFT2_LEN];
+
+       switch (key->proto) {
+       case MLXSW_SP_L3_PROTO_IPV4:
+               mlxsw_reg_rmft2_ipv4_pack(rmft2_pl, true, parman_item->index,
+                                         key->vrid,
+                                         MLXSW_REG_RMFT2_IRIF_MASK_IGNORE, 0,
+                                         ntohl(key->group.addr4),
+                                         ntohl(key->group_mask.addr4),
+                                         ntohl(key->source.addr4),
+                                         ntohl(key->source_mask.addr4),
+                                         mlxsw_afa_block_first_set(afa_block));
+               break;
+       case MLXSW_SP_L3_PROTO_IPV6:
+               mlxsw_reg_rmft2_ipv6_pack(rmft2_pl, true, parman_item->index,
+                                         key->vrid,
+                                         MLXSW_REG_RMFT2_IRIF_MASK_IGNORE, 0,
+                                         key->group.addr6,
+                                         key->group_mask.addr6,
+                                         key->source.addr6,
+                                         key->source_mask.addr6,
+                                         mlxsw_afa_block_first_set(afa_block));
+       }
+
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rmft2), rmft2_pl);
+}
+
+static int mlxsw_sp1_mr_tcam_route_remove(struct mlxsw_sp *mlxsw_sp,
+                                         struct parman_item *parman_item,
+                                         struct mlxsw_sp_mr_route_key *key)
+{
+       struct in6_addr zero_addr = IN6ADDR_ANY_INIT;
+       char rmft2_pl[MLXSW_REG_RMFT2_LEN];
+
+       switch (key->proto) {
+       case MLXSW_SP_L3_PROTO_IPV4:
+               mlxsw_reg_rmft2_ipv4_pack(rmft2_pl, false, parman_item->index,
+                                         key->vrid, 0, 0, 0, 0, 0, 0, NULL);
+               break;
+       case MLXSW_SP_L3_PROTO_IPV6:
+               mlxsw_reg_rmft2_ipv6_pack(rmft2_pl, false, parman_item->index,
+                                         key->vrid, 0, 0, zero_addr, zero_addr,
+                                         zero_addr, zero_addr, NULL);
+               break;
+       }
+
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rmft2), rmft2_pl);
+}
+
+static struct mlxsw_sp1_mr_tcam_region *
+mlxsw_sp1_mr_tcam_protocol_region(struct mlxsw_sp1_mr_tcam *mr_tcam,
+                                 enum mlxsw_sp_l3proto proto)
+{
+       return &mr_tcam->tcam_regions[proto];
+}
+
+static int
+mlxsw_sp1_mr_tcam_route_parman_item_add(struct mlxsw_sp1_mr_tcam *mr_tcam,
+                                       struct mlxsw_sp1_mr_tcam_route *route,
+                                       struct mlxsw_sp_mr_route_key *key,
+                                       enum mlxsw_sp_mr_route_prio prio)
+{
+       struct mlxsw_sp1_mr_tcam_region *tcam_region;
+       int err;
+
+       tcam_region = mlxsw_sp1_mr_tcam_protocol_region(mr_tcam, key->proto);
+       err = parman_item_add(tcam_region->parman,
+                             &tcam_region->parman_prios[prio],
+                             &route->parman_item);
+       if (err)
+               return err;
+
+       route->parman_prio = &tcam_region->parman_prios[prio];
+       return 0;
+}
+
+static void
+mlxsw_sp1_mr_tcam_route_parman_item_remove(struct mlxsw_sp1_mr_tcam *mr_tcam,
+                                          struct mlxsw_sp1_mr_tcam_route *route,
+                                          struct mlxsw_sp_mr_route_key *key)
+{
+       struct mlxsw_sp1_mr_tcam_region *tcam_region;
+
+       tcam_region = mlxsw_sp1_mr_tcam_protocol_region(mr_tcam, key->proto);
+       parman_item_remove(tcam_region->parman,
+                          route->parman_prio, &route->parman_item);
+}
+
+static int
+mlxsw_sp1_mr_tcam_route_create(struct mlxsw_sp *mlxsw_sp, void *priv,
+                              void *route_priv,
+                              struct mlxsw_sp_mr_route_key *key,
+                              struct mlxsw_afa_block *afa_block,
+                              enum mlxsw_sp_mr_route_prio prio)
+{
+       struct mlxsw_sp1_mr_tcam_route *route = route_priv;
+       struct mlxsw_sp1_mr_tcam *mr_tcam = priv;
+       int err;
+
+       err = mlxsw_sp1_mr_tcam_route_parman_item_add(mr_tcam, route,
+                                                     key, prio);
+       if (err)
+               return err;
+
+       err = mlxsw_sp1_mr_tcam_route_replace(mlxsw_sp, &route->parman_item,
+                                             key, afa_block);
+       if (err)
+               goto err_route_replace;
+       return 0;
+
+err_route_replace:
+       mlxsw_sp1_mr_tcam_route_parman_item_remove(mr_tcam, route, key);
+       return err;
+}
+
+static void
+mlxsw_sp1_mr_tcam_route_destroy(struct mlxsw_sp *mlxsw_sp, void *priv,
+                               void *route_priv,
+                               struct mlxsw_sp_mr_route_key *key)
+{
+       struct mlxsw_sp1_mr_tcam_route *route = route_priv;
+       struct mlxsw_sp1_mr_tcam *mr_tcam = priv;
+
+       mlxsw_sp1_mr_tcam_route_remove(mlxsw_sp, &route->parman_item, key);
+       mlxsw_sp1_mr_tcam_route_parman_item_remove(mr_tcam, route, key);
+}
+
+static int
+mlxsw_sp1_mr_tcam_route_update(struct mlxsw_sp *mlxsw_sp,
+                              void *route_priv,
+                              struct mlxsw_sp_mr_route_key *key,
+                              struct mlxsw_afa_block *afa_block)
+{
+       struct mlxsw_sp1_mr_tcam_route *route = route_priv;
+
+       return mlxsw_sp1_mr_tcam_route_replace(mlxsw_sp, &route->parman_item,
+                                              key, afa_block);
+}
+
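+/* Initial number of TCAM entries allocated for a region and the step by
+ * which parman resizes the region when it runs out of space.
+ */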
+#define MLXSW_SP1_MR_TCAM_REGION_BASE_COUNT 16
+#define MLXSW_SP1_MR_TCAM_REGION_RESIZE_STEP 16
+
+static int
+mlxsw_sp1_mr_tcam_region_alloc(struct mlxsw_sp1_mr_tcam_region *mr_tcam_region)
+{
+       struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp;
+       char rtar_pl[MLXSW_REG_RTAR_LEN];
+
+       mlxsw_reg_rtar_pack(rtar_pl, MLXSW_REG_RTAR_OP_ALLOCATE,
+                           mr_tcam_region->rtar_key_type,
+                           MLXSW_SP1_MR_TCAM_REGION_BASE_COUNT);
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtar), rtar_pl);
+}
+
+static void
+mlxsw_sp1_mr_tcam_region_free(struct mlxsw_sp1_mr_tcam_region *mr_tcam_region)
+{
+       struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp;
+       char rtar_pl[MLXSW_REG_RTAR_LEN];
+
+       mlxsw_reg_rtar_pack(rtar_pl, MLXSW_REG_RTAR_OP_DEALLOCATE,
+                           mr_tcam_region->rtar_key_type, 0);
+       mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtar), rtar_pl);
+}
+
+static int mlxsw_sp1_mr_tcam_region_parman_resize(void *priv,
+                                                 unsigned long new_count)
+{
+       struct mlxsw_sp1_mr_tcam_region *mr_tcam_region = priv;
+       struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp;
+       char rtar_pl[MLXSW_REG_RTAR_LEN];
+       u64 max_tcam_rules;
+
+       max_tcam_rules = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_TCAM_RULES);
+       if (new_count > max_tcam_rules)
+               return -EINVAL;
+       mlxsw_reg_rtar_pack(rtar_pl, MLXSW_REG_RTAR_OP_RESIZE,
+                           mr_tcam_region->rtar_key_type, new_count);
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtar), rtar_pl);
+}
+
+static void mlxsw_sp1_mr_tcam_region_parman_move(void *priv,
+                                                unsigned long from_index,
+                                                unsigned long to_index,
+                                                unsigned long count)
+{
+       struct mlxsw_sp1_mr_tcam_region *mr_tcam_region = priv;
+       struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp;
+       char rrcr_pl[MLXSW_REG_RRCR_LEN];
+
+       mlxsw_reg_rrcr_pack(rrcr_pl, MLXSW_REG_RRCR_OP_MOVE,
+                           from_index, count,
+                           mr_tcam_region->rtar_key_type, to_index);
+       mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rrcr), rrcr_pl);
+}
+
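+/* The linear-sort algorithm keeps the entries ordered by priority within
+ * the region, matching the TCAM's lowest-index-first lookup order.
+ */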
+static const struct parman_ops mlxsw_sp1_mr_tcam_region_parman_ops = {
+       .base_count     = MLXSW_SP1_MR_TCAM_REGION_BASE_COUNT,
+       .resize_step    = MLXSW_SP1_MR_TCAM_REGION_RESIZE_STEP,
+       .resize         = mlxsw_sp1_mr_tcam_region_parman_resize,
+       .move           = mlxsw_sp1_mr_tcam_region_parman_move,
+       .algo           = PARMAN_ALGO_TYPE_LSORT,
+};
+
+static int
+mlxsw_sp1_mr_tcam_region_init(struct mlxsw_sp *mlxsw_sp,
+                             struct mlxsw_sp1_mr_tcam_region *mr_tcam_region,
+                             enum mlxsw_reg_rtar_key_type rtar_key_type)
+{
+       struct parman_prio *parman_prios;
+       struct parman *parman;
+       int err;
+       int i;
+
+       mr_tcam_region->rtar_key_type = rtar_key_type;
+       mr_tcam_region->mlxsw_sp = mlxsw_sp;
+
+       err = mlxsw_sp1_mr_tcam_region_alloc(mr_tcam_region);
+       if (err)
+               return err;
+
+       parman = parman_create(&mlxsw_sp1_mr_tcam_region_parman_ops,
+                              mr_tcam_region);
+       if (!parman) {
+               err = -ENOMEM;
+               goto err_parman_create;
+       }
+       mr_tcam_region->parman = parman;
+
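+       /* Keep one parman priority per multicast route priority level. */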
+       parman_prios = kmalloc_array(MLXSW_SP_MR_ROUTE_PRIO_MAX + 1,
+                                    sizeof(*parman_prios), GFP_KERNEL);
+       if (!parman_prios) {
+               err = -ENOMEM;
+               goto err_parman_prios_alloc;
+       }
+       mr_tcam_region->parman_prios = parman_prios;
+
+       for (i = 0; i < MLXSW_SP_MR_ROUTE_PRIO_MAX + 1; i++)
+               parman_prio_init(mr_tcam_region->parman,
+                                &mr_tcam_region->parman_prios[i], i);
+       return 0;
+
+err_parman_prios_alloc:
+       parman_destroy(parman);
+err_parman_create:
+       mlxsw_sp1_mr_tcam_region_free(mr_tcam_region);
+       return err;
+}
+
+static void
+mlxsw_sp1_mr_tcam_region_fini(struct mlxsw_sp1_mr_tcam_region *mr_tcam_region)
+{
+       int i;
+
+       for (i = 0; i < MLXSW_SP_MR_ROUTE_PRIO_MAX + 1; i++)
+               parman_prio_fini(&mr_tcam_region->parman_prios[i]);
+       kfree(mr_tcam_region->parman_prios);
+       parman_destroy(mr_tcam_region->parman);
+       mlxsw_sp1_mr_tcam_region_free(mr_tcam_region);
+}
+
+static int mlxsw_sp1_mr_tcam_init(struct mlxsw_sp *mlxsw_sp, void *priv)
+{
+       struct mlxsw_sp1_mr_tcam *mr_tcam = priv;
+       struct mlxsw_sp1_mr_tcam_region *region = &mr_tcam->tcam_regions[0];
+       u32 rtar_key;
+       int err;
+
+       if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, ACL_MAX_TCAM_RULES))
+               return -EIO;
+
+       rtar_key = MLXSW_REG_RTAR_KEY_TYPE_IPV4_MULTICAST;
+       err = mlxsw_sp1_mr_tcam_region_init(mlxsw_sp,
+                                           &region[MLXSW_SP_L3_PROTO_IPV4],
+                                           rtar_key);
+       if (err)
+               return err;
+
+       rtar_key = MLXSW_REG_RTAR_KEY_TYPE_IPV6_MULTICAST;
+       err = mlxsw_sp1_mr_tcam_region_init(mlxsw_sp,
+                                           &region[MLXSW_SP_L3_PROTO_IPV6],
+                                           rtar_key);
+       if (err)
+               goto err_ipv6_region_init;
+
+       return 0;
+
+err_ipv6_region_init:
+       mlxsw_sp1_mr_tcam_region_fini(&region[MLXSW_SP_L3_PROTO_IPV4]);
+       return err;
+}
+
+static void mlxsw_sp1_mr_tcam_fini(void *priv)
+{
+       struct mlxsw_sp1_mr_tcam *mr_tcam = priv;
+       struct mlxsw_sp1_mr_tcam_region *region = &mr_tcam->tcam_regions[0];
+
+       mlxsw_sp1_mr_tcam_region_fini(&region[MLXSW_SP_L3_PROTO_IPV6]);
+       mlxsw_sp1_mr_tcam_region_fini(&region[MLXSW_SP_L3_PROTO_IPV4]);
+}
+
+const struct mlxsw_sp_mr_tcam_ops mlxsw_sp1_mr_tcam_ops = {
+       .priv_size = sizeof(struct mlxsw_sp1_mr_tcam),
+       .init = mlxsw_sp1_mr_tcam_init,
+       .fini = mlxsw_sp1_mr_tcam_fini,
+       .route_priv_size = sizeof(struct mlxsw_sp1_mr_tcam_route),
+       .route_create = mlxsw_sp1_mr_tcam_route_create,
+       .route_destroy = mlxsw_sp1_mr_tcam_route_destroy,
+       .route_update = mlxsw_sp1_mr_tcam_route_update,
+};
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum2_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_acl_tcam.c
new file mode 100644 (file)
index 0000000..22c8764
--- /dev/null
@@ -0,0 +1,270 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum2_acl_tcam.c
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2018 Jiri Pirko <jiri@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/kernel.h>
+
+#include "spectrum.h"
+#include "spectrum_acl_tcam.h"
+#include "core_acl_flex_actions.h"
+
+struct mlxsw_sp2_acl_tcam {
+       struct mlxsw_sp_acl_atcam atcam;
+       u32 kvdl_index;
+       unsigned int kvdl_count;
+};
+
+struct mlxsw_sp2_acl_tcam_region {
+       struct mlxsw_sp_acl_atcam_region aregion;
+       struct mlxsw_sp_acl_tcam_region *region;
+};
+
+struct mlxsw_sp2_acl_tcam_chunk {
+       struct mlxsw_sp_acl_atcam_chunk achunk;
+};
+
+struct mlxsw_sp2_acl_tcam_entry {
+       struct mlxsw_sp_acl_atcam_entry aentry;
+       struct mlxsw_afa_block *act_block;
+};
+
+static int
+mlxsw_sp2_acl_ctcam_region_entry_insert(struct mlxsw_sp_acl_ctcam_region *cregion,
+                                       struct mlxsw_sp_acl_ctcam_entry *centry,
+                                       const char *mask)
+{
+       struct mlxsw_sp_acl_atcam_region *aregion;
+       struct mlxsw_sp_acl_atcam_entry *aentry;
+       struct mlxsw_sp_acl_erp *erp;
+
+       aregion = mlxsw_sp_acl_tcam_cregion_aregion(cregion);
+       aentry = mlxsw_sp_acl_tcam_centry_aentry(centry);
+
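+       /* Even though the entry resides in the C-TCAM, its mask must be
+        * registered as an eRP so that lookups will also consult the C-TCAM.
+        */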
+       erp = mlxsw_sp_acl_erp_get(aregion, mask, true);
+       if (IS_ERR(erp))
+               return PTR_ERR(erp);
+       aentry->erp = erp;
+
+       return 0;
+}
+
+static void
+mlxsw_sp2_acl_ctcam_region_entry_remove(struct mlxsw_sp_acl_ctcam_region *cregion,
+                                       struct mlxsw_sp_acl_ctcam_entry *centry)
+{
+       struct mlxsw_sp_acl_atcam_region *aregion;
+       struct mlxsw_sp_acl_atcam_entry *aentry;
+
+       aregion = mlxsw_sp_acl_tcam_cregion_aregion(cregion);
+       aentry = mlxsw_sp_acl_tcam_centry_aentry(centry);
+
+       mlxsw_sp_acl_erp_put(aregion, aentry->erp);
+}
+
+static const struct mlxsw_sp_acl_ctcam_region_ops
+mlxsw_sp2_acl_ctcam_region_ops = {
+       .entry_insert = mlxsw_sp2_acl_ctcam_region_entry_insert,
+       .entry_remove = mlxsw_sp2_acl_ctcam_region_entry_remove,
+};
+
+static int mlxsw_sp2_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, void *priv,
+                                  struct mlxsw_sp_acl_tcam *_tcam)
+{
+       struct mlxsw_sp2_acl_tcam *tcam = priv;
+       struct mlxsw_afa_block *afa_block;
+       char pefa_pl[MLXSW_REG_PEFA_LEN];
+       char pgcr_pl[MLXSW_REG_PGCR_LEN];
+       char *enc_actions;
+       int i;
+       int err;
+
+       tcam->kvdl_count = _tcam->max_regions;
+       err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ACTSET,
+                                 tcam->kvdl_count, &tcam->kvdl_index);
+       if (err)
+               return err;
+
+       /* Create a flex action block and set the default action (continue),
+        * but don't commit it. We just need the current action set encoding,
+        * which is written using the PEFA register to all indexes of all
+        * regions.
+        */
+       afa_block = mlxsw_afa_block_create(mlxsw_sp->afa);
+       if (!afa_block) {
+               err = -ENOMEM;
+               goto err_afa_block;
+       }
+       err = mlxsw_afa_block_continue(afa_block);
+       if (WARN_ON(err))
+               goto err_afa_block_continue;
+       enc_actions = mlxsw_afa_block_cur_set(afa_block);
+
+       for (i = 0; i < tcam->kvdl_count; i++) {
+               mlxsw_reg_pefa_pack(pefa_pl, tcam->kvdl_index + i,
+                                   true, enc_actions);
+               err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pefa), pefa_pl);
+               if (err)
+                       goto err_pefa_write;
+       }
+       mlxsw_reg_pgcr_pack(pgcr_pl, tcam->kvdl_index);
+       err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pgcr), pgcr_pl);
+       if (err)
+               goto err_pgcr_write;
+
+       err = mlxsw_sp_acl_atcam_init(mlxsw_sp, &tcam->atcam);
+       if (err)
+               goto err_atcam_init;
+
+       mlxsw_afa_block_destroy(afa_block);
+       return 0;
+
+err_atcam_init:
+err_pgcr_write:
+err_pefa_write:
+err_afa_block_continue:
+       mlxsw_afa_block_destroy(afa_block);
+err_afa_block:
+       mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ACTSET,
+                          tcam->kvdl_count, tcam->kvdl_index);
+       return err;
+}
+
+static void mlxsw_sp2_acl_tcam_fini(struct mlxsw_sp *mlxsw_sp, void *priv)
+{
+       struct mlxsw_sp2_acl_tcam *tcam = priv;
+
+       mlxsw_sp_acl_atcam_fini(mlxsw_sp, &tcam->atcam);
+       mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ACTSET,
+                          tcam->kvdl_count, tcam->kvdl_index);
+}
+
+static int
+mlxsw_sp2_acl_tcam_region_init(struct mlxsw_sp *mlxsw_sp, void *region_priv,
+                              void *tcam_priv,
+                              struct mlxsw_sp_acl_tcam_region *_region)
+{
+       struct mlxsw_sp2_acl_tcam_region *region = region_priv;
+       struct mlxsw_sp2_acl_tcam *tcam = tcam_priv;
+
+       region->region = _region;
+
+       return mlxsw_sp_acl_atcam_region_init(mlxsw_sp, &tcam->atcam,
+                                             &region->aregion, _region,
+                                             &mlxsw_sp2_acl_ctcam_region_ops);
+}
+
+static void
+mlxsw_sp2_acl_tcam_region_fini(struct mlxsw_sp *mlxsw_sp, void *region_priv)
+{
+       struct mlxsw_sp2_acl_tcam_region *region = region_priv;
+
+       mlxsw_sp_acl_atcam_region_fini(&region->aregion);
+}
+
+static int
+mlxsw_sp2_acl_tcam_region_associate(struct mlxsw_sp *mlxsw_sp,
+                                   struct mlxsw_sp_acl_tcam_region *region)
+{
+       return mlxsw_sp_acl_atcam_region_associate(mlxsw_sp, region->id);
+}
+
+static void mlxsw_sp2_acl_tcam_chunk_init(void *region_priv, void *chunk_priv,
+                                         unsigned int priority)
+{
+       struct mlxsw_sp2_acl_tcam_region *region = region_priv;
+       struct mlxsw_sp2_acl_tcam_chunk *chunk = chunk_priv;
+
+       mlxsw_sp_acl_atcam_chunk_init(&region->aregion, &chunk->achunk,
+                                     priority);
+}
+
+static void mlxsw_sp2_acl_tcam_chunk_fini(void *chunk_priv)
+{
+       struct mlxsw_sp2_acl_tcam_chunk *chunk = chunk_priv;
+
+       mlxsw_sp_acl_atcam_chunk_fini(&chunk->achunk);
+}
+
+static int mlxsw_sp2_acl_tcam_entry_add(struct mlxsw_sp *mlxsw_sp,
+                                       void *region_priv, void *chunk_priv,
+                                       void *entry_priv,
+                                       struct mlxsw_sp_acl_rule_info *rulei)
+{
+       struct mlxsw_sp2_acl_tcam_region *region = region_priv;
+       struct mlxsw_sp2_acl_tcam_chunk *chunk = chunk_priv;
+       struct mlxsw_sp2_acl_tcam_entry *entry = entry_priv;
+
+       entry->act_block = rulei->act_block;
+       return mlxsw_sp_acl_atcam_entry_add(mlxsw_sp, &region->aregion,
+                                           &chunk->achunk, &entry->aentry,
+                                           rulei);
+}
+
+static void mlxsw_sp2_acl_tcam_entry_del(struct mlxsw_sp *mlxsw_sp,
+                                        void *region_priv, void *chunk_priv,
+                                        void *entry_priv)
+{
+       struct mlxsw_sp2_acl_tcam_region *region = region_priv;
+       struct mlxsw_sp2_acl_tcam_chunk *chunk = chunk_priv;
+       struct mlxsw_sp2_acl_tcam_entry *entry = entry_priv;
+
+       mlxsw_sp_acl_atcam_entry_del(mlxsw_sp, &region->aregion, &chunk->achunk,
+                                    &entry->aentry);
+}
+
+static int
+mlxsw_sp2_acl_tcam_entry_activity_get(struct mlxsw_sp *mlxsw_sp,
+                                     void *region_priv, void *entry_priv,
+                                     bool *activity)
+{
+       struct mlxsw_sp2_acl_tcam_entry *entry = entry_priv;
+
+       return mlxsw_afa_block_activity_get(entry->act_block, activity);
+}
+
+const struct mlxsw_sp_acl_tcam_ops mlxsw_sp2_acl_tcam_ops = {
+       .key_type               = MLXSW_REG_PTAR_KEY_TYPE_FLEX2,
+       .priv_size              = sizeof(struct mlxsw_sp2_acl_tcam),
+       .init                   = mlxsw_sp2_acl_tcam_init,
+       .fini                   = mlxsw_sp2_acl_tcam_fini,
+       .region_priv_size       = sizeof(struct mlxsw_sp2_acl_tcam_region),
+       .region_init            = mlxsw_sp2_acl_tcam_region_init,
+       .region_fini            = mlxsw_sp2_acl_tcam_region_fini,
+       .region_associate       = mlxsw_sp2_acl_tcam_region_associate,
+       .chunk_priv_size        = sizeof(struct mlxsw_sp2_acl_tcam_chunk),
+       .chunk_init             = mlxsw_sp2_acl_tcam_chunk_init,
+       .chunk_fini             = mlxsw_sp2_acl_tcam_chunk_fini,
+       .entry_priv_size        = sizeof(struct mlxsw_sp2_acl_tcam_entry),
+       .entry_add              = mlxsw_sp2_acl_tcam_entry_add,
+       .entry_del              = mlxsw_sp2_acl_tcam_entry_del,
+       .entry_activity_get     = mlxsw_sp2_acl_tcam_entry_activity_get,
+};
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum2_kvdl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_kvdl.c
new file mode 100644 (file)
index 0000000..bacf748
--- /dev/null
@@ -0,0 +1,302 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum2_kvdl.c
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2018 Jiri Pirko <jiri@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/bitops.h>
+
+#include "spectrum.h"
+#include "core.h"
+#include "reg.h"
+#include "resources.h"
+
+struct mlxsw_sp2_kvdl_part_info {
+       u8 res_type;
+       /* For each defined partititon we need to know how many
+        * usage bits we need and how many indexes there are
+        * represented by a single bit. This could be got from FW
+        * querying appropriate resources. So have the resource
+        * ids for for this purpose in partition definition.
+        */
+       enum mlxsw_res_id usage_bit_count_res_id;
+       enum mlxsw_res_id index_range_res_id;
+};
+
+#define MLXSW_SP2_KVDL_PART_INFO(_entry_type, _res_type,                       \
+                                _usage_bit_count_res_id, _index_range_res_id)  \
+[MLXSW_SP_KVDL_ENTRY_TYPE_##_entry_type] = {                                   \
+       .res_type = _res_type,                                                  \
+       .usage_bit_count_res_id = MLXSW_RES_ID_##_usage_bit_count_res_id,       \
+       .index_range_res_id = MLXSW_RES_ID_##_index_range_res_id,               \
+}
+
+static const struct mlxsw_sp2_kvdl_part_info mlxsw_sp2_kvdl_parts_info[] = {
+       MLXSW_SP2_KVDL_PART_INFO(ADJ, 0x21, KVD_SIZE, MAX_KVD_LINEAR_RANGE),
+       MLXSW_SP2_KVDL_PART_INFO(ACTSET, 0x23, MAX_KVD_ACTION_SETS,
+                                MAX_KVD_ACTION_SETS),
+       MLXSW_SP2_KVDL_PART_INFO(PBS, 0x24, KVD_SIZE, KVD_SIZE),
+       MLXSW_SP2_KVDL_PART_INFO(MCRIGR, 0x26, KVD_SIZE, KVD_SIZE),
+};
+
+#define MLXSW_SP2_KVDL_PARTS_INFO_LEN ARRAY_SIZE(mlxsw_sp2_kvdl_parts_info)
+
+struct mlxsw_sp2_kvdl_part {
+       const struct mlxsw_sp2_kvdl_part_info *info;
+       unsigned int usage_bit_count;
+       unsigned int indexes_per_usage_bit;
+       unsigned int last_allocated_bit;
+       unsigned long usage[0]; /* Usage bits */
+};
+
+struct mlxsw_sp2_kvdl {
+       struct mlxsw_sp2_kvdl_part *parts[MLXSW_SP2_KVDL_PARTS_INFO_LEN];
+};
+
+static int mlxsw_sp2_kvdl_part_find_zero_bits(struct mlxsw_sp2_kvdl_part *part,
+                                             unsigned int bit_count,
+                                             unsigned int *p_bit)
+{
+       unsigned int start_bit;
+       unsigned int bit;
+       unsigned int i;
+       bool wrap = false;
+
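+       /* Circular first-fit search: look for bit_count consecutive zero
+        * bits, starting just past the last allocation and wrapping around
+        * once before giving up.
+        */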
+       start_bit = part->last_allocated_bit + 1;
+       if (start_bit == part->usage_bit_count)
+               start_bit = 0;
+       bit = start_bit;
+again:
+       bit = find_next_zero_bit(part->usage, part->usage_bit_count, bit);
+       if (!wrap && bit + bit_count >= part->usage_bit_count) {
+               wrap = true;
+               bit = 0;
+               goto again;
+       }
+       if (wrap && bit + bit_count >= start_bit)
+               return -ENOBUFS;
+       for (i = 0; i < bit_count; i++) {
+               if (test_bit(bit + i, part->usage)) {
+                       bit += bit_count;
+                       goto again;
+               }
+       }
+       *p_bit = bit;
+       return 0;
+}
+
+static int mlxsw_sp2_kvdl_part_alloc(struct mlxsw_sp2_kvdl_part *part,
+                                    unsigned int size,
+                                    u32 *p_kvdl_index)
+{
+       unsigned int bit_count;
+       unsigned int bit;
+       unsigned int i;
+       int err;
+
+       bit_count = DIV_ROUND_UP(size, part->indexes_per_usage_bit);
+       err = mlxsw_sp2_kvdl_part_find_zero_bits(part, bit_count, &bit);
+       if (err)
+               return err;
+       for (i = 0; i < bit_count; i++)
+               __set_bit(bit + i, part->usage);
+       *p_kvdl_index = bit * part->indexes_per_usage_bit;
+       return 0;
+}
+
+static int mlxsw_sp2_kvdl_rec_del(struct mlxsw_sp *mlxsw_sp, u8 res_type,
+                                 u16 size, u32 kvdl_index)
+{
+       char *iedr_pl;
+       int err;
+
+       iedr_pl = kmalloc(MLXSW_REG_IEDR_LEN, GFP_KERNEL);
+       if (!iedr_pl)
+               return -ENOMEM;
+
+       mlxsw_reg_iedr_pack(iedr_pl);
+       mlxsw_reg_iedr_rec_pack(iedr_pl, 0, res_type, size, kvdl_index);
+       err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(iedr), iedr_pl);
+       kfree(iedr_pl);
+       return err;
+}
+
+static void mlxsw_sp2_kvdl_part_free(struct mlxsw_sp *mlxsw_sp,
+                                    struct mlxsw_sp2_kvdl_part *part,
+                                    unsigned int size, u32 kvdl_index)
+{
+       unsigned int bit_count;
+       unsigned int bit;
+       unsigned int i;
+       int err;
+
+       /* We need to ask the FW to delete the previously used KVD linear
+        * index.
+        */
+       err = mlxsw_sp2_kvdl_rec_del(mlxsw_sp, part->info->res_type,
+                                    size, kvdl_index);
+       if (err)
+               return;
+
+       bit_count = DIV_ROUND_UP(size, part->indexes_per_usage_bit);
+       bit = kvdl_index / part->indexes_per_usage_bit;
+       for (i = 0; i < bit_count; i++)
+               __clear_bit(bit + i, part->usage);
+}
+
+static int mlxsw_sp2_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, void *priv,
+                               enum mlxsw_sp_kvdl_entry_type type,
+                               unsigned int entry_count,
+                               u32 *p_entry_index)
+{
+       unsigned int size = entry_count * mlxsw_sp_kvdl_entry_size(type);
+       struct mlxsw_sp2_kvdl *kvdl = priv;
+       struct mlxsw_sp2_kvdl_part *part = kvdl->parts[type];
+
+       return mlxsw_sp2_kvdl_part_alloc(part, size, p_entry_index);
+}
+
+static void mlxsw_sp2_kvdl_free(struct mlxsw_sp *mlxsw_sp, void *priv,
+                               enum mlxsw_sp_kvdl_entry_type type,
+                               unsigned int entry_count,
+                               int entry_index)
+{
+       unsigned int size = entry_count * mlxsw_sp_kvdl_entry_size(type);
+       struct mlxsw_sp2_kvdl *kvdl = priv;
+       struct mlxsw_sp2_kvdl_part *part = kvdl->parts[type];
+
+       return mlxsw_sp2_kvdl_part_free(mlxsw_sp, part, size, entry_index);
+}
+
+static int mlxsw_sp2_kvdl_alloc_size_query(struct mlxsw_sp *mlxsw_sp,
+                                          void *priv,
+                                          enum mlxsw_sp_kvdl_entry_type type,
+                                          unsigned int entry_count,
+                                          unsigned int *p_alloc_count)
+{
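+       /* Allocation here is bitmap-based, so the requested entry count can
+        * always be provided exactly, without rounding up.
+        */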
+       *p_alloc_count = entry_count;
+       return 0;
+}
+
+static struct mlxsw_sp2_kvdl_part *
+mlxsw_sp2_kvdl_part_init(struct mlxsw_sp *mlxsw_sp,
+                        const struct mlxsw_sp2_kvdl_part_info *info)
+{
+       unsigned int indexes_per_usage_bit;
+       struct mlxsw_sp2_kvdl_part *part;
+       unsigned int index_range;
+       unsigned int usage_bit_count;
+       size_t usage_size;
+
+       if (!mlxsw_core_res_valid(mlxsw_sp->core,
+                                 info->usage_bit_count_res_id) ||
+           !mlxsw_core_res_valid(mlxsw_sp->core,
+                                 info->index_range_res_id))
+               return ERR_PTR(-EIO);
+       usage_bit_count = mlxsw_core_res_get(mlxsw_sp->core,
+                                            info->usage_bit_count_res_id);
+       index_range = mlxsw_core_res_get(mlxsw_sp->core,
+                                        info->index_range_res_id);
+
+       /* For some partitions, one usage bit represents a group of indexes.
+        * That's why we compute the number of indexes per usage bit here,
+        * according to queried resources.
+        */
+       indexes_per_usage_bit = index_range / usage_bit_count;
+
+       usage_size = BITS_TO_LONGS(usage_bit_count) * sizeof(unsigned long);
+       part = kzalloc(sizeof(*part) + usage_size, GFP_KERNEL);
+       if (!part)
+               return ERR_PTR(-ENOMEM);
+       part->info = info;
+       part->usage_bit_count = usage_bit_count;
+       part->indexes_per_usage_bit = indexes_per_usage_bit;
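+       /* Point at the last bit so that the first circular search starts
+        * from bit 0.
+        */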
+       part->last_allocated_bit = usage_bit_count - 1;
+       return part;
+}
+
+static void mlxsw_sp2_kvdl_part_fini(struct mlxsw_sp2_kvdl_part *part)
+{
+       kfree(part);
+}
+
+static int mlxsw_sp2_kvdl_parts_init(struct mlxsw_sp *mlxsw_sp,
+                                    struct mlxsw_sp2_kvdl *kvdl)
+{
+       const struct mlxsw_sp2_kvdl_part_info *info;
+       int i;
+       int err;
+
+       for (i = 0; i < MLXSW_SP2_KVDL_PARTS_INFO_LEN; i++) {
+               info = &mlxsw_sp2_kvdl_parts_info[i];
+               kvdl->parts[i] = mlxsw_sp2_kvdl_part_init(mlxsw_sp, info);
+               if (IS_ERR(kvdl->parts[i])) {
+                       err = PTR_ERR(kvdl->parts[i]);
+                       goto err_kvdl_part_init;
+               }
+       }
+       return 0;
+
+err_kvdl_part_init:
+       for (i--; i >= 0; i--)
+               mlxsw_sp2_kvdl_part_fini(kvdl->parts[i]);
+       return err;
+}
+
+static void mlxsw_sp2_kvdl_parts_fini(struct mlxsw_sp2_kvdl *kvdl)
+{
+       int i;
+
+       for (i = 0; i < MLXSW_SP2_KVDL_PARTS_INFO_LEN; i++)
+               mlxsw_sp2_kvdl_part_fini(kvdl->parts[i]);
+}
+
+static int mlxsw_sp2_kvdl_init(struct mlxsw_sp *mlxsw_sp, void *priv)
+{
+       struct mlxsw_sp2_kvdl *kvdl = priv;
+
+       return mlxsw_sp2_kvdl_parts_init(mlxsw_sp, kvdl);
+}
+
+static void mlxsw_sp2_kvdl_fini(struct mlxsw_sp *mlxsw_sp, void *priv)
+{
+       struct mlxsw_sp2_kvdl *kvdl = priv;
+
+       mlxsw_sp2_kvdl_parts_fini(kvdl);
+}
+
+const struct mlxsw_sp_kvdl_ops mlxsw_sp2_kvdl_ops = {
+       .priv_size = sizeof(struct mlxsw_sp2_kvdl),
+       .init = mlxsw_sp2_kvdl_init,
+       .fini = mlxsw_sp2_kvdl_fini,
+       .alloc = mlxsw_sp2_kvdl_alloc,
+       .free = mlxsw_sp2_kvdl_free,
+       .alloc_size_query = mlxsw_sp2_kvdl_alloc_size_query,
+};
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum2_mr_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_mr_tcam.c
new file mode 100644 (file)
index 0000000..53d4ab7
--- /dev/null
@@ -0,0 +1,82 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum2_mr_tcam.c
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2018 Jiri Pirko <jiri@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/kernel.h>
+
+#include "core_acl_flex_actions.h"
+#include "spectrum.h"
+#include "spectrum_mr.h"
+
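+/* Multicast routing TCAM support is not implemented for Spectrum-2 yet;
+ * the ops below are no-op stubs that let the common multicast routing
+ * code initialize.
+ */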
+static int
+mlxsw_sp2_mr_tcam_route_create(struct mlxsw_sp *mlxsw_sp, void *priv,
+                              void *route_priv,
+                              struct mlxsw_sp_mr_route_key *key,
+                              struct mlxsw_afa_block *afa_block,
+                              enum mlxsw_sp_mr_route_prio prio)
+{
+       return 0;
+}
+
+static void
+mlxsw_sp2_mr_tcam_route_destroy(struct mlxsw_sp *mlxsw_sp, void *priv,
+                               void *route_priv,
+                               struct mlxsw_sp_mr_route_key *key)
+{
+}
+
+static int
+mlxsw_sp2_mr_tcam_route_update(struct mlxsw_sp *mlxsw_sp,
+                              void *route_priv,
+                              struct mlxsw_sp_mr_route_key *key,
+                              struct mlxsw_afa_block *afa_block)
+{
+       return 0;
+}
+
+static int mlxsw_sp2_mr_tcam_init(struct mlxsw_sp *mlxsw_sp, void *priv)
+{
+       return 0;
+}
+
+static void mlxsw_sp2_mr_tcam_fini(void *priv)
+{
+}
+
+const struct mlxsw_sp_mr_tcam_ops mlxsw_sp2_mr_tcam_ops = {
+       .init = mlxsw_sp2_mr_tcam_init,
+       .fini = mlxsw_sp2_mr_tcam_fini,
+       .route_create = mlxsw_sp2_mr_tcam_route_create,
+       .route_destroy = mlxsw_sp2_mr_tcam_route_destroy,
+       .route_update = mlxsw_sp2_mr_tcam_route_update,
+};
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
index 79b1fa27a9a439301a544f621367f8b925cdf52d..6a38763ad261336cbfd5f3fb78b5ed89b14d15af 100644 (file)
 #include "spectrum.h"
 #include "core_acl_flex_keys.h"
 #include "core_acl_flex_actions.h"
-#include "spectrum_acl_flex_keys.h"
+#include "spectrum_acl_tcam.h"
 
 struct mlxsw_sp_acl {
        struct mlxsw_sp *mlxsw_sp;
        struct mlxsw_afk *afk;
        struct mlxsw_sp_fid *dummy_fid;
-       const struct mlxsw_sp_acl_ops *ops;
        struct rhashtable ruleset_ht;
        struct list_head rules;
        struct {
@@ -62,8 +61,7 @@ struct mlxsw_sp_acl {
                unsigned long interval; /* ms */
 #define MLXSW_SP_ACL_RULE_ACTIVITY_UPDATE_PERIOD_MS 1000
        } rule_activity_update;
-       unsigned long priv[0];
-       /* priv has to be always the last item */
+       struct mlxsw_sp_acl_tcam tcam;
 };
 
 struct mlxsw_afk *mlxsw_sp_acl_afk(struct mlxsw_sp_acl *acl)
@@ -319,7 +317,8 @@ int mlxsw_sp_acl_block_unbind(struct mlxsw_sp *mlxsw_sp,
 static struct mlxsw_sp_acl_ruleset *
 mlxsw_sp_acl_ruleset_create(struct mlxsw_sp *mlxsw_sp,
                            struct mlxsw_sp_acl_block *block, u32 chain_index,
-                           const struct mlxsw_sp_acl_profile_ops *ops)
+                           const struct mlxsw_sp_acl_profile_ops *ops,
+                           struct mlxsw_afk_element_usage *tmplt_elusage)
 {
        struct mlxsw_sp_acl *acl = mlxsw_sp->acl;
        struct mlxsw_sp_acl_ruleset *ruleset;
@@ -339,7 +338,8 @@ mlxsw_sp_acl_ruleset_create(struct mlxsw_sp *mlxsw_sp,
        if (err)
                goto err_rhashtable_init;
 
-       err = ops->ruleset_add(mlxsw_sp, acl->priv, ruleset->priv);
+       err = ops->ruleset_add(mlxsw_sp, &acl->tcam, ruleset->priv,
+                              tmplt_elusage);
        if (err)
                goto err_ops_ruleset_add;
 
@@ -409,7 +409,7 @@ mlxsw_sp_acl_ruleset_lookup(struct mlxsw_sp *mlxsw_sp,
        struct mlxsw_sp_acl *acl = mlxsw_sp->acl;
        struct mlxsw_sp_acl_ruleset *ruleset;
 
-       ops = acl->ops->profile_ops(mlxsw_sp, profile);
+       ops = mlxsw_sp_acl_tcam_profile_ops(mlxsw_sp, profile);
        if (!ops)
                return ERR_PTR(-EINVAL);
        ruleset = __mlxsw_sp_acl_ruleset_lookup(acl, block, chain_index, ops);
@@ -421,13 +421,14 @@ mlxsw_sp_acl_ruleset_lookup(struct mlxsw_sp *mlxsw_sp,
 struct mlxsw_sp_acl_ruleset *
 mlxsw_sp_acl_ruleset_get(struct mlxsw_sp *mlxsw_sp,
                         struct mlxsw_sp_acl_block *block, u32 chain_index,
-                        enum mlxsw_sp_acl_profile profile)
+                        enum mlxsw_sp_acl_profile profile,
+                        struct mlxsw_afk_element_usage *tmplt_elusage)
 {
        const struct mlxsw_sp_acl_profile_ops *ops;
        struct mlxsw_sp_acl *acl = mlxsw_sp->acl;
        struct mlxsw_sp_acl_ruleset *ruleset;
 
-       ops = acl->ops->profile_ops(mlxsw_sp, profile);
+       ops = mlxsw_sp_acl_tcam_profile_ops(mlxsw_sp, profile);
        if (!ops)
                return ERR_PTR(-EINVAL);
 
@@ -436,7 +437,8 @@ mlxsw_sp_acl_ruleset_get(struct mlxsw_sp *mlxsw_sp,
                mlxsw_sp_acl_ruleset_ref_inc(ruleset);
                return ruleset;
        }
-       return mlxsw_sp_acl_ruleset_create(mlxsw_sp, block, chain_index, ops);
+       return mlxsw_sp_acl_ruleset_create(mlxsw_sp, block, chain_index, ops,
+                                          tmplt_elusage);
 }
 
 void mlxsw_sp_acl_ruleset_put(struct mlxsw_sp *mlxsw_sp,
@@ -487,7 +489,7 @@ int mlxsw_sp_acl_rulei_commit(struct mlxsw_sp_acl_rule_info *rulei)
 void mlxsw_sp_acl_rulei_priority(struct mlxsw_sp_acl_rule_info *rulei,
                                 unsigned int priority)
 {
-       rulei->priority = priority;
+       rulei->priority = priority >> 16;
 }
 
 void mlxsw_sp_acl_rulei_keymask_u32(struct mlxsw_sp_acl_rule_info *rulei,
@@ -536,18 +538,23 @@ int mlxsw_sp_acl_rulei_act_trap(struct mlxsw_sp_acl_rule_info *rulei)
 
 int mlxsw_sp_acl_rulei_act_fwd(struct mlxsw_sp *mlxsw_sp,
                               struct mlxsw_sp_acl_rule_info *rulei,
-                              struct net_device *out_dev)
+                              struct net_device *out_dev,
+                              struct netlink_ext_ack *extack)
 {
        struct mlxsw_sp_port *mlxsw_sp_port;
        u8 local_port;
        bool in_port;
 
        if (out_dev) {
-               if (!mlxsw_sp_port_dev_check(out_dev))
+               if (!mlxsw_sp_port_dev_check(out_dev)) {
+                       NL_SET_ERR_MSG_MOD(extack, "Invalid output device");
                        return -EINVAL;
+               }
                mlxsw_sp_port = netdev_priv(out_dev);
-               if (mlxsw_sp_port->mlxsw_sp != mlxsw_sp)
+               if (mlxsw_sp_port->mlxsw_sp != mlxsw_sp) {
+                       NL_SET_ERR_MSG_MOD(extack, "Invalid output device");
                        return -EINVAL;
+               }
                local_port = mlxsw_sp_port->local_port;
                in_port = false;
        } else {
@@ -558,20 +565,22 @@ int mlxsw_sp_acl_rulei_act_fwd(struct mlxsw_sp *mlxsw_sp,
                in_port = true;
        }
        return mlxsw_afa_block_append_fwd(rulei->act_block,
-                                         local_port, in_port);
+                                         local_port, in_port, extack);
 }
 
 int mlxsw_sp_acl_rulei_act_mirror(struct mlxsw_sp *mlxsw_sp,
                                  struct mlxsw_sp_acl_rule_info *rulei,
                                  struct mlxsw_sp_acl_block *block,
-                                 struct net_device *out_dev)
+                                 struct net_device *out_dev,
+                                 struct netlink_ext_ack *extack)
 {
        struct mlxsw_sp_acl_block_binding *binding;
        struct mlxsw_sp_port *in_port;
 
-       if (!list_is_singular(&block->binding_list))
+       if (!list_is_singular(&block->binding_list)) {
+               NL_SET_ERR_MSG_MOD(extack, "Only a single mirror source is allowed");
                return -EOPNOTSUPP;
-
+       }
        binding = list_first_entry(&block->binding_list,
                                   struct mlxsw_sp_acl_block_binding, list);
        in_port = binding->mlxsw_sp_port;
@@ -579,12 +588,14 @@ int mlxsw_sp_acl_rulei_act_mirror(struct mlxsw_sp *mlxsw_sp,
        return mlxsw_afa_block_append_mirror(rulei->act_block,
                                             in_port->local_port,
                                             out_dev,
-                                            binding->ingress);
+                                            binding->ingress,
+                                            extack);
 }
 
 int mlxsw_sp_acl_rulei_act_vlan(struct mlxsw_sp *mlxsw_sp,
                                struct mlxsw_sp_acl_rule_info *rulei,
-                               u32 action, u16 vid, u16 proto, u8 prio)
+                               u32 action, u16 vid, u16 proto, u8 prio,
+                               struct netlink_ext_ack *extack)
 {
        u8 ethertype;
 
@@ -597,44 +608,50 @@ int mlxsw_sp_acl_rulei_act_vlan(struct mlxsw_sp *mlxsw_sp,
                        ethertype = 1;
                        break;
                default:
+                       NL_SET_ERR_MSG_MOD(extack, "Unsupported VLAN protocol");
                        dev_err(mlxsw_sp->bus_info->dev, "Unsupported VLAN protocol %#04x\n",
                                proto);
                        return -EINVAL;
                }
 
                return mlxsw_afa_block_append_vlan_modify(rulei->act_block,
-                                                         vid, prio, ethertype);
+                                                         vid, prio, ethertype,
+                                                         extack);
        } else {
+               NL_SET_ERR_MSG_MOD(extack, "Unsupported VLAN action");
                dev_err(mlxsw_sp->bus_info->dev, "Unsupported VLAN action\n");
                return -EINVAL;
        }
 }
 
 int mlxsw_sp_acl_rulei_act_count(struct mlxsw_sp *mlxsw_sp,
-                                struct mlxsw_sp_acl_rule_info *rulei)
+                                struct mlxsw_sp_acl_rule_info *rulei,
+                                struct netlink_ext_ack *extack)
 {
        return mlxsw_afa_block_append_counter(rulei->act_block,
-                                             &rulei->counter_index);
+                                             &rulei->counter_index, extack);
 }
 
 int mlxsw_sp_acl_rulei_act_fid_set(struct mlxsw_sp *mlxsw_sp,
                                   struct mlxsw_sp_acl_rule_info *rulei,
-                                  u16 fid)
+                                  u16 fid, struct netlink_ext_ack *extack)
 {
-       return mlxsw_afa_block_append_fid_set(rulei->act_block, fid);
+       return mlxsw_afa_block_append_fid_set(rulei->act_block, fid, extack);
 }
 
 struct mlxsw_sp_acl_rule *
 mlxsw_sp_acl_rule_create(struct mlxsw_sp *mlxsw_sp,
                         struct mlxsw_sp_acl_ruleset *ruleset,
-                        unsigned long cookie)
+                        unsigned long cookie,
+                        struct netlink_ext_ack *extack)
 {
        const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops;
        struct mlxsw_sp_acl_rule *rule;
        int err;
 
        mlxsw_sp_acl_ruleset_ref_inc(ruleset);
-       rule = kzalloc(sizeof(*rule) + ops->rule_priv_size, GFP_KERNEL);
+       rule = kzalloc(sizeof(*rule) + ops->rule_priv_size(mlxsw_sp),
+                      GFP_KERNEL);
        if (!rule) {
                err = -ENOMEM;
                goto err_alloc;
@@ -825,20 +842,20 @@ int mlxsw_sp_acl_rule_get_stats(struct mlxsw_sp *mlxsw_sp,
 
 int mlxsw_sp_acl_init(struct mlxsw_sp *mlxsw_sp)
 {
-       const struct mlxsw_sp_acl_ops *acl_ops = &mlxsw_sp_acl_tcam_ops;
        struct mlxsw_sp_fid *fid;
        struct mlxsw_sp_acl *acl;
+       size_t alloc_size;
        int err;
 
-       acl = kzalloc(sizeof(*acl) + acl_ops->priv_size, GFP_KERNEL);
+       alloc_size = sizeof(*acl) + mlxsw_sp_acl_tcam_priv_size(mlxsw_sp);
+       acl = kzalloc(alloc_size, GFP_KERNEL);
        if (!acl)
                return -ENOMEM;
        mlxsw_sp->acl = acl;
        acl->mlxsw_sp = mlxsw_sp;
        acl->afk = mlxsw_afk_create(MLXSW_CORE_RES_GET(mlxsw_sp->core,
                                                       ACL_FLEX_KEYS),
-                                   mlxsw_sp_afk_blocks,
-                                   MLXSW_SP_AFK_BLOCKS_COUNT);
+                                   mlxsw_sp->afk_ops);
        if (!acl->afk) {
                err = -ENOMEM;
                goto err_afk_create;
@@ -857,12 +874,10 @@ int mlxsw_sp_acl_init(struct mlxsw_sp *mlxsw_sp)
        acl->dummy_fid = fid;
 
        INIT_LIST_HEAD(&acl->rules);
-       err = acl_ops->init(mlxsw_sp, acl->priv);
+       err = mlxsw_sp_acl_tcam_init(mlxsw_sp, &acl->tcam);
        if (err)
                goto err_acl_ops_init;
 
-       acl->ops = acl_ops;
-
        /* Create the delayed work for the rule activity_update */
        INIT_DELAYED_WORK(&acl->rule_activity_update.dw,
                          mlxsw_sp_acl_rul_activity_update_work);
@@ -884,10 +899,9 @@ err_afk_create:
 void mlxsw_sp_acl_fini(struct mlxsw_sp *mlxsw_sp)
 {
        struct mlxsw_sp_acl *acl = mlxsw_sp->acl;
-       const struct mlxsw_sp_acl_ops *acl_ops = acl->ops;
 
        cancel_delayed_work_sync(&mlxsw_sp->acl->rule_activity_update.dw);
-       acl_ops->fini(mlxsw_sp, acl->priv);
+       mlxsw_sp_acl_tcam_fini(mlxsw_sp, &acl->tcam);
        WARN_ON(!list_empty(&acl->rules));
        mlxsw_sp_fid_put(acl->dummy_fid);
        rhashtable_destroy(&acl->ruleset_ht);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_atcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_atcam.c
new file mode 100644 (file)
index 0000000..3a05e0b
--- /dev/null
@@ -0,0 +1,568 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_atcam.c
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2018 Jiri Pirko <jiri@mellanox.com>
+ * Copyright (c) 2018 Ido Schimmel <idosch@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/gfp.h>
+#include <linux/refcount.h>
+#include <linux/rhashtable.h>
+
+#include "reg.h"
+#include "core.h"
+#include "spectrum.h"
+#include "spectrum_acl_tcam.h"
+#include "core_acl_flex_keys.h"
+
+#define MLXSW_SP_ACL_ATCAM_LKEY_ID_BLOCK_START 6
+#define MLXSW_SP_ACL_ATCAM_LKEY_ID_BLOCK_END   11
+
+struct mlxsw_sp_acl_atcam_lkey_id_ht_key {
+       char enc_key[MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN]; /* MSB blocks */
+       u8 erp_id;
+};
+
+struct mlxsw_sp_acl_atcam_lkey_id {
+       struct rhash_head ht_node;
+       struct mlxsw_sp_acl_atcam_lkey_id_ht_key ht_key;
+       refcount_t refcnt;
+       u32 id;
+};
+
+struct mlxsw_sp_acl_atcam_region_ops {
+       int (*init)(struct mlxsw_sp_acl_atcam_region *aregion);
+       void (*fini)(struct mlxsw_sp_acl_atcam_region *aregion);
+       struct mlxsw_sp_acl_atcam_lkey_id *
+               (*lkey_id_get)(struct mlxsw_sp_acl_atcam_region *aregion,
+                              struct mlxsw_sp_acl_rule_info *rulei, u8 erp_id);
+       void (*lkey_id_put)(struct mlxsw_sp_acl_atcam_region *aregion,
+                           struct mlxsw_sp_acl_atcam_lkey_id *lkey_id);
+};
+
+struct mlxsw_sp_acl_atcam_region_generic {
+       struct mlxsw_sp_acl_atcam_lkey_id dummy_lkey_id;
+};
+
+struct mlxsw_sp_acl_atcam_region_12kb {
+       struct rhashtable lkey_ht;
+       unsigned int max_lkey_id;
+       unsigned long *used_lkey_id;
+};
+
+static const struct rhashtable_params mlxsw_sp_acl_atcam_lkey_id_ht_params = {
+       .key_len = sizeof(struct mlxsw_sp_acl_atcam_lkey_id_ht_key),
+       .key_offset = offsetof(struct mlxsw_sp_acl_atcam_lkey_id, ht_key),
+       .head_offset = offsetof(struct mlxsw_sp_acl_atcam_lkey_id, ht_node),
+};
+
+static const struct rhashtable_params mlxsw_sp_acl_atcam_entries_ht_params = {
+       .key_len = sizeof(struct mlxsw_sp_acl_atcam_entry_ht_key),
+       .key_offset = offsetof(struct mlxsw_sp_acl_atcam_entry, ht_key),
+       .head_offset = offsetof(struct mlxsw_sp_acl_atcam_entry, ht_node),
+};
+
+static bool
+mlxsw_sp_acl_atcam_is_centry(const struct mlxsw_sp_acl_atcam_entry *aentry)
+{
+       return mlxsw_sp_acl_erp_is_ctcam_erp(aentry->erp);
+}
+
+static int
+mlxsw_sp_acl_atcam_region_generic_init(struct mlxsw_sp_acl_atcam_region *aregion)
+{
+       struct mlxsw_sp_acl_atcam_region_generic *region_generic;
+
+       region_generic = kzalloc(sizeof(*region_generic), GFP_KERNEL);
+       if (!region_generic)
+               return -ENOMEM;
+
+       refcount_set(&region_generic->dummy_lkey_id.refcnt, 1);
+       aregion->priv = region_generic;
+
+       return 0;
+}
+
+static void
+mlxsw_sp_acl_atcam_region_generic_fini(struct mlxsw_sp_acl_atcam_region *aregion)
+{
+       kfree(aregion->priv);
+}
+
+static struct mlxsw_sp_acl_atcam_lkey_id *
+mlxsw_sp_acl_atcam_generic_lkey_id_get(struct mlxsw_sp_acl_atcam_region *aregion,
+                                      struct mlxsw_sp_acl_rule_info *rulei,
+                                      u8 erp_id)
+{
+       struct mlxsw_sp_acl_atcam_region_generic *region_generic;
+
+       region_generic = aregion->priv;
+       return &region_generic->dummy_lkey_id;
+}
+
+static void
+mlxsw_sp_acl_atcam_generic_lkey_id_put(struct mlxsw_sp_acl_atcam_region *aregion,
+                                      struct mlxsw_sp_acl_atcam_lkey_id *lkey_id)
+{
+}
+
+static const struct mlxsw_sp_acl_atcam_region_ops
+mlxsw_sp_acl_atcam_region_generic_ops = {
+       .init           = mlxsw_sp_acl_atcam_region_generic_init,
+       .fini           = mlxsw_sp_acl_atcam_region_generic_fini,
+       .lkey_id_get    = mlxsw_sp_acl_atcam_generic_lkey_id_get,
+       .lkey_id_put    = mlxsw_sp_acl_atcam_generic_lkey_id_put,
+};
+
+static int
+mlxsw_sp_acl_atcam_region_12kb_init(struct mlxsw_sp_acl_atcam_region *aregion)
+{
+       struct mlxsw_sp *mlxsw_sp = aregion->region->mlxsw_sp;
+       struct mlxsw_sp_acl_atcam_region_12kb *region_12kb;
+       size_t alloc_size;
+       u64 max_lkey_id;
+       int err;
+
+       if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, ACL_MAX_LARGE_KEY_ID))
+               return -EIO;
+
+       max_lkey_id = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_LARGE_KEY_ID);
+       region_12kb = kzalloc(sizeof(*region_12kb), GFP_KERNEL);
+       if (!region_12kb)
+               return -ENOMEM;
+
+       alloc_size = BITS_TO_LONGS(max_lkey_id) * sizeof(unsigned long);
+       region_12kb->used_lkey_id = kzalloc(alloc_size, GFP_KERNEL);
+       if (!region_12kb->used_lkey_id) {
+               err = -ENOMEM;
+               goto err_used_lkey_id_alloc;
+       }
+
+       err = rhashtable_init(&region_12kb->lkey_ht,
+                             &mlxsw_sp_acl_atcam_lkey_id_ht_params);
+       if (err)
+               goto err_rhashtable_init;
+
+       region_12kb->max_lkey_id = max_lkey_id;
+       aregion->priv = region_12kb;
+
+       return 0;
+
+err_rhashtable_init:
+       kfree(region_12kb->used_lkey_id);
+err_used_lkey_id_alloc:
+       kfree(region_12kb);
+       return err;
+}
+
+static void
+mlxsw_sp_acl_atcam_region_12kb_fini(struct mlxsw_sp_acl_atcam_region *aregion)
+{
+       struct mlxsw_sp_acl_atcam_region_12kb *region_12kb = aregion->priv;
+
+       rhashtable_destroy(&region_12kb->lkey_ht);
+       kfree(region_12kb->used_lkey_id);
+       kfree(region_12kb);
+}
+
+static struct mlxsw_sp_acl_atcam_lkey_id *
+mlxsw_sp_acl_atcam_lkey_id_create(struct mlxsw_sp_acl_atcam_region *aregion,
+                                 struct mlxsw_sp_acl_atcam_lkey_id_ht_key *ht_key)
+{
+       struct mlxsw_sp_acl_atcam_region_12kb *region_12kb = aregion->priv;
+       struct mlxsw_sp_acl_atcam_lkey_id *lkey_id;
+       u32 id;
+       int err;
+
+       id = find_first_zero_bit(region_12kb->used_lkey_id,
+                                region_12kb->max_lkey_id);
+       if (id < region_12kb->max_lkey_id)
+               __set_bit(id, region_12kb->used_lkey_id);
+       else
+               return ERR_PTR(-ENOBUFS);
+
+       lkey_id = kzalloc(sizeof(*lkey_id), GFP_KERNEL);
+       if (!lkey_id) {
+               err = -ENOMEM;
+               goto err_lkey_id_alloc;
+       }
+
+       lkey_id->id = id;
+       memcpy(&lkey_id->ht_key, ht_key, sizeof(*ht_key));
+       refcount_set(&lkey_id->refcnt, 1);
+
+       err = rhashtable_insert_fast(&region_12kb->lkey_ht,
+                                    &lkey_id->ht_node,
+                                    mlxsw_sp_acl_atcam_lkey_id_ht_params);
+       if (err)
+               goto err_rhashtable_insert;
+
+       return lkey_id;
+
+err_rhashtable_insert:
+       kfree(lkey_id);
+err_lkey_id_alloc:
+       __clear_bit(id, region_12kb->used_lkey_id);
+       return ERR_PTR(err);
+}
+
+static void
+mlxsw_sp_acl_atcam_lkey_id_destroy(struct mlxsw_sp_acl_atcam_region *aregion,
+                                  struct mlxsw_sp_acl_atcam_lkey_id *lkey_id)
+{
+       struct mlxsw_sp_acl_atcam_region_12kb *region_12kb = aregion->priv;
+       u32 id = lkey_id->id;
+
+       rhashtable_remove_fast(&region_12kb->lkey_ht, &lkey_id->ht_node,
+                              mlxsw_sp_acl_atcam_lkey_id_ht_params);
+       kfree(lkey_id);
+       __clear_bit(id, region_12kb->used_lkey_id);
+}
+
+static struct mlxsw_sp_acl_atcam_lkey_id *
+mlxsw_sp_acl_atcam_12kb_lkey_id_get(struct mlxsw_sp_acl_atcam_region *aregion,
+                                   struct mlxsw_sp_acl_rule_info *rulei,
+                                   u8 erp_id)
+{
+       struct mlxsw_sp_acl_atcam_region_12kb *region_12kb = aregion->priv;
+       struct mlxsw_sp_acl_tcam_region *region = aregion->region;
+       struct mlxsw_sp_acl_atcam_lkey_id_ht_key ht_key = {{ 0 } };
+       struct mlxsw_sp *mlxsw_sp = region->mlxsw_sp;
+       struct mlxsw_afk *afk = mlxsw_sp_acl_afk(mlxsw_sp->acl);
+       struct mlxsw_sp_acl_atcam_lkey_id *lkey_id;
+
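+       /* Only key blocks 6..11 (the MSB half of the key) are encoded into
+        * the hash key; together with the eRP ID they identify the large
+        * key ID.
+        */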
+       mlxsw_afk_encode(afk, region->key_info, &rulei->values, ht_key.enc_key,
+                        NULL, MLXSW_SP_ACL_ATCAM_LKEY_ID_BLOCK_START,
+                        MLXSW_SP_ACL_ATCAM_LKEY_ID_BLOCK_END);
+       ht_key.erp_id = erp_id;
+       lkey_id = rhashtable_lookup_fast(&region_12kb->lkey_ht, &ht_key,
+                                        mlxsw_sp_acl_atcam_lkey_id_ht_params);
+       if (lkey_id) {
+               refcount_inc(&lkey_id->refcnt);
+               return lkey_id;
+       }
+
+       return mlxsw_sp_acl_atcam_lkey_id_create(aregion, &ht_key);
+}
+
+static void
+mlxsw_sp_acl_atcam_12kb_lkey_id_put(struct mlxsw_sp_acl_atcam_region *aregion,
+                                   struct mlxsw_sp_acl_atcam_lkey_id *lkey_id)
+{
+       if (refcount_dec_and_test(&lkey_id->refcnt))
+               mlxsw_sp_acl_atcam_lkey_id_destroy(aregion, lkey_id);
+}
+
+static const struct mlxsw_sp_acl_atcam_region_ops
+mlxsw_sp_acl_atcam_region_12kb_ops = {
+       .init           = mlxsw_sp_acl_atcam_region_12kb_init,
+       .fini           = mlxsw_sp_acl_atcam_region_12kb_fini,
+       .lkey_id_get    = mlxsw_sp_acl_atcam_12kb_lkey_id_get,
+       .lkey_id_put    = mlxsw_sp_acl_atcam_12kb_lkey_id_put,
+};
+
+static const struct mlxsw_sp_acl_atcam_region_ops *
+mlxsw_sp_acl_atcam_region_ops_arr[] = {
+       [MLXSW_SP_ACL_ATCAM_REGION_TYPE_2KB]    =
+               &mlxsw_sp_acl_atcam_region_generic_ops,
+       [MLXSW_SP_ACL_ATCAM_REGION_TYPE_4KB]    =
+               &mlxsw_sp_acl_atcam_region_generic_ops,
+       [MLXSW_SP_ACL_ATCAM_REGION_TYPE_8KB]    =
+               &mlxsw_sp_acl_atcam_region_generic_ops,
+       [MLXSW_SP_ACL_ATCAM_REGION_TYPE_12KB]   =
+               &mlxsw_sp_acl_atcam_region_12kb_ops,
+};
+
+int mlxsw_sp_acl_atcam_region_associate(struct mlxsw_sp *mlxsw_sp,
+                                       u16 region_id)
+{
+       char perar_pl[MLXSW_REG_PERAR_LEN];
+       /* For now, just assume that every region has 12 key blocks */
+       u16 hw_region = region_id * 3;
+       u64 max_regions;
+
+       max_regions = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_REGIONS);
+       if (hw_region >= max_regions)
+               return -ENOBUFS;
+
+       mlxsw_reg_perar_pack(perar_pl, region_id, hw_region);
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(perar), perar_pl);
+}
+
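The mapping in mlxsw_sp_acl_atcam_region_associate() follows from the stated assumption that every region uses the full 12 key blocks: if a hardware region covers 4 key blocks (an assumption here, inferred from the factor of 3), a 12-block region occupies 3 consecutive hardware regions, so logical region N starts at hardware region 3 * N. A small sketch of the arithmetic and its bounds check, with an illustrative limit in place of the ACL_MAX_REGIONS resource:

    #include <stdio.h>

    #define REGION_KEY_BLOCKS       12      /* assumed per logical region */
    #define HW_REGION_KEY_BLOCKS    4       /* assumed per hardware region */
    #define MAX_HW_REGIONS          1024    /* illustrative device limit */

    /* First hardware region backing a logical region, or -1 if out of range */
    static int hw_region_base(unsigned int region_id)
    {
            unsigned int hw_region;

            hw_region = region_id * (REGION_KEY_BLOCKS / HW_REGION_KEY_BLOCKS);
            if (hw_region >= MAX_HW_REGIONS)
                    return -1;
            return hw_region;
    }

    int main(void)
    {
            printf("region 5 -> hw region %d\n", hw_region_base(5)); /* 15 */
            return 0;
    }
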
+static void
+mlxsw_sp_acl_atcam_region_type_init(struct mlxsw_sp_acl_atcam_region *aregion)
+{
+       struct mlxsw_sp_acl_tcam_region *region = aregion->region;
+       enum mlxsw_sp_acl_atcam_region_type region_type;
+       unsigned int blocks_count;
+
+       /* We already know the blocks count cannot exceed the maximum
+        * blocks count.
+        */
+       blocks_count = mlxsw_afk_key_info_blocks_count_get(region->key_info);
+       if (blocks_count <= 2)
+               region_type = MLXSW_SP_ACL_ATCAM_REGION_TYPE_2KB;
+       else if (blocks_count <= 4)
+               region_type = MLXSW_SP_ACL_ATCAM_REGION_TYPE_4KB;
+       else if (blocks_count <= 8)
+               region_type = MLXSW_SP_ACL_ATCAM_REGION_TYPE_8KB;
+       else
+               region_type = MLXSW_SP_ACL_ATCAM_REGION_TYPE_12KB;
+
+       aregion->type = region_type;
+       aregion->ops = mlxsw_sp_acl_atcam_region_ops_arr[region_type];
+}
+
+int
+mlxsw_sp_acl_atcam_region_init(struct mlxsw_sp *mlxsw_sp,
+                              struct mlxsw_sp_acl_atcam *atcam,
+                              struct mlxsw_sp_acl_atcam_region *aregion,
+                              struct mlxsw_sp_acl_tcam_region *region,
+                              const struct mlxsw_sp_acl_ctcam_region_ops *ops)
+{
+       int err;
+
+       aregion->region = region;
+       aregion->atcam = atcam;
+       mlxsw_sp_acl_atcam_region_type_init(aregion);
+
+       err = rhashtable_init(&aregion->entries_ht,
+                             &mlxsw_sp_acl_atcam_entries_ht_params);
+       if (err)
+               return err;
+       err = aregion->ops->init(aregion);
+       if (err)
+               goto err_ops_init;
+       err = mlxsw_sp_acl_erp_region_init(aregion);
+       if (err)
+               goto err_erp_region_init;
+       err = mlxsw_sp_acl_ctcam_region_init(mlxsw_sp, &aregion->cregion,
+                                            region, ops);
+       if (err)
+               goto err_ctcam_region_init;
+
+       return 0;
+
+err_ctcam_region_init:
+       mlxsw_sp_acl_erp_region_fini(aregion);
+err_erp_region_init:
+       aregion->ops->fini(aregion);
+err_ops_init:
+       rhashtable_destroy(&aregion->entries_ht);
+       return err;
+}
+
+void mlxsw_sp_acl_atcam_region_fini(struct mlxsw_sp_acl_atcam_region *aregion)
+{
+       mlxsw_sp_acl_ctcam_region_fini(&aregion->cregion);
+       mlxsw_sp_acl_erp_region_fini(aregion);
+       aregion->ops->fini(aregion);
+       rhashtable_destroy(&aregion->entries_ht);
+}
+
+void mlxsw_sp_acl_atcam_chunk_init(struct mlxsw_sp_acl_atcam_region *aregion,
+                                  struct mlxsw_sp_acl_atcam_chunk *achunk,
+                                  unsigned int priority)
+{
+       mlxsw_sp_acl_ctcam_chunk_init(&aregion->cregion, &achunk->cchunk,
+                                     priority);
+}
+
+void mlxsw_sp_acl_atcam_chunk_fini(struct mlxsw_sp_acl_atcam_chunk *achunk)
+{
+       mlxsw_sp_acl_ctcam_chunk_fini(&achunk->cchunk);
+}
+
+static int
+mlxsw_sp_acl_atcam_region_entry_insert(struct mlxsw_sp *mlxsw_sp,
+                                      struct mlxsw_sp_acl_atcam_region *aregion,
+                                      struct mlxsw_sp_acl_atcam_entry *aentry,
+                                      struct mlxsw_sp_acl_rule_info *rulei)
+{
+       struct mlxsw_sp_acl_tcam_region *region = aregion->region;
+       u8 erp_id = mlxsw_sp_acl_erp_id(aentry->erp);
+       struct mlxsw_sp_acl_atcam_lkey_id *lkey_id;
+       char ptce3_pl[MLXSW_REG_PTCE3_LEN];
+       u32 kvdl_index, priority;
+       int err;
+
+       err = mlxsw_sp_acl_tcam_priority_get(mlxsw_sp, rulei, &priority, true);
+       if (err)
+               return err;
+
+       lkey_id = aregion->ops->lkey_id_get(aregion, rulei, erp_id);
+       if (IS_ERR(lkey_id))
+               return PTR_ERR(lkey_id);
+       aentry->lkey_id = lkey_id;
+
+       kvdl_index = mlxsw_afa_block_first_kvdl_index(rulei->act_block);
+       mlxsw_reg_ptce3_pack(ptce3_pl, true, MLXSW_REG_PTCE3_OP_WRITE_WRITE,
+                            priority, region->tcam_region_info,
+                            aentry->ht_key.enc_key, erp_id,
+                            refcount_read(&lkey_id->refcnt) != 1, lkey_id->id,
+                            kvdl_index);
+       err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptce3), ptce3_pl);
+       if (err)
+               goto err_ptce3_write;
+
+       return 0;
+
+err_ptce3_write:
+       aregion->ops->lkey_id_put(aregion, lkey_id);
+       return err;
+}
+
+static void
+mlxsw_sp_acl_atcam_region_entry_remove(struct mlxsw_sp *mlxsw_sp,
+                                      struct mlxsw_sp_acl_atcam_region *aregion,
+                                      struct mlxsw_sp_acl_atcam_entry *aentry)
+{
+       struct mlxsw_sp_acl_atcam_lkey_id *lkey_id = aentry->lkey_id;
+       struct mlxsw_sp_acl_tcam_region *region = aregion->region;
+       u8 erp_id = mlxsw_sp_acl_erp_id(aentry->erp);
+       char ptce3_pl[MLXSW_REG_PTCE3_LEN];
+
+       mlxsw_reg_ptce3_pack(ptce3_pl, false, MLXSW_REG_PTCE3_OP_WRITE_WRITE, 0,
+                            region->tcam_region_info, aentry->ht_key.enc_key,
+                            erp_id, refcount_read(&lkey_id->refcnt) != 1,
+                            lkey_id->id, 0);
+       mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptce3), ptce3_pl);
+       aregion->ops->lkey_id_put(aregion, lkey_id);
+}
+
+static int
+__mlxsw_sp_acl_atcam_entry_add(struct mlxsw_sp *mlxsw_sp,
+                              struct mlxsw_sp_acl_atcam_region *aregion,
+                              struct mlxsw_sp_acl_atcam_entry *aentry,
+                              struct mlxsw_sp_acl_rule_info *rulei)
+{
+       struct mlxsw_sp_acl_tcam_region *region = aregion->region;
+       char mask[MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN] = { 0 };
+       struct mlxsw_afk *afk = mlxsw_sp_acl_afk(mlxsw_sp->acl);
+       struct mlxsw_sp_acl_erp *erp;
+       unsigned int blocks_count;
+       int err;
+
+       blocks_count = mlxsw_afk_key_info_blocks_count_get(region->key_info);
+       mlxsw_afk_encode(afk, region->key_info, &rulei->values,
+                        aentry->ht_key.enc_key, mask, 0, blocks_count - 1);
+
+       erp = mlxsw_sp_acl_erp_get(aregion, mask, false);
+       if (IS_ERR(erp))
+               return PTR_ERR(erp);
+       aentry->erp = erp;
+       aentry->ht_key.erp_id = mlxsw_sp_acl_erp_id(erp);
+
+       /* We can't insert identical rules into the A-TCAM, so fail and
+        * let the rule spill into the C-TCAM
+        */
+       err = rhashtable_lookup_insert_fast(&aregion->entries_ht,
+                                           &aentry->ht_node,
+                                           mlxsw_sp_acl_atcam_entries_ht_params);
+       if (err)
+               goto err_rhashtable_insert;
+
+       err = mlxsw_sp_acl_atcam_region_entry_insert(mlxsw_sp, aregion, aentry,
+                                                    rulei);
+       if (err)
+               goto err_rule_insert;
+
+       return 0;
+
+err_rule_insert:
+       rhashtable_remove_fast(&aregion->entries_ht, &aentry->ht_node,
+                              mlxsw_sp_acl_atcam_entries_ht_params);
+err_rhashtable_insert:
+       mlxsw_sp_acl_erp_put(aregion, erp);
+       return err;
+}
+
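rhashtable_lookup_insert_fast() acts as an insert-if-absent here: an entry whose encoded key and eRP ID match an existing one is rejected, and the error path deliberately propagates that so the caller can spill the rule into the C-TCAM. A userspace sketch of that contract (a flat array stands in for the hash table; names are hypothetical):

    #include <errno.h>

    #define MAX_ENTRIES 16

    static unsigned long entries[MAX_ENTRIES];
    static unsigned int num_entries;

    /* Insert only if no identical key exists; -EEXIST otherwise */
    static int insert_if_absent(unsigned long key)
    {
            unsigned int i;

            for (i = 0; i < num_entries; i++)
                    if (entries[i] == key)
                            return -EEXIST;
            if (num_entries == MAX_ENTRIES)
                    return -ENOBUFS;
            entries[num_entries++] = key;
            return 0;
    }

On failure the caller falls back to a secondary store rather than treating the duplicate as fatal, which is exactly what mlxsw_sp_acl_atcam_entry_add() below does.
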
+static void
+__mlxsw_sp_acl_atcam_entry_del(struct mlxsw_sp *mlxsw_sp,
+                              struct mlxsw_sp_acl_atcam_region *aregion,
+                              struct mlxsw_sp_acl_atcam_entry *aentry)
+{
+       mlxsw_sp_acl_atcam_region_entry_remove(mlxsw_sp, aregion, aentry);
+       rhashtable_remove_fast(&aregion->entries_ht, &aentry->ht_node,
+                              mlxsw_sp_acl_atcam_entries_ht_params);
+       mlxsw_sp_acl_erp_put(aregion, aentry->erp);
+}
+
+int mlxsw_sp_acl_atcam_entry_add(struct mlxsw_sp *mlxsw_sp,
+                                struct mlxsw_sp_acl_atcam_region *aregion,
+                                struct mlxsw_sp_acl_atcam_chunk *achunk,
+                                struct mlxsw_sp_acl_atcam_entry *aentry,
+                                struct mlxsw_sp_acl_rule_info *rulei)
+{
+       int err;
+
+       err = __mlxsw_sp_acl_atcam_entry_add(mlxsw_sp, aregion, aentry, rulei);
+       if (!err)
+               return 0;
+
+       /* It is possible we failed to add the rule to the A-TCAM because
+        * the maximum number of masks was exceeded. Try to spill into the
+        * C-TCAM.
+        */
+       err = mlxsw_sp_acl_ctcam_entry_add(mlxsw_sp, &aregion->cregion,
+                                          &achunk->cchunk, &aentry->centry,
+                                          rulei, true);
+       return err;
+}
+
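mlxsw_sp_acl_atcam_entry_add()/_del() form a two-tier pattern: adds try the primary (A-TCAM) store and spill into the fallback (C-TCAM) on failure, while deletes first check which tier the entry actually landed in. A compact sketch of the shape, with trivial stand-in stores:

    #include <errno.h>
    #include <stdbool.h>

    struct entry {
            bool in_fallback;       /* records which tier holds the entry */
    };

    static int primary_used, fallback_used;

    static int primary_add(void)
    {
            if (primary_used >= 2)  /* tiny capacity, to force spilling */
                    return -ENOBUFS;
            primary_used++;
            return 0;
    }

    /* Try the fast primary store first, spill into the fallback on failure */
    static int entry_add(struct entry *e)
    {
            if (!primary_add())
                    return 0;
            e->in_fallback = true;
            fallback_used++;
            return 0;
    }

    /* Delete from whichever tier the entry landed in */
    static void entry_del(struct entry *e)
    {
            if (e->in_fallback)
                    fallback_used--;
            else
                    primary_used--;
    }
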
+void mlxsw_sp_acl_atcam_entry_del(struct mlxsw_sp *mlxsw_sp,
+                                 struct mlxsw_sp_acl_atcam_region *aregion,
+                                 struct mlxsw_sp_acl_atcam_chunk *achunk,
+                                 struct mlxsw_sp_acl_atcam_entry *aentry)
+{
+       if (mlxsw_sp_acl_atcam_is_centry(aentry))
+               mlxsw_sp_acl_ctcam_entry_del(mlxsw_sp, &aregion->cregion,
+                                            &achunk->cchunk, &aentry->centry);
+       else
+               __mlxsw_sp_acl_atcam_entry_del(mlxsw_sp, aregion, aentry);
+}
+
+int mlxsw_sp_acl_atcam_init(struct mlxsw_sp *mlxsw_sp,
+                           struct mlxsw_sp_acl_atcam *atcam)
+{
+       return mlxsw_sp_acl_erps_init(mlxsw_sp, atcam);
+}
+
+void mlxsw_sp_acl_atcam_fini(struct mlxsw_sp *mlxsw_sp,
+                            struct mlxsw_sp_acl_atcam *atcam)
+{
+       mlxsw_sp_acl_erps_fini(mlxsw_sp, atcam);
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_ctcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_ctcam.c
new file mode 100644 (file)
index 0000000..7440a11
--- /dev/null
@@ -0,0 +1,227 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_ctcam.c
+ * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017-2018 Jiri Pirko <jiri@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/parman.h>
+
+#include "reg.h"
+#include "core.h"
+#include "spectrum.h"
+#include "spectrum_acl_tcam.h"
+
+static int
+mlxsw_sp_acl_ctcam_region_resize(struct mlxsw_sp *mlxsw_sp,
+                                struct mlxsw_sp_acl_tcam_region *region,
+                                u16 new_size)
+{
+       char ptar_pl[MLXSW_REG_PTAR_LEN];
+
+       mlxsw_reg_ptar_pack(ptar_pl, MLXSW_REG_PTAR_OP_RESIZE,
+                           region->key_type, new_size, region->id,
+                           region->tcam_region_info);
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptar), ptar_pl);
+}
+
+static void
+mlxsw_sp_acl_ctcam_region_move(struct mlxsw_sp *mlxsw_sp,
+                              struct mlxsw_sp_acl_tcam_region *region,
+                              u16 src_offset, u16 dst_offset, u16 size)
+{
+       char prcr_pl[MLXSW_REG_PRCR_LEN];
+
+       mlxsw_reg_prcr_pack(prcr_pl, MLXSW_REG_PRCR_OP_MOVE,
+                           region->tcam_region_info, src_offset,
+                           region->tcam_region_info, dst_offset, size);
+       mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(prcr), prcr_pl);
+}
+
+static int
+mlxsw_sp_acl_ctcam_region_entry_insert(struct mlxsw_sp *mlxsw_sp,
+                                      struct mlxsw_sp_acl_ctcam_region *cregion,
+                                      struct mlxsw_sp_acl_ctcam_entry *centry,
+                                      struct mlxsw_sp_acl_rule_info *rulei,
+                                      bool fillup_priority)
+{
+       struct mlxsw_sp_acl_tcam_region *region = cregion->region;
+       struct mlxsw_afk *afk = mlxsw_sp_acl_afk(mlxsw_sp->acl);
+       char ptce2_pl[MLXSW_REG_PTCE2_LEN];
+       unsigned int blocks_count;
+       char *act_set;
+       u32 priority;
+       char *mask;
+       char *key;
+       int err;
+
+       err = mlxsw_sp_acl_tcam_priority_get(mlxsw_sp, rulei, &priority,
+                                            fillup_priority);
+       if (err)
+               return err;
+
+       mlxsw_reg_ptce2_pack(ptce2_pl, true, MLXSW_REG_PTCE2_OP_WRITE_WRITE,
+                            region->tcam_region_info,
+                            centry->parman_item.index, priority);
+       key = mlxsw_reg_ptce2_flex_key_blocks_data(ptce2_pl);
+       mask = mlxsw_reg_ptce2_mask_data(ptce2_pl);
+       blocks_count = mlxsw_afk_key_info_blocks_count_get(region->key_info);
+       mlxsw_afk_encode(afk, region->key_info, &rulei->values, key, mask, 0,
+                        blocks_count - 1);
+
+       err = cregion->ops->entry_insert(cregion, centry, mask);
+       if (err)
+               return err;
+
+       /* Only the first action set belongs here, the rest is in KVD */
+       act_set = mlxsw_afa_block_first_set(rulei->act_block);
+       mlxsw_reg_ptce2_flex_action_set_memcpy_to(ptce2_pl, act_set);
+
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptce2), ptce2_pl);
+}
+
+static void
+mlxsw_sp_acl_ctcam_region_entry_remove(struct mlxsw_sp *mlxsw_sp,
+                                      struct mlxsw_sp_acl_ctcam_region *cregion,
+                                      struct mlxsw_sp_acl_ctcam_entry *centry)
+{
+       char ptce2_pl[MLXSW_REG_PTCE2_LEN];
+
+       mlxsw_reg_ptce2_pack(ptce2_pl, false, MLXSW_REG_PTCE2_OP_WRITE_WRITE,
+                            cregion->region->tcam_region_info,
+                            centry->parman_item.index, 0);
+       mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptce2), ptce2_pl);
+       cregion->ops->entry_remove(cregion, centry);
+}
+
+static int mlxsw_sp_acl_ctcam_region_parman_resize(void *priv,
+                                                  unsigned long new_count)
+{
+       struct mlxsw_sp_acl_ctcam_region *cregion = priv;
+       struct mlxsw_sp_acl_tcam_region *region = cregion->region;
+       struct mlxsw_sp *mlxsw_sp = region->mlxsw_sp;
+       u64 max_tcam_rules;
+
+       max_tcam_rules = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_TCAM_RULES);
+       if (new_count > max_tcam_rules)
+               return -EINVAL;
+       return mlxsw_sp_acl_ctcam_region_resize(mlxsw_sp, region, new_count);
+}
+
+static void mlxsw_sp_acl_ctcam_region_parman_move(void *priv,
+                                                 unsigned long from_index,
+                                                 unsigned long to_index,
+                                                 unsigned long count)
+{
+       struct mlxsw_sp_acl_ctcam_region *cregion = priv;
+       struct mlxsw_sp_acl_tcam_region *region = cregion->region;
+       struct mlxsw_sp *mlxsw_sp = region->mlxsw_sp;
+
+       mlxsw_sp_acl_ctcam_region_move(mlxsw_sp, region,
+                                      from_index, to_index, count);
+}
+
+static const struct parman_ops mlxsw_sp_acl_ctcam_region_parman_ops = {
+       .base_count     = MLXSW_SP_ACL_TCAM_REGION_BASE_COUNT,
+       .resize_step    = MLXSW_SP_ACL_TCAM_REGION_RESIZE_STEP,
+       .resize         = mlxsw_sp_acl_ctcam_region_parman_resize,
+       .move           = mlxsw_sp_acl_ctcam_region_parman_move,
+       .algo           = PARMAN_ALGO_TYPE_LSORT,
+};
+
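parman, the kernel's priority array manager, owns the placement of entries inside a linear TCAM region and calls back into the driver when the region must grow (.resize, bounded above by ACL_MAX_TCAM_RULES) or when a range of entries must shift to open a slot (.move); PARMAN_ALGO_TYPE_LSORT keeps the array sorted by priority. A rough userspace model of the lsort insert path and the move callback it drives (the manager itself is a stand-in, not the parman API):

    #include <string.h>

    #define CAPACITY 8

    static int prios[CAPACITY];
    static int count;

    /* Stand-in for the driver's .move callback: shift a range of items */
    static void move_cb(int from, int to, int n)
    {
            memmove(&prios[to], &prios[from], n * sizeof(prios[0]));
    }

    /* Linearly sorted insert: find the slot, shift the tail, place the item */
    static int lsort_insert(int prio)
    {
            int i;

            if (count == CAPACITY)
                    return -1;      /* a real manager would .resize here */
            for (i = 0; i < count && prios[i] <= prio; i++)
                    ;
            move_cb(i, i + 1, count - i);
            prios[i] = prio;
            count++;
            return i;               /* slot index, like parman_item.index */
    }
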
+int
+mlxsw_sp_acl_ctcam_region_init(struct mlxsw_sp *mlxsw_sp,
+                              struct mlxsw_sp_acl_ctcam_region *cregion,
+                              struct mlxsw_sp_acl_tcam_region *region,
+                              const struct mlxsw_sp_acl_ctcam_region_ops *ops)
+{
+       cregion->region = region;
+       cregion->ops = ops;
+       cregion->parman = parman_create(&mlxsw_sp_acl_ctcam_region_parman_ops,
+                                       cregion);
+       if (!cregion->parman)
+               return -ENOMEM;
+       return 0;
+}
+
+void mlxsw_sp_acl_ctcam_region_fini(struct mlxsw_sp_acl_ctcam_region *cregion)
+{
+       parman_destroy(cregion->parman);
+}
+
+void mlxsw_sp_acl_ctcam_chunk_init(struct mlxsw_sp_acl_ctcam_region *cregion,
+                                  struct mlxsw_sp_acl_ctcam_chunk *cchunk,
+                                  unsigned int priority)
+{
+       parman_prio_init(cregion->parman, &cchunk->parman_prio, priority);
+}
+
+void mlxsw_sp_acl_ctcam_chunk_fini(struct mlxsw_sp_acl_ctcam_chunk *cchunk)
+{
+       parman_prio_fini(&cchunk->parman_prio);
+}
+
+int mlxsw_sp_acl_ctcam_entry_add(struct mlxsw_sp *mlxsw_sp,
+                                struct mlxsw_sp_acl_ctcam_region *cregion,
+                                struct mlxsw_sp_acl_ctcam_chunk *cchunk,
+                                struct mlxsw_sp_acl_ctcam_entry *centry,
+                                struct mlxsw_sp_acl_rule_info *rulei,
+                                bool fillup_priority)
+{
+       int err;
+
+       err = parman_item_add(cregion->parman, &cchunk->parman_prio,
+                             &centry->parman_item);
+       if (err)
+               return err;
+
+       err = mlxsw_sp_acl_ctcam_region_entry_insert(mlxsw_sp, cregion, centry,
+                                                    rulei, fillup_priority);
+       if (err)
+               goto err_rule_insert;
+       return 0;
+
+err_rule_insert:
+       parman_item_remove(cregion->parman, &cchunk->parman_prio,
+                          &centry->parman_item);
+       return err;
+}
+
+void mlxsw_sp_acl_ctcam_entry_del(struct mlxsw_sp *mlxsw_sp,
+                                 struct mlxsw_sp_acl_ctcam_region *cregion,
+                                 struct mlxsw_sp_acl_ctcam_chunk *cchunk,
+                                 struct mlxsw_sp_acl_ctcam_entry *centry)
+{
+       mlxsw_sp_acl_ctcam_region_entry_remove(mlxsw_sp, cregion, centry);
+       parman_item_remove(cregion->parman, &cchunk->parman_prio,
+                          &centry->parman_item);
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c
new file mode 100644 (file)
index 0000000..463590b
--- /dev/null
@@ -0,0 +1,1199 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2018 Ido Schimmel <idosch@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/bitmap.h>
+#include <linux/errno.h>
+#include <linux/genalloc.h>
+#include <linux/gfp.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/rhashtable.h>
+#include <linux/rtnetlink.h>
+#include <linux/slab.h>
+
+#include "core.h"
+#include "reg.h"
+#include "spectrum.h"
+#include "spectrum_acl_tcam.h"
+
+/* gen_pool_alloc() returns 0 when allocation fails, so use an offset */
+#define MLXSW_SP_ACL_ERP_GENALLOC_OFFSET 0x100
+#define MLXSW_SP_ACL_ERP_MAX_PER_REGION 16
+
+struct mlxsw_sp_acl_erp_core {
+       unsigned int erpt_entries_size[MLXSW_SP_ACL_ATCAM_REGION_TYPE_MAX + 1];
+       struct gen_pool *erp_tables;
+       struct mlxsw_sp *mlxsw_sp;
+       unsigned int num_erp_banks;
+};
+
+struct mlxsw_sp_acl_erp_key {
+       char mask[MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN];
+       bool ctcam;
+};
+
+struct mlxsw_sp_acl_erp {
+       struct mlxsw_sp_acl_erp_key key;
+       u8 id;
+       u8 index;
+       refcount_t refcnt;
+       DECLARE_BITMAP(mask_bitmap, MLXSW_SP_ACL_TCAM_MASK_LEN);
+       struct list_head list;
+       struct rhash_head ht_node;
+       struct mlxsw_sp_acl_erp_table *erp_table;
+};
+
+struct mlxsw_sp_acl_erp_master_mask {
+       DECLARE_BITMAP(bitmap, MLXSW_SP_ACL_TCAM_MASK_LEN);
+       unsigned int count[MLXSW_SP_ACL_TCAM_MASK_LEN];
+};
+
+struct mlxsw_sp_acl_erp_table {
+       struct mlxsw_sp_acl_erp_master_mask master_mask;
+       DECLARE_BITMAP(erp_id_bitmap, MLXSW_SP_ACL_ERP_MAX_PER_REGION);
+       DECLARE_BITMAP(erp_index_bitmap, MLXSW_SP_ACL_ERP_MAX_PER_REGION);
+       struct list_head atcam_erps_list;
+       struct rhashtable erp_ht;
+       struct mlxsw_sp_acl_erp_core *erp_core;
+       struct mlxsw_sp_acl_atcam_region *aregion;
+       const struct mlxsw_sp_acl_erp_table_ops *ops;
+       unsigned long base_index;
+       unsigned int num_atcam_erps;
+       unsigned int num_max_atcam_erps;
+       unsigned int num_ctcam_erps;
+};
+
+static const struct rhashtable_params mlxsw_sp_acl_erp_ht_params = {
+       .key_len = sizeof(struct mlxsw_sp_acl_erp_key),
+       .key_offset = offsetof(struct mlxsw_sp_acl_erp, key),
+       .head_offset = offsetof(struct mlxsw_sp_acl_erp, ht_node),
+};
+
+struct mlxsw_sp_acl_erp_table_ops {
+       struct mlxsw_sp_acl_erp *
+               (*erp_create)(struct mlxsw_sp_acl_erp_table *erp_table,
+                             struct mlxsw_sp_acl_erp_key *key);
+       void (*erp_destroy)(struct mlxsw_sp_acl_erp_table *erp_table,
+                           struct mlxsw_sp_acl_erp *erp);
+};
+
+static struct mlxsw_sp_acl_erp *
+mlxsw_sp_acl_erp_mask_create(struct mlxsw_sp_acl_erp_table *erp_table,
+                            struct mlxsw_sp_acl_erp_key *key);
+static void
+mlxsw_sp_acl_erp_mask_destroy(struct mlxsw_sp_acl_erp_table *erp_table,
+                             struct mlxsw_sp_acl_erp *erp);
+static struct mlxsw_sp_acl_erp *
+mlxsw_sp_acl_erp_second_mask_create(struct mlxsw_sp_acl_erp_table *erp_table,
+                                   struct mlxsw_sp_acl_erp_key *key);
+static void
+mlxsw_sp_acl_erp_second_mask_destroy(struct mlxsw_sp_acl_erp_table *erp_table,
+                                    struct mlxsw_sp_acl_erp *erp);
+static struct mlxsw_sp_acl_erp *
+mlxsw_sp_acl_erp_first_mask_create(struct mlxsw_sp_acl_erp_table *erp_table,
+                                  struct mlxsw_sp_acl_erp_key *key);
+static void
+mlxsw_sp_acl_erp_first_mask_destroy(struct mlxsw_sp_acl_erp_table *erp_table,
+                                   struct mlxsw_sp_acl_erp *erp);
+static void
+mlxsw_sp_acl_erp_no_mask_destroy(struct mlxsw_sp_acl_erp_table *erp_table,
+                                struct mlxsw_sp_acl_erp *erp);
+
+static const struct mlxsw_sp_acl_erp_table_ops erp_multiple_masks_ops = {
+       .erp_create = mlxsw_sp_acl_erp_mask_create,
+       .erp_destroy = mlxsw_sp_acl_erp_mask_destroy,
+};
+
+static const struct mlxsw_sp_acl_erp_table_ops erp_two_masks_ops = {
+       .erp_create = mlxsw_sp_acl_erp_mask_create,
+       .erp_destroy = mlxsw_sp_acl_erp_second_mask_destroy,
+};
+
+static const struct mlxsw_sp_acl_erp_table_ops erp_single_mask_ops = {
+       .erp_create = mlxsw_sp_acl_erp_second_mask_create,
+       .erp_destroy = mlxsw_sp_acl_erp_first_mask_destroy,
+};
+
+static const struct mlxsw_sp_acl_erp_table_ops erp_no_mask_ops = {
+       .erp_create = mlxsw_sp_acl_erp_first_mask_create,
+       .erp_destroy = mlxsw_sp_acl_erp_no_mask_destroy,
+};
+
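The four ops tables above encode a state machine keyed on how many masks the region uses: erp_no_mask_ops (region empty), erp_single_mask_ops (one A-TCAM mask, handled via the per-region master mask with no eRP table), then erp_two_masks_ops and erp_multiple_masks_ops (eRP table in use). Each create/destroy handler does its state's work and then repoints erp_table->ops at the successor state. A compact sketch of this "ops table as state" idiom, reduced to two states:

    struct table;

    struct table_ops {
            void (*add)(struct table *t);
            void (*del)(struct table *t);
    };

    struct table {
            const struct table_ops *ops;
            int count;
    };

    static const struct table_ops empty_ops, nonempty_ops;

    /* Handlers do the state's work, then select the successor state */
    static void empty_add(struct table *t)
    {
            t->count++;
            t->ops = &nonempty_ops;
    }

    static void empty_del(struct table *t)
    {
            /* nothing to delete in this state */
    }

    static void nonempty_add(struct table *t)
    {
            t->count++;
    }

    static void nonempty_del(struct table *t)
    {
            if (--t->count == 0)
                    t->ops = &empty_ops;
    }

    static const struct table_ops empty_ops = { empty_add, empty_del };
    static const struct table_ops nonempty_ops = { nonempty_add, nonempty_del };

Callers only ever invoke t->ops->add()/del(), so the transition logic stays localized in the handlers, which is the property the eRP code relies on.
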
+bool mlxsw_sp_acl_erp_is_ctcam_erp(const struct mlxsw_sp_acl_erp *erp)
+{
+       return erp->key.ctcam;
+}
+
+u8 mlxsw_sp_acl_erp_id(const struct mlxsw_sp_acl_erp *erp)
+{
+       return erp->id;
+}
+
+static unsigned int
+mlxsw_sp_acl_erp_table_entry_size(const struct mlxsw_sp_acl_erp_table *erp_table)
+{
+       struct mlxsw_sp_acl_atcam_region *aregion = erp_table->aregion;
+       struct mlxsw_sp_acl_erp_core *erp_core = erp_table->erp_core;
+
+       return erp_core->erpt_entries_size[aregion->type];
+}
+
+static int mlxsw_sp_acl_erp_id_get(struct mlxsw_sp_acl_erp_table *erp_table,
+                                  u8 *p_id)
+{
+       u8 id;
+
+       id = find_first_zero_bit(erp_table->erp_id_bitmap,
+                                MLXSW_SP_ACL_ERP_MAX_PER_REGION);
+       if (id < MLXSW_SP_ACL_ERP_MAX_PER_REGION) {
+               __set_bit(id, erp_table->erp_id_bitmap);
+               *p_id = id;
+               return 0;
+       }
+
+       return -ENOBUFS;
+}
+
+static void mlxsw_sp_acl_erp_id_put(struct mlxsw_sp_acl_erp_table *erp_table,
+                                   u8 id)
+{
+       __clear_bit(id, erp_table->erp_id_bitmap);
+}
+
+static void
+mlxsw_sp_acl_erp_master_mask_bit_set(unsigned long bit,
+                                    struct mlxsw_sp_acl_erp_master_mask *mask)
+{
+       if (mask->count[bit]++ == 0)
+               __set_bit(bit, mask->bitmap);
+}
+
+static void
+mlxsw_sp_acl_erp_master_mask_bit_clear(unsigned long bit,
+                                      struct mlxsw_sp_acl_erp_master_mask *mask)
+{
+       if (--mask->count[bit] == 0)
+               __clear_bit(bit, mask->bitmap);
+}
+
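The master mask is the union of the masks of all eRPs in the region. Keeping a per-bit use count lets that union be maintained incrementally: a bit enters the aggregate bitmap when its count goes from 0 to 1 and leaves when the count drops back to 0. The same bookkeeping in miniature, over a single 64-bit word:

    static unsigned long long aggregate;    /* union of all member masks */
    static unsigned int use_count[64];

    static void mask_bit_set(unsigned int bit)
    {
            if (use_count[bit]++ == 0)
                    aggregate |= 1ULL << bit;
    }

    static void mask_bit_clear(unsigned int bit)
    {
            if (--use_count[bit] == 0)
                    aggregate &= ~(1ULL << bit);
    }
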
+static int
+mlxsw_sp_acl_erp_master_mask_update(struct mlxsw_sp_acl_erp_table *erp_table)
+{
+       struct mlxsw_sp_acl_tcam_region *region = erp_table->aregion->region;
+       struct mlxsw_sp *mlxsw_sp = region->mlxsw_sp;
+       char percr_pl[MLXSW_REG_PERCR_LEN];
+       char *master_mask;
+
+       mlxsw_reg_percr_pack(percr_pl, region->id);
+       master_mask = mlxsw_reg_percr_master_mask_data(percr_pl);
+       bitmap_to_arr32((u32 *) master_mask, erp_table->master_mask.bitmap,
+                       MLXSW_SP_ACL_TCAM_MASK_LEN);
+
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(percr), percr_pl);
+}
+
+static int
+mlxsw_sp_acl_erp_master_mask_set(struct mlxsw_sp_acl_erp_table *erp_table,
+                                const struct mlxsw_sp_acl_erp *erp)
+{
+       unsigned long bit;
+       int err;
+
+       for_each_set_bit(bit, erp->mask_bitmap, MLXSW_SP_ACL_TCAM_MASK_LEN)
+               mlxsw_sp_acl_erp_master_mask_bit_set(bit,
+                                                    &erp_table->master_mask);
+
+       err = mlxsw_sp_acl_erp_master_mask_update(erp_table);
+       if (err)
+               goto err_master_mask_update;
+
+       return 0;
+
+err_master_mask_update:
+       for_each_set_bit(bit, erp->mask_bitmap, MLXSW_SP_ACL_TCAM_MASK_LEN)
+               mlxsw_sp_acl_erp_master_mask_bit_clear(bit,
+                                                      &erp_table->master_mask);
+       return err;
+}
+
+static int
+mlxsw_sp_acl_erp_master_mask_clear(struct mlxsw_sp_acl_erp_table *erp_table,
+                                  const struct mlxsw_sp_acl_erp *erp)
+{
+       unsigned long bit;
+       int err;
+
+       for_each_set_bit(bit, erp->mask_bitmap, MLXSW_SP_ACL_TCAM_MASK_LEN)
+               mlxsw_sp_acl_erp_master_mask_bit_clear(bit,
+                                                      &erp_table->master_mask);
+
+       err = mlxsw_sp_acl_erp_master_mask_update(erp_table);
+       if (err)
+               goto err_master_mask_update;
+
+       return 0;
+
+err_master_mask_update:
+       for_each_set_bit(bit, erp->mask_bitmap, MLXSW_SP_ACL_TCAM_MASK_LEN)
+               mlxsw_sp_acl_erp_master_mask_bit_set(bit,
+                                                    &erp_table->master_mask);
+       return err;
+}
+
+static struct mlxsw_sp_acl_erp *
+mlxsw_sp_acl_erp_generic_create(struct mlxsw_sp_acl_erp_table *erp_table,
+                               struct mlxsw_sp_acl_erp_key *key)
+{
+       struct mlxsw_sp_acl_erp *erp;
+       int err;
+
+       erp = kzalloc(sizeof(*erp), GFP_KERNEL);
+       if (!erp)
+               return ERR_PTR(-ENOMEM);
+
+       err = mlxsw_sp_acl_erp_id_get(erp_table, &erp->id);
+       if (err)
+               goto err_erp_id_get;
+
+       memcpy(&erp->key, key, sizeof(*key));
+       bitmap_from_arr32(erp->mask_bitmap, (u32 *) key->mask,
+                         MLXSW_SP_ACL_TCAM_MASK_LEN);
+       list_add(&erp->list, &erp_table->atcam_erps_list);
+       refcount_set(&erp->refcnt, 1);
+       erp_table->num_atcam_erps++;
+       erp->erp_table = erp_table;
+
+       err = mlxsw_sp_acl_erp_master_mask_set(erp_table, erp);
+       if (err)
+               goto err_master_mask_set;
+
+       err = rhashtable_insert_fast(&erp_table->erp_ht, &erp->ht_node,
+                                    mlxsw_sp_acl_erp_ht_params);
+       if (err)
+               goto err_rhashtable_insert;
+
+       return erp;
+
+err_rhashtable_insert:
+       mlxsw_sp_acl_erp_master_mask_clear(erp_table, erp);
+err_master_mask_set:
+       erp_table->num_atcam_erps--;
+       list_del(&erp->list);
+       mlxsw_sp_acl_erp_id_put(erp_table, erp->id);
+err_erp_id_get:
+       kfree(erp);
+       return ERR_PTR(err);
+}
+
+static void
+mlxsw_sp_acl_erp_generic_destroy(struct mlxsw_sp_acl_erp *erp)
+{
+       struct mlxsw_sp_acl_erp_table *erp_table = erp->erp_table;
+
+       rhashtable_remove_fast(&erp_table->erp_ht, &erp->ht_node,
+                              mlxsw_sp_acl_erp_ht_params);
+       mlxsw_sp_acl_erp_master_mask_clear(erp_table, erp);
+       erp_table->num_atcam_erps--;
+       list_del(&erp->list);
+       mlxsw_sp_acl_erp_id_put(erp_table, erp->id);
+       kfree(erp);
+}
+
+static int
+mlxsw_sp_acl_erp_table_alloc(struct mlxsw_sp_acl_erp_core *erp_core,
+                            unsigned int num_erps,
+                            enum mlxsw_sp_acl_atcam_region_type region_type,
+                            unsigned long *p_index)
+{
+       unsigned int num_rows, entry_size;
+
+       /* We only allow allocations of entire rows */
+       if (num_erps % erp_core->num_erp_banks != 0)
+               return -EINVAL;
+
+       entry_size = erp_core->erpt_entries_size[region_type];
+       num_rows = num_erps / erp_core->num_erp_banks;
+
+       *p_index = gen_pool_alloc(erp_core->erp_tables, num_rows * entry_size);
+       if (*p_index == 0)
+               return -ENOBUFS;
+       *p_index -= MLXSW_SP_ACL_ERP_GENALLOC_OFFSET;
+
+       return 0;
+}
+
+static void
+mlxsw_sp_acl_erp_table_free(struct mlxsw_sp_acl_erp_core *erp_core,
+                           unsigned int num_erps,
+                           enum mlxsw_sp_acl_atcam_region_type region_type,
+                           unsigned long index)
+{
+       unsigned long base_index;
+       unsigned int entry_size;
+       size_t size;
+
+       entry_size = erp_core->erpt_entries_size[region_type];
+       base_index = index + MLXSW_SP_ACL_ERP_GENALLOC_OFFSET;
+       size = num_erps / erp_core->num_erp_banks * entry_size;
+       gen_pool_free(erp_core->erp_tables, base_index, size);
+}
+
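As the comment at the top of this file notes, gen_pool_alloc() returns 0 on failure, so a pool whose valid range began at 0 could not distinguish "allocated index 0" from "out of space". The eRP table allocator therefore biases the pool by MLXSW_SP_ACL_ERP_GENALLOC_OFFSET, strips the bias after a successful alloc, and re-adds it on free. The same trick with a trivial bump allocator standing in for the gen_pool:

    #define GENALLOC_OFFSET 0x100   /* keeps 0 free as the failure value */
    #define POOL_SIZE       4096

    static unsigned long next_free = GENALLOC_OFFSET;

    /* Returns a biased address, or 0 on failure, like gen_pool_alloc() */
    static unsigned long pool_alloc(unsigned long size)
    {
            if (next_free + size > GENALLOC_OFFSET + POOL_SIZE)
                    return 0;
            next_free += size;
            return next_free - size;
    }

    /* Callers strip the bias to recover the real 0-based table index */
    static long index_alloc(unsigned long size)
    {
            unsigned long addr = pool_alloc(size);

            return addr ? (long)(addr - GENALLOC_OFFSET) : -1;
    }
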
+static struct mlxsw_sp_acl_erp *
+mlxsw_sp_acl_erp_table_master_rp(struct mlxsw_sp_acl_erp_table *erp_table)
+{
+       if (!list_is_singular(&erp_table->atcam_erps_list))
+               return NULL;
+
+       return list_first_entry(&erp_table->atcam_erps_list,
+                               struct mlxsw_sp_acl_erp, list);
+}
+
+static int mlxsw_sp_acl_erp_index_get(struct mlxsw_sp_acl_erp_table *erp_table,
+                                     u8 *p_index)
+{
+       u8 index;
+
+       index = find_first_zero_bit(erp_table->erp_index_bitmap,
+                                   erp_table->num_max_atcam_erps);
+       if (index < erp_table->num_max_atcam_erps) {
+               __set_bit(index, erp_table->erp_index_bitmap);
+               *p_index = index;
+               return 0;
+       }
+
+       return -ENOBUFS;
+}
+
+static void mlxsw_sp_acl_erp_index_put(struct mlxsw_sp_acl_erp_table *erp_table,
+                                      u8 index)
+{
+       __clear_bit(index, erp_table->erp_index_bitmap);
+}
+
+static void
+mlxsw_sp_acl_erp_table_locate(const struct mlxsw_sp_acl_erp_table *erp_table,
+                             const struct mlxsw_sp_acl_erp *erp,
+                             u8 *p_erpt_bank, u8 *p_erpt_index)
+{
+       unsigned int entry_size = mlxsw_sp_acl_erp_table_entry_size(erp_table);
+       struct mlxsw_sp_acl_erp_core *erp_core = erp_table->erp_core;
+       unsigned int row;
+
+       *p_erpt_bank = erp->index % erp_core->num_erp_banks;
+       row = erp->index / erp_core->num_erp_banks;
+       *p_erpt_index = erp_table->base_index + row * entry_size;
+}
+
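mlxsw_sp_acl_erp_table_locate() decomposes an eRP's flat index into a (bank, row) pair: bank = index % num_erp_banks and row = index / num_erp_banks, so consecutive indices stripe across the banks one row at a time; the row is then scaled by the region type's entry size and offset by the table's base index. A quick check of the decomposition round-trip (the bank count is illustrative; the real one is a device resource):

    #include <assert.h>

    #define NUM_BANKS 4

    static void locate(unsigned int index, unsigned int *bank, unsigned int *row)
    {
            *bank = index % NUM_BANKS;
            *row = index / NUM_BANKS;
    }

    int main(void)
    {
            unsigned int bank, row, index;

            for (index = 0; index < 16; index++) {
                    locate(index, &bank, &row);
                    assert(row * NUM_BANKS + bank == index);
            }
            return 0;
    }
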
+static int
+mlxsw_sp_acl_erp_table_erp_add(struct mlxsw_sp_acl_erp_table *erp_table,
+                              struct mlxsw_sp_acl_erp *erp)
+{
+       struct mlxsw_sp *mlxsw_sp = erp_table->erp_core->mlxsw_sp;
+       enum mlxsw_reg_perpt_key_size key_size;
+       char perpt_pl[MLXSW_REG_PERPT_LEN];
+       u8 erpt_bank, erpt_index;
+
+       mlxsw_sp_acl_erp_table_locate(erp_table, erp, &erpt_bank, &erpt_index);
+       key_size = (enum mlxsw_reg_perpt_key_size) erp_table->aregion->type;
+       mlxsw_reg_perpt_pack(perpt_pl, erpt_bank, erpt_index, key_size, erp->id,
+                            0, erp_table->base_index, erp->index,
+                            erp->key.mask);
+       mlxsw_reg_perpt_erp_vector_pack(perpt_pl, erp_table->erp_index_bitmap,
+                                       MLXSW_SP_ACL_ERP_MAX_PER_REGION);
+       mlxsw_reg_perpt_erp_vector_set(perpt_pl, erp->index, true);
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(perpt), perpt_pl);
+}
+
+static void mlxsw_sp_acl_erp_table_erp_del(struct mlxsw_sp_acl_erp *erp)
+{
+       char empty_mask[MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN] = { 0 };
+       struct mlxsw_sp_acl_erp_table *erp_table = erp->erp_table;
+       struct mlxsw_sp *mlxsw_sp = erp_table->erp_core->mlxsw_sp;
+       enum mlxsw_reg_perpt_key_size key_size;
+       char perpt_pl[MLXSW_REG_PERPT_LEN];
+       u8 erpt_bank, erpt_index;
+
+       mlxsw_sp_acl_erp_table_locate(erp_table, erp, &erpt_bank, &erpt_index);
+       key_size = (enum mlxsw_reg_perpt_key_size) erp_table->aregion->type;
+       mlxsw_reg_perpt_pack(perpt_pl, erpt_bank, erpt_index, key_size, erp->id,
+                            0, erp_table->base_index, erp->index, empty_mask);
+       mlxsw_reg_perpt_erp_vector_pack(perpt_pl, erp_table->erp_index_bitmap,
+                                       MLXSW_SP_ACL_ERP_MAX_PER_REGION);
+       mlxsw_reg_perpt_erp_vector_set(perpt_pl, erp->index, false);
+       mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(perpt), perpt_pl);
+}
+
+static int
+mlxsw_sp_acl_erp_table_enable(struct mlxsw_sp_acl_erp_table *erp_table,
+                             bool ctcam_le)
+{
+       struct mlxsw_sp_acl_tcam_region *region = erp_table->aregion->region;
+       struct mlxsw_sp *mlxsw_sp = erp_table->erp_core->mlxsw_sp;
+       char pererp_pl[MLXSW_REG_PERERP_LEN];
+
+       mlxsw_reg_pererp_pack(pererp_pl, region->id, ctcam_le, true, 0,
+                             erp_table->base_index, 0);
+       mlxsw_reg_pererp_erp_vector_pack(pererp_pl, erp_table->erp_index_bitmap,
+                                        MLXSW_SP_ACL_ERP_MAX_PER_REGION);
+
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pererp), pererp_pl);
+}
+
+static void
+mlxsw_sp_acl_erp_table_disable(struct mlxsw_sp_acl_erp_table *erp_table)
+{
+       struct mlxsw_sp_acl_tcam_region *region = erp_table->aregion->region;
+       struct mlxsw_sp *mlxsw_sp = erp_table->erp_core->mlxsw_sp;
+       char pererp_pl[MLXSW_REG_PERERP_LEN];
+       struct mlxsw_sp_acl_erp *master_rp;
+
+       master_rp = mlxsw_sp_acl_erp_table_master_rp(erp_table);
+       /* It is possible we do not have a master RP when we disable the
+        * table: this happens when there are no rules left in the A-TCAM
+        * and the last C-TCAM rule is deleted
+        */
+       mlxsw_reg_pererp_pack(pererp_pl, region->id, false, false, 0, 0,
+                             master_rp ? master_rp->id : 0);
+       mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pererp), pererp_pl);
+}
+
+static int
+mlxsw_sp_acl_erp_table_relocate(struct mlxsw_sp_acl_erp_table *erp_table)
+{
+       struct mlxsw_sp_acl_erp *erp;
+       int err;
+
+       list_for_each_entry(erp, &erp_table->atcam_erps_list, list) {
+               err = mlxsw_sp_acl_erp_table_erp_add(erp_table, erp);
+               if (err)
+                       goto err_table_erp_add;
+       }
+
+       return 0;
+
+err_table_erp_add:
+       list_for_each_entry_continue_reverse(erp, &erp_table->atcam_erps_list,
+                                            list)
+               mlxsw_sp_acl_erp_table_erp_del(erp);
+       return err;
+}
+
+static int
+mlxsw_sp_acl_erp_table_expand(struct mlxsw_sp_acl_erp_table *erp_table)
+{
+       unsigned int num_erps, old_num_erps = erp_table->num_max_atcam_erps;
+       struct mlxsw_sp_acl_erp_core *erp_core = erp_table->erp_core;
+       unsigned long old_base_index = erp_table->base_index;
+       bool ctcam_le = erp_table->num_ctcam_erps > 0;
+       int err;
+
+       if (erp_table->num_atcam_erps < erp_table->num_max_atcam_erps)
+               return 0;
+
+       if (erp_table->num_max_atcam_erps == MLXSW_SP_ACL_ERP_MAX_PER_REGION)
+               return -ENOBUFS;
+
+       num_erps = old_num_erps + erp_core->num_erp_banks;
+       err = mlxsw_sp_acl_erp_table_alloc(erp_core, num_erps,
+                                          erp_table->aregion->type,
+                                          &erp_table->base_index);
+       if (err)
+               return err;
+       erp_table->num_max_atcam_erps = num_erps;
+
+       err = mlxsw_sp_acl_erp_table_relocate(erp_table);
+       if (err)
+               goto err_table_relocate;
+
+       err = mlxsw_sp_acl_erp_table_enable(erp_table, ctcam_le);
+       if (err)
+               goto err_table_enable;
+
+       mlxsw_sp_acl_erp_table_free(erp_core, old_num_erps,
+                                   erp_table->aregion->type, old_base_index);
+
+       return 0;
+
+err_table_enable:
+err_table_relocate:
+       erp_table->num_max_atcam_erps = old_num_erps;
+       mlxsw_sp_acl_erp_table_free(erp_core, num_erps,
+                                   erp_table->aregion->type,
+                                   erp_table->base_index);
+       erp_table->base_index = old_base_index;
+       return err;
+}
+
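The expand path above grows the table copy-and-swap style: allocate a larger area, re-add (relocate) every eRP into it, switch the region over to the new base, and only then free the old area; any failure frees the new area and restores the old base, leaving the table untouched. A realloc-like sketch of that ordering (memcpy stands in for the per-eRP relocation, and only the allocation can fail here):

    #include <stdlib.h>
    #include <string.h>

    struct grow_table {
            int *slots;
            unsigned int capacity;
    };

    /* Grow by copy-and-swap: the old area stays live until the new one works */
    static int table_expand(struct grow_table *t, unsigned int new_capacity)
    {
            int *new_slots;

            new_slots = calloc(new_capacity, sizeof(*new_slots));
            if (!new_slots)
                    return -1;      /* old area untouched on failure */

            /* "relocate": re-establish every entry in the new area */
            if (t->capacity)
                    memcpy(new_slots, t->slots,
                           t->capacity * sizeof(*t->slots));

            /* "enable": switch over, then release the old area */
            free(t->slots);
            t->slots = new_slots;
            t->capacity = new_capacity;
            return 0;
    }
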
+static int
+mlxsw_sp_acl_erp_region_table_trans(struct mlxsw_sp_acl_erp_table *erp_table)
+{
+       struct mlxsw_sp_acl_erp_core *erp_core = erp_table->erp_core;
+       struct mlxsw_sp_acl_erp *master_rp;
+       int err;
+
+       /* Initially, allocate a single eRP row. Expand later as needed */
+       err = mlxsw_sp_acl_erp_table_alloc(erp_core, erp_core->num_erp_banks,
+                                          erp_table->aregion->type,
+                                          &erp_table->base_index);
+       if (err)
+               return err;
+       erp_table->num_max_atcam_erps = erp_core->num_erp_banks;
+
+       /* Transition the sole RP currently configured (the master RP)
+        * to the eRP table
+        */
+       master_rp = mlxsw_sp_acl_erp_table_master_rp(erp_table);
+       if (!master_rp) {
+               err = -EINVAL;
+               goto err_table_master_rp;
+       }
+
+       /* Maintain the same eRP bank for the master RP, so that we
+        * do not need to update the Bloom filter
+        */
+       master_rp->index = master_rp->index % erp_core->num_erp_banks;
+       __set_bit(master_rp->index, erp_table->erp_index_bitmap);
+
+       err = mlxsw_sp_acl_erp_table_erp_add(erp_table, master_rp);
+       if (err)
+               goto err_table_master_rp_add;
+
+       err = mlxsw_sp_acl_erp_table_enable(erp_table, false);
+       if (err)
+               goto err_table_enable;
+
+       return 0;
+
+err_table_enable:
+       mlxsw_sp_acl_erp_table_erp_del(master_rp);
+err_table_master_rp_add:
+       __clear_bit(master_rp->index, erp_table->erp_index_bitmap);
+err_table_master_rp:
+       mlxsw_sp_acl_erp_table_free(erp_core, erp_table->num_max_atcam_erps,
+                                   erp_table->aregion->type,
+                                   erp_table->base_index);
+       return err;
+}
+
+static void
+mlxsw_sp_acl_erp_region_master_mask_trans(struct mlxsw_sp_acl_erp_table *erp_table)
+{
+       struct mlxsw_sp_acl_erp_core *erp_core = erp_table->erp_core;
+       struct mlxsw_sp_acl_erp *master_rp;
+
+       mlxsw_sp_acl_erp_table_disable(erp_table);
+       master_rp = mlxsw_sp_acl_erp_table_master_rp(erp_table);
+       if (!master_rp)
+               return;
+       mlxsw_sp_acl_erp_table_erp_del(master_rp);
+       __clear_bit(master_rp->index, erp_table->erp_index_bitmap);
+       mlxsw_sp_acl_erp_table_free(erp_core, erp_table->num_max_atcam_erps,
+                                   erp_table->aregion->type,
+                                   erp_table->base_index);
+}
+
+static int
+mlxsw_sp_acl_erp_region_erp_add(struct mlxsw_sp_acl_erp_table *erp_table,
+                               struct mlxsw_sp_acl_erp *erp)
+{
+       struct mlxsw_sp_acl_tcam_region *region = erp_table->aregion->region;
+       struct mlxsw_sp *mlxsw_sp = erp_table->erp_core->mlxsw_sp;
+       bool ctcam_le = erp_table->num_ctcam_erps > 0;
+       char pererp_pl[MLXSW_REG_PERERP_LEN];
+
+       mlxsw_reg_pererp_pack(pererp_pl, region->id, ctcam_le, true, 0,
+                             erp_table->base_index, 0);
+       mlxsw_reg_pererp_erp_vector_pack(pererp_pl, erp_table->erp_index_bitmap,
+                                        MLXSW_SP_ACL_ERP_MAX_PER_REGION);
+       mlxsw_reg_pererp_erpt_vector_set(pererp_pl, erp->index, true);
+
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pererp), pererp_pl);
+}
+
+static void mlxsw_sp_acl_erp_region_erp_del(struct mlxsw_sp_acl_erp *erp)
+{
+       struct mlxsw_sp_acl_erp_table *erp_table = erp->erp_table;
+       struct mlxsw_sp_acl_tcam_region *region = erp_table->aregion->region;
+       struct mlxsw_sp *mlxsw_sp = erp_table->erp_core->mlxsw_sp;
+       bool ctcam_le = erp_table->num_ctcam_erps > 0;
+       char pererp_pl[MLXSW_REG_PERERP_LEN];
+
+       mlxsw_reg_pererp_pack(pererp_pl, region->id, ctcam_le, true, 0,
+                             erp_table->base_index, 0);
+       mlxsw_reg_pererp_erp_vector_pack(pererp_pl, erp_table->erp_index_bitmap,
+                                        MLXSW_SP_ACL_ERP_MAX_PER_REGION);
+       mlxsw_reg_pererp_erpt_vector_set(pererp_pl, erp->index, false);
+
+       mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pererp), pererp_pl);
+}
+
+static int
+mlxsw_sp_acl_erp_region_ctcam_enable(struct mlxsw_sp_acl_erp_table *erp_table)
+{
+       /* No need to re-enable lookup in the C-TCAM */
+       if (erp_table->num_ctcam_erps > 1)
+               return 0;
+
+       return mlxsw_sp_acl_erp_table_enable(erp_table, true);
+}
+
+static void
+mlxsw_sp_acl_erp_region_ctcam_disable(struct mlxsw_sp_acl_erp_table *erp_table)
+{
+       /* Only disable C-TCAM lookup when last C-TCAM eRP is deleted */
+       if (erp_table->num_ctcam_erps > 1)
+               return;
+
+       mlxsw_sp_acl_erp_table_enable(erp_table, false);
+}
+
+static void
+mlxsw_sp_acl_erp_ctcam_table_ops_set(struct mlxsw_sp_acl_erp_table *erp_table)
+{
+       switch (erp_table->num_atcam_erps) {
+       case 2:
+               /* Keep using the eRP table, but correctly set the
+                * operations pointer so that when an A-TCAM eRP is
+                * deleted we will transition to use the master mask
+                */
+               erp_table->ops = &erp_two_masks_ops;
+               break;
+       case 1:
+               /* We only kept the eRP table because we had C-TCAM
+                * eRPs in use. Now that the last C-TCAM eRP is gone we
+                * can stop using the table and transition to use the
+                * master mask
+                */
+               mlxsw_sp_acl_erp_region_master_mask_trans(erp_table);
+               erp_table->ops = &erp_single_mask_ops;
+               break;
+       case 0:
+               /* There are no more eRPs of any kind used by the region,
+                * so free its eRP table and transition to the initial state
+                */
+               mlxsw_sp_acl_erp_table_disable(erp_table);
+               mlxsw_sp_acl_erp_table_free(erp_table->erp_core,
+                                           erp_table->num_max_atcam_erps,
+                                           erp_table->aregion->type,
+                                           erp_table->base_index);
+               erp_table->ops = &erp_no_mask_ops;
+               break;
+       default:
+               break;
+       }
+}
+
+static struct mlxsw_sp_acl_erp *
+__mlxsw_sp_acl_erp_ctcam_mask_create(struct mlxsw_sp_acl_erp_table *erp_table,
+                                    struct mlxsw_sp_acl_erp_key *key)
+{
+       struct mlxsw_sp_acl_erp *erp;
+       int err;
+
+       erp = kzalloc(sizeof(*erp), GFP_KERNEL);
+       if (!erp)
+               return ERR_PTR(-ENOMEM);
+
+       memcpy(&erp->key, key, sizeof(*key));
+       bitmap_from_arr32(erp->mask_bitmap, (u32 *) key->mask,
+                         MLXSW_SP_ACL_TCAM_MASK_LEN);
+       refcount_set(&erp->refcnt, 1);
+       erp_table->num_ctcam_erps++;
+       erp->erp_table = erp_table;
+
+       err = mlxsw_sp_acl_erp_master_mask_set(erp_table, erp);
+       if (err)
+               goto err_master_mask_set;
+
+       err = rhashtable_insert_fast(&erp_table->erp_ht, &erp->ht_node,
+                                    mlxsw_sp_acl_erp_ht_params);
+       if (err)
+               goto err_rhashtable_insert;
+
+       err = mlxsw_sp_acl_erp_region_ctcam_enable(erp_table);
+       if (err)
+               goto err_erp_region_ctcam_enable;
+
+       /* When C-TCAM is used, the eRP table must be used */
+       erp_table->ops = &erp_multiple_masks_ops;
+
+       return erp;
+
+err_erp_region_ctcam_enable:
+       rhashtable_remove_fast(&erp_table->erp_ht, &erp->ht_node,
+                              mlxsw_sp_acl_erp_ht_params);
+err_rhashtable_insert:
+       mlxsw_sp_acl_erp_master_mask_clear(erp_table, erp);
+err_master_mask_set:
+       erp_table->num_ctcam_erps--;
+       kfree(erp);
+       return ERR_PTR(err);
+}
+
+static struct mlxsw_sp_acl_erp *
+mlxsw_sp_acl_erp_ctcam_mask_create(struct mlxsw_sp_acl_erp_table *erp_table,
+                                  struct mlxsw_sp_acl_erp_key *key)
+{
+       struct mlxsw_sp_acl_erp *erp;
+       int err;
+
+       /* There is a special situation where we need to spill rules
+        * into the C-TCAM, yet the region is still using a master
+        * mask and thus not performing a lookup in the C-TCAM. This
+        * can happen when two rules that differ only in priority, and
+        * thus share the same key, are programmed. In this case
+        * we transition the region to use an eRP table
+        */
+       err = mlxsw_sp_acl_erp_region_table_trans(erp_table);
+       if (err)
+               return ERR_PTR(err);
+
+       erp = __mlxsw_sp_acl_erp_ctcam_mask_create(erp_table, key);
+       if (IS_ERR(erp)) {
+               err = PTR_ERR(erp);
+               goto err_erp_create;
+       }
+
+       return erp;
+
+err_erp_create:
+       mlxsw_sp_acl_erp_region_master_mask_trans(erp_table);
+       return ERR_PTR(err);
+}
+
+static void
+mlxsw_sp_acl_erp_ctcam_mask_destroy(struct mlxsw_sp_acl_erp *erp)
+{
+       struct mlxsw_sp_acl_erp_table *erp_table = erp->erp_table;
+
+       mlxsw_sp_acl_erp_region_ctcam_disable(erp_table);
+       rhashtable_remove_fast(&erp_table->erp_ht, &erp->ht_node,
+                              mlxsw_sp_acl_erp_ht_params);
+       mlxsw_sp_acl_erp_master_mask_clear(erp_table, erp);
+       erp_table->num_ctcam_erps--;
+       kfree(erp);
+
+       /* Once the last C-TCAM eRP has been destroyed, the state we
+        * transition to depends on the number of A-TCAM eRPs currently
+        * in use
+        */
+       if (erp_table->num_ctcam_erps > 0)
+               return;
+       mlxsw_sp_acl_erp_ctcam_table_ops_set(erp_table);
+}
+
+static struct mlxsw_sp_acl_erp *
+mlxsw_sp_acl_erp_mask_create(struct mlxsw_sp_acl_erp_table *erp_table,
+                            struct mlxsw_sp_acl_erp_key *key)
+{
+       struct mlxsw_sp_acl_erp *erp;
+       int err;
+
+       if (key->ctcam)
+               return __mlxsw_sp_acl_erp_ctcam_mask_create(erp_table, key);
+
+       /* Expand the eRP table for the new eRP, if needed */
+       err = mlxsw_sp_acl_erp_table_expand(erp_table);
+       if (err)
+               return ERR_PTR(err);
+
+       erp = mlxsw_sp_acl_erp_generic_create(erp_table, key);
+       if (IS_ERR(erp))
+               return erp;
+
+       err = mlxsw_sp_acl_erp_index_get(erp_table, &erp->index);
+       if (err)
+               goto err_erp_index_get;
+
+       err = mlxsw_sp_acl_erp_table_erp_add(erp_table, erp);
+       if (err)
+               goto err_table_erp_add;
+
+       err = mlxsw_sp_acl_erp_region_erp_add(erp_table, erp);
+       if (err)
+               goto err_region_erp_add;
+
+       erp_table->ops = &erp_multiple_masks_ops;
+
+       return erp;
+
+err_region_erp_add:
+       mlxsw_sp_acl_erp_table_erp_del(erp);
+err_table_erp_add:
+       mlxsw_sp_acl_erp_index_put(erp_table, erp->index);
+err_erp_index_get:
+       mlxsw_sp_acl_erp_generic_destroy(erp);
+       return ERR_PTR(err);
+}
+
+static void
+mlxsw_sp_acl_erp_mask_destroy(struct mlxsw_sp_acl_erp_table *erp_table,
+                             struct mlxsw_sp_acl_erp *erp)
+{
+       if (erp->key.ctcam)
+               return mlxsw_sp_acl_erp_ctcam_mask_destroy(erp);
+
+       mlxsw_sp_acl_erp_region_erp_del(erp);
+       mlxsw_sp_acl_erp_table_erp_del(erp);
+       mlxsw_sp_acl_erp_index_put(erp_table, erp->index);
+       mlxsw_sp_acl_erp_generic_destroy(erp);
+
+       if (erp_table->num_atcam_erps == 2 && erp_table->num_ctcam_erps == 0)
+               erp_table->ops = &erp_two_masks_ops;
+}
+
+static struct mlxsw_sp_acl_erp *
+mlxsw_sp_acl_erp_second_mask_create(struct mlxsw_sp_acl_erp_table *erp_table,
+                                   struct mlxsw_sp_acl_erp_key *key)
+{
+       struct mlxsw_sp_acl_erp *erp;
+       int err;
+
+       if (key->ctcam)
+               return mlxsw_sp_acl_erp_ctcam_mask_create(erp_table, key);
+
+       /* Transition to use eRP table instead of master mask */
+       err = mlxsw_sp_acl_erp_region_table_trans(erp_table);
+       if (err)
+               return ERR_PTR(err);
+
+       erp = mlxsw_sp_acl_erp_generic_create(erp_table, key);
+       if (IS_ERR(erp)) {
+               err = PTR_ERR(erp);
+               goto err_erp_create;
+       }
+
+       err = mlxsw_sp_acl_erp_index_get(erp_table, &erp->index);
+       if (err)
+               goto err_erp_index_get;
+
+       err = mlxsw_sp_acl_erp_table_erp_add(erp_table, erp);
+       if (err)
+               goto err_table_erp_add;
+
+       err = mlxsw_sp_acl_erp_region_erp_add(erp_table, erp);
+       if (err)
+               goto err_region_erp_add;
+
+       erp_table->ops = &erp_two_masks_ops;
+
+       return erp;
+
+err_region_erp_add:
+       mlxsw_sp_acl_erp_table_erp_del(erp);
+err_table_erp_add:
+       mlxsw_sp_acl_erp_index_put(erp_table, erp->index);
+err_erp_index_get:
+       mlxsw_sp_acl_erp_generic_destroy(erp);
+err_erp_create:
+       mlxsw_sp_acl_erp_region_master_mask_trans(erp_table);
+       return ERR_PTR(err);
+}
+
+static void
+mlxsw_sp_acl_erp_second_mask_destroy(struct mlxsw_sp_acl_erp_table *erp_table,
+                                    struct mlxsw_sp_acl_erp *erp)
+{
+       if (erp->key.ctcam)
+               return mlxsw_sp_acl_erp_ctcam_mask_destroy(erp);
+
+       mlxsw_sp_acl_erp_region_erp_del(erp);
+       mlxsw_sp_acl_erp_table_erp_del(erp);
+       mlxsw_sp_acl_erp_index_put(erp_table, erp->index);
+       mlxsw_sp_acl_erp_generic_destroy(erp);
+       /* Transition to use master mask instead of eRP table */
+       mlxsw_sp_acl_erp_region_master_mask_trans(erp_table);
+
+       erp_table->ops = &erp_single_mask_ops;
+}
+
+static struct mlxsw_sp_acl_erp *
+mlxsw_sp_acl_erp_first_mask_create(struct mlxsw_sp_acl_erp_table *erp_table,
+                                  struct mlxsw_sp_acl_erp_key *key)
+{
+       struct mlxsw_sp_acl_erp *erp;
+
+       if (key->ctcam)
+               return ERR_PTR(-EINVAL);
+
+       erp = mlxsw_sp_acl_erp_generic_create(erp_table, key);
+       if (IS_ERR(erp))
+               return erp;
+
+       erp_table->ops = &erp_single_mask_ops;
+
+       return erp;
+}
+
+static void
+mlxsw_sp_acl_erp_first_mask_destroy(struct mlxsw_sp_acl_erp_table *erp_table,
+                                   struct mlxsw_sp_acl_erp *erp)
+{
+       mlxsw_sp_acl_erp_generic_destroy(erp);
+       erp_table->ops = &erp_no_mask_ops;
+}
+
+static void
+mlxsw_sp_acl_erp_no_mask_destroy(struct mlxsw_sp_acl_erp_table *erp_table,
+                                struct mlxsw_sp_acl_erp *erp)
+{
+       WARN_ON(1);
+}
+
+struct mlxsw_sp_acl_erp *
+mlxsw_sp_acl_erp_get(struct mlxsw_sp_acl_atcam_region *aregion,
+                    const char *mask, bool ctcam)
+{
+       struct mlxsw_sp_acl_erp_table *erp_table = aregion->erp_table;
+       struct mlxsw_sp_acl_erp_key key;
+       struct mlxsw_sp_acl_erp *erp;
+
+       /* eRPs are allocated from a shared resource, but currently all
+        * allocations are done under RTNL.
+        */
+       ASSERT_RTNL();
+
+       memcpy(key.mask, mask, MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN);
+       key.ctcam = ctcam;
+       erp = rhashtable_lookup_fast(&erp_table->erp_ht, &key,
+                                    mlxsw_sp_acl_erp_ht_params);
+       if (erp) {
+               refcount_inc(&erp->refcnt);
+               return erp;
+       }
+
+       return erp_table->ops->erp_create(erp_table, &key);
+}
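+
mlxsw_sp_acl_erp_get() is a lookup-or-create with sharing: a hit in the rhashtable bumps the existing eRP's refcount, and only a miss falls through to the state-dependent create callback; mlxsw_sp_acl_erp_put() below destroys the eRP only when the last reference drops. A user-space sketch of the same get/put discipline, with the hash table collapsed to a single cached slot for brevity (illustrative only):

#include <stdlib.h>

struct erp {
        unsigned int key;
        unsigned int refcnt;
};

/* Single cached slot standing in for the driver's rhashtable;
 * assumes at most one distinct key is live at a time.
 */
static struct erp *slot;

static struct erp *erp_get(unsigned int key)
{
        if (slot && slot->key == key) {
                slot->refcnt++;                 /* hit: share the entry */
                return slot;
        }
        slot = calloc(1, sizeof(*slot));        /* miss: create a new one */
        if (!slot)
                return NULL;
        slot->key = key;
        slot->refcnt = 1;
        return slot;
}

static void erp_put(struct erp *erp)
{
        if (--erp->refcnt)
                return;                         /* other users remain */
        if (slot == erp)
                slot = NULL;
        free(erp);                              /* last reference destroys it */
}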
+
+void mlxsw_sp_acl_erp_put(struct mlxsw_sp_acl_atcam_region *aregion,
+                         struct mlxsw_sp_acl_erp *erp)
+{
+       struct mlxsw_sp_acl_erp_table *erp_table = aregion->erp_table;
+
+       ASSERT_RTNL();
+
+       if (!refcount_dec_and_test(&erp->refcnt))
+               return;
+
+       erp_table->ops->erp_destroy(erp_table, erp);
+}
+
+static struct mlxsw_sp_acl_erp_table *
+mlxsw_sp_acl_erp_table_create(struct mlxsw_sp_acl_atcam_region *aregion)
+{
+       struct mlxsw_sp_acl_erp_table *erp_table;
+       int err;
+
+       erp_table = kzalloc(sizeof(*erp_table), GFP_KERNEL);
+       if (!erp_table)
+               return ERR_PTR(-ENOMEM);
+
+       err = rhashtable_init(&erp_table->erp_ht, &mlxsw_sp_acl_erp_ht_params);
+       if (err)
+               goto err_rhashtable_init;
+
+       erp_table->erp_core = aregion->atcam->erp_core;
+       erp_table->ops = &erp_no_mask_ops;
+       INIT_LIST_HEAD(&erp_table->atcam_erps_list);
+       erp_table->aregion = aregion;
+
+       return erp_table;
+
+err_rhashtable_init:
+       kfree(erp_table);
+       return ERR_PTR(err);
+}
+
+static void
+mlxsw_sp_acl_erp_table_destroy(struct mlxsw_sp_acl_erp_table *erp_table)
+{
+       WARN_ON(!list_empty(&erp_table->atcam_erps_list));
+       rhashtable_destroy(&erp_table->erp_ht);
+       kfree(erp_table);
+}
+
+static int
+mlxsw_sp_acl_erp_master_mask_init(struct mlxsw_sp_acl_atcam_region *aregion)
+{
+       struct mlxsw_sp *mlxsw_sp = aregion->region->mlxsw_sp;
+       char percr_pl[MLXSW_REG_PERCR_LEN];
+
+       mlxsw_reg_percr_pack(percr_pl, aregion->region->id);
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(percr), percr_pl);
+}
+
+static int
+mlxsw_sp_acl_erp_region_param_init(struct mlxsw_sp_acl_atcam_region *aregion)
+{
+       struct mlxsw_sp *mlxsw_sp = aregion->region->mlxsw_sp;
+       char pererp_pl[MLXSW_REG_PERERP_LEN];
+
+       mlxsw_reg_pererp_pack(pererp_pl, aregion->region->id, false, false, 0,
+                             0, 0);
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pererp), pererp_pl);
+}
+
+int mlxsw_sp_acl_erp_region_init(struct mlxsw_sp_acl_atcam_region *aregion)
+{
+       struct mlxsw_sp_acl_erp_table *erp_table;
+       int err;
+
+       erp_table = mlxsw_sp_acl_erp_table_create(aregion);
+       if (IS_ERR(erp_table))
+               return PTR_ERR(erp_table);
+       aregion->erp_table = erp_table;
+
+       /* Initialize the region's master mask to all zeroes */
+       err = mlxsw_sp_acl_erp_master_mask_init(aregion);
+       if (err)
+               goto err_erp_master_mask_init;
+
+       /* Initialize the region to not use the eRP table */
+       err = mlxsw_sp_acl_erp_region_param_init(aregion);
+       if (err)
+               goto err_erp_region_param_init;
+
+       return 0;
+
+err_erp_region_param_init:
+err_erp_master_mask_init:
+       mlxsw_sp_acl_erp_table_destroy(erp_table);
+       return err;
+}
+
+void mlxsw_sp_acl_erp_region_fini(struct mlxsw_sp_acl_atcam_region *aregion)
+{
+       mlxsw_sp_acl_erp_table_destroy(aregion->erp_table);
+}
+
+static int
+mlxsw_sp_acl_erp_tables_sizes_query(struct mlxsw_sp *mlxsw_sp,
+                                   struct mlxsw_sp_acl_erp_core *erp_core)
+{
+       unsigned int size;
+
+       if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, ACL_ERPT_ENTRIES_2KB) ||
+           !MLXSW_CORE_RES_VALID(mlxsw_sp->core, ACL_ERPT_ENTRIES_4KB) ||
+           !MLXSW_CORE_RES_VALID(mlxsw_sp->core, ACL_ERPT_ENTRIES_8KB) ||
+           !MLXSW_CORE_RES_VALID(mlxsw_sp->core, ACL_ERPT_ENTRIES_12KB))
+               return -EIO;
+
+       size = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_ERPT_ENTRIES_2KB);
+       erp_core->erpt_entries_size[MLXSW_SP_ACL_ATCAM_REGION_TYPE_2KB] = size;
+
+       size = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_ERPT_ENTRIES_4KB);
+       erp_core->erpt_entries_size[MLXSW_SP_ACL_ATCAM_REGION_TYPE_4KB] = size;
+
+       size = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_ERPT_ENTRIES_8KB);
+       erp_core->erpt_entries_size[MLXSW_SP_ACL_ATCAM_REGION_TYPE_8KB] = size;
+
+       size = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_ERPT_ENTRIES_12KB);
+       erp_core->erpt_entries_size[MLXSW_SP_ACL_ATCAM_REGION_TYPE_12KB] = size;
+
+       return 0;
+}
+
+static int mlxsw_sp_acl_erp_tables_init(struct mlxsw_sp *mlxsw_sp,
+                                       struct mlxsw_sp_acl_erp_core *erp_core)
+{
+       unsigned int erpt_bank_size;
+       int err;
+
+       if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, ACL_MAX_ERPT_BANK_SIZE) ||
+           !MLXSW_CORE_RES_VALID(mlxsw_sp->core, ACL_MAX_ERPT_BANKS))
+               return -EIO;
+       erpt_bank_size = MLXSW_CORE_RES_GET(mlxsw_sp->core,
+                                           ACL_MAX_ERPT_BANK_SIZE);
+       erp_core->num_erp_banks = MLXSW_CORE_RES_GET(mlxsw_sp->core,
+                                                    ACL_MAX_ERPT_BANKS);
+
+       erp_core->erp_tables = gen_pool_create(0, -1);
+       if (!erp_core->erp_tables)
+               return -ENOMEM;
+       gen_pool_set_algo(erp_core->erp_tables, gen_pool_best_fit, NULL);
+
+       err = gen_pool_add(erp_core->erp_tables,
+                          MLXSW_SP_ACL_ERP_GENALLOC_OFFSET, erpt_bank_size,
+                          -1);
+       if (err)
+               goto err_gen_pool_add;
+
+       /* Different regions require masks of different sizes */
+       err = mlxsw_sp_acl_erp_tables_sizes_query(mlxsw_sp, erp_core);
+       if (err)
+               goto err_erp_tables_sizes_query;
+
+       return 0;
+
+err_erp_tables_sizes_query:
+err_gen_pool_add:
+       gen_pool_destroy(erp_core->erp_tables);
+       return err;
+}
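+
mlxsw_sp_acl_erp_tables_init() seeds a genalloc pool with one eRP bank and switches it to best-fit allocation, so variably sized eRP tables pack tightly into the fixed bank. A rough user-space analogue of carving variable-length regions out of one fixed arena (a naive first-fit scan over a bitmap, standing in for the kernel's gen_pool; illustrative only):

#include <stdbool.h>
#include <stddef.h>

#define BANK_SIZE 64    /* illustrative bank of 64 slots */

static bool used[BANK_SIZE];

/* Return the first base with 'len' free slots, or -1 if nothing fits. */
static int bank_alloc(size_t len)
{
        for (size_t base = 0; base + len <= BANK_SIZE; base++) {
                size_t n;

                for (n = 0; n < len && !used[base + n]; n++)
                        ;
                if (n < len)
                        continue;               /* hole too small, keep scanning */
                for (n = 0; n < len; n++)
                        used[base + n] = true;
                return (int)base;
        }
        return -1;
}

static void bank_free(int base, size_t len)
{
        for (size_t n = 0; n < len; n++)
                used[base + n] = false;
}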
+
+static void mlxsw_sp_acl_erp_tables_fini(struct mlxsw_sp *mlxsw_sp,
+                                        struct mlxsw_sp_acl_erp_core *erp_core)
+{
+       gen_pool_destroy(erp_core->erp_tables);
+}
+
+int mlxsw_sp_acl_erps_init(struct mlxsw_sp *mlxsw_sp,
+                          struct mlxsw_sp_acl_atcam *atcam)
+{
+       struct mlxsw_sp_acl_erp_core *erp_core;
+       int err;
+
+       erp_core = kzalloc(sizeof(*erp_core), GFP_KERNEL);
+       if (!erp_core)
+               return -ENOMEM;
+       erp_core->mlxsw_sp = mlxsw_sp;
+       atcam->erp_core = erp_core;
+
+       err = mlxsw_sp_acl_erp_tables_init(mlxsw_sp, erp_core);
+       if (err)
+               goto err_erp_tables_init;
+
+       return 0;
+
+err_erp_tables_init:
+       kfree(erp_core);
+       return err;
+}
+
+void mlxsw_sp_acl_erps_fini(struct mlxsw_sp *mlxsw_sp,
+                           struct mlxsw_sp_acl_atcam *atcam)
+{
+       mlxsw_sp_acl_erp_tables_fini(mlxsw_sp, atcam->erp_core);
+       kfree(atcam->erp_core);
+}
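+
Throughout this file the init paths unwind with the kernel's goto-on-error idiom: each failure label undoes exactly the steps that succeeded before it, in reverse order, and the matching _fini() repeats that teardown for the success case. A minimal sketch of the idiom outside the driver:

#include <stdlib.h>

struct ctx {
        void *a;
        void *b;
};

static int ctx_init(struct ctx *ctx)
{
        ctx->a = malloc(16);
        if (!ctx->a)
                return -1;

        ctx->b = malloc(16);
        if (!ctx->b)
                goto err_alloc_b;       /* undo only what succeeded */

        return 0;

err_alloc_b:
        free(ctx->a);
        return -1;
}

static void ctx_fini(struct ctx *ctx)
{
        free(ctx->b);                   /* reverse order of ctx_init() */
        free(ctx->a);
}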
index 510ce48d87f7470fff02d524fe13416af2ae3c5f..bca0def756cda3bfd54423bd7f979e1ddf6194d1 100644
 #include "core_acl_flex_actions.h"
 #include "spectrum_span.h"
 
-#define MLXSW_SP_KVDL_ACT_EXT_SIZE 1
-
 static int mlxsw_sp_act_kvdl_set_add(void *priv, u32 *p_kvdl_index,
-                                    char *enc_actions, bool is_first)
+                                    char *enc_actions, bool is_first, bool ca)
 {
        struct mlxsw_sp *mlxsw_sp = priv;
        char pefa_pl[MLXSW_REG_PEFA_LEN];
@@ -53,11 +51,11 @@ static int mlxsw_sp_act_kvdl_set_add(void *priv, u32 *p_kvdl_index,
        if (is_first)
                return 0;
 
-       err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ACT_EXT_SIZE,
-                                 &kvdl_index);
+       err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ACTSET,
+                                 1, &kvdl_index);
        if (err)
                return err;
-       mlxsw_reg_pefa_pack(pefa_pl, kvdl_index, enc_actions);
+       mlxsw_reg_pefa_pack(pefa_pl, kvdl_index, ca, enc_actions);
        err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pefa), pefa_pl);
        if (err)
                goto err_pefa_write;
@@ -65,10 +63,25 @@ static int mlxsw_sp_act_kvdl_set_add(void *priv, u32 *p_kvdl_index,
        return 0;
 
 err_pefa_write:
-       mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index);
+       mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ACTSET,
+                          1, kvdl_index);
        return err;
 }
 
+static int mlxsw_sp1_act_kvdl_set_add(void *priv, u32 *p_kvdl_index,
+                                     char *enc_actions, bool is_first)
+{
+       return mlxsw_sp_act_kvdl_set_add(priv, p_kvdl_index, enc_actions,
+                                        is_first, false);
+}
+
+static int mlxsw_sp2_act_kvdl_set_add(void *priv, u32 *p_kvdl_index,
+                                     char *enc_actions, bool is_first)
+{
+       return mlxsw_sp_act_kvdl_set_add(priv, p_kvdl_index, enc_actions,
+                                        is_first, true);
+}
+
 static void mlxsw_sp_act_kvdl_set_del(void *priv, u32 kvdl_index,
                                      bool is_first)
 {
@@ -76,7 +89,29 @@ static void mlxsw_sp_act_kvdl_set_del(void *priv, u32 kvdl_index,
 
        if (is_first)
                return;
-       mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index);
+       mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ACTSET,
+                          1, kvdl_index);
+}
+
+static int mlxsw_sp1_act_kvdl_set_activity_get(void *priv, u32 kvdl_index,
+                                              bool *activity)
+{
+       return -EOPNOTSUPP;
+}
+
+static int mlxsw_sp2_act_kvdl_set_activity_get(void *priv, u32 kvdl_index,
+                                              bool *activity)
+{
+       struct mlxsw_sp *mlxsw_sp = priv;
+       char pefa_pl[MLXSW_REG_PEFA_LEN];
+       int err;
+
+       mlxsw_reg_pefa_pack(pefa_pl, kvdl_index, true, NULL);
+       err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(pefa), pefa_pl);
+       if (err)
+               return err;
+       mlxsw_reg_pefa_unpack(pefa_pl, activity);
+       return 0;
 }
 
 static int mlxsw_sp_act_kvdl_fwd_entry_add(void *priv, u32 *p_kvdl_index,
@@ -87,7 +122,8 @@ static int mlxsw_sp_act_kvdl_fwd_entry_add(void *priv, u32 *p_kvdl_index,
        u32 kvdl_index;
        int err;
 
-       err = mlxsw_sp_kvdl_alloc(mlxsw_sp, 1, &kvdl_index);
+       err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_PBS,
+                                 1, &kvdl_index);
        if (err)
                return err;
        mlxsw_reg_ppbs_pack(ppbs_pl, kvdl_index, local_port);
@@ -98,7 +134,8 @@ static int mlxsw_sp_act_kvdl_fwd_entry_add(void *priv, u32 *p_kvdl_index,
        return 0;
 
 err_ppbs_write:
-       mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index);
+       mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_PBS,
+                          1, kvdl_index);
        return err;
 }
 
@@ -106,7 +143,8 @@ static void mlxsw_sp_act_kvdl_fwd_entry_del(void *priv, u32 kvdl_index)
 {
        struct mlxsw_sp *mlxsw_sp = priv;
 
-       mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index);
+       mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_PBS,
+                          1, kvdl_index);
 }
 
 static int
@@ -154,22 +192,36 @@ mlxsw_sp_act_mirror_del(void *priv, u8 local_in_port, int span_id, bool ingress)
        mlxsw_sp_span_mirror_del(in_port, span_id, type, false);
 }
 
-static const struct mlxsw_afa_ops mlxsw_sp_act_afa_ops = {
-       .kvdl_set_add           = mlxsw_sp_act_kvdl_set_add,
+const struct mlxsw_afa_ops mlxsw_sp1_act_afa_ops = {
+       .kvdl_set_add           = mlxsw_sp1_act_kvdl_set_add,
+       .kvdl_set_del           = mlxsw_sp_act_kvdl_set_del,
+       .kvdl_set_activity_get  = mlxsw_sp1_act_kvdl_set_activity_get,
+       .kvdl_fwd_entry_add     = mlxsw_sp_act_kvdl_fwd_entry_add,
+       .kvdl_fwd_entry_del     = mlxsw_sp_act_kvdl_fwd_entry_del,
+       .counter_index_get      = mlxsw_sp_act_counter_index_get,
+       .counter_index_put      = mlxsw_sp_act_counter_index_put,
+       .mirror_add             = mlxsw_sp_act_mirror_add,
+       .mirror_del             = mlxsw_sp_act_mirror_del,
+};
+
+const struct mlxsw_afa_ops mlxsw_sp2_act_afa_ops = {
+       .kvdl_set_add           = mlxsw_sp2_act_kvdl_set_add,
        .kvdl_set_del           = mlxsw_sp_act_kvdl_set_del,
+       .kvdl_set_activity_get  = mlxsw_sp2_act_kvdl_set_activity_get,
        .kvdl_fwd_entry_add     = mlxsw_sp_act_kvdl_fwd_entry_add,
        .kvdl_fwd_entry_del     = mlxsw_sp_act_kvdl_fwd_entry_del,
        .counter_index_get      = mlxsw_sp_act_counter_index_get,
        .counter_index_put      = mlxsw_sp_act_counter_index_put,
        .mirror_add             = mlxsw_sp_act_mirror_add,
        .mirror_del             = mlxsw_sp_act_mirror_del,
+       .dummy_first_set        = true,
 };
 
 int mlxsw_sp_afa_init(struct mlxsw_sp *mlxsw_sp)
 {
        mlxsw_sp->afa = mlxsw_afa_create(MLXSW_CORE_RES_GET(mlxsw_sp->core,
                                                            ACL_ACTIONS_PER_SET),
-                                        &mlxsw_sp_act_afa_ops, mlxsw_sp);
+                                        mlxsw_sp->afa_ops, mlxsw_sp);
        return PTR_ERR_OR_ZERO(mlxsw_sp->afa);
 }
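
The hunk above splits the single afa ops table into exported per-ASIC tables, mlxsw_sp1_act_afa_ops and mlxsw_sp2_act_afa_ops, and mlxsw_sp_afa_init() now passes whichever table mlxsw_sp->afa_ops points at, presumably selected once by the probe path for the ASIC generation. A compact sketch of that select-a-vtable-once pattern (names are illustrative):

#include <stdio.h>

struct chip_ops {
        int (*set_add)(int index);
};

static int gen1_set_add(int index)
{
        printf("gen1 add %d\n", index);
        return 0;
}

static int gen2_set_add(int index)
{
        printf("gen2 add %d\n", index);
        return 0;
}

static const struct chip_ops gen1_ops = { .set_add = gen1_set_add };
static const struct chip_ops gen2_ops = { .set_add = gen2_set_add };

struct chip {
        const struct chip_ops *ops;
};

int main(void)
{
        struct chip chip;

        /* Chosen once at probe time ... */
        chip.ops = &gen2_ops;
        /* ... so generic code never branches on the generation again. */
        chip.ops->set_add(7);
        return 0;
}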
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c
new file mode 100644
index 0000000..aa8927c
--- /dev/null
@@ -0,0 +1,316 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c
+ * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017-2018 Jiri Pirko <jiri@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include "spectrum.h"
+#include "item.h"
+#include "core_acl_flex_keys.h"
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l2_dmac[] = {
+       MLXSW_AFK_ELEMENT_INST_BUF(DMAC_32_47, 0x00, 2),
+       MLXSW_AFK_ELEMENT_INST_BUF(DMAC_0_31, 0x02, 4),
+       MLXSW_AFK_ELEMENT_INST_U32(PCP, 0x08, 13, 3),
+       MLXSW_AFK_ELEMENT_INST_U32(VID, 0x08, 0, 12),
+       MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 8),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l2_smac[] = {
+       MLXSW_AFK_ELEMENT_INST_BUF(SMAC_32_47, 0x00, 2),
+       MLXSW_AFK_ELEMENT_INST_BUF(SMAC_0_31, 0x02, 4),
+       MLXSW_AFK_ELEMENT_INST_U32(PCP, 0x08, 13, 3),
+       MLXSW_AFK_ELEMENT_INST_U32(VID, 0x08, 0, 12),
+       MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 8),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l2_smac_ex[] = {
+       MLXSW_AFK_ELEMENT_INST_BUF(SMAC_32_47, 0x02, 2),
+       MLXSW_AFK_ELEMENT_INST_BUF(SMAC_0_31, 0x04, 4),
+       MLXSW_AFK_ELEMENT_INST_U32(ETHERTYPE, 0x0C, 0, 16),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_sip[] = {
+       MLXSW_AFK_ELEMENT_INST_BUF(SRC_IP_0_31, 0x00, 4),
+       MLXSW_AFK_ELEMENT_INST_U32(IP_PROTO, 0x08, 0, 8),
+       MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 8),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_dip[] = {
+       MLXSW_AFK_ELEMENT_INST_BUF(DST_IP_0_31, 0x00, 4),
+       MLXSW_AFK_ELEMENT_INST_U32(IP_PROTO, 0x08, 0, 8),
+       MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 8),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4[] = {
+       MLXSW_AFK_ELEMENT_INST_BUF(SRC_IP_0_31, 0x00, 4),
+       MLXSW_AFK_ELEMENT_INST_U32(IP_ECN, 0x04, 4, 2),
+       MLXSW_AFK_ELEMENT_INST_U32(IP_TTL_, 0x04, 24, 8),
+       MLXSW_AFK_ELEMENT_INST_U32(IP_DSCP, 0x08, 0, 6),
+       MLXSW_AFK_ELEMENT_INST_U32(TCP_FLAGS, 0x08, 8, 9), /* TCP_CONTROL+TCP_ECN */
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_ex[] = {
+       MLXSW_AFK_ELEMENT_INST_U32(VID, 0x00, 0, 12),
+       MLXSW_AFK_ELEMENT_INST_U32(PCP, 0x08, 29, 3),
+       MLXSW_AFK_ELEMENT_INST_U32(SRC_L4_PORT, 0x08, 0, 16),
+       MLXSW_AFK_ELEMENT_INST_U32(DST_L4_PORT, 0x0C, 0, 16),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_dip[] = {
+       MLXSW_AFK_ELEMENT_INST_BUF(DST_IP_32_63, 0x00, 4),
+       MLXSW_AFK_ELEMENT_INST_BUF(DST_IP_0_31, 0x04, 4),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_ex1[] = {
+       MLXSW_AFK_ELEMENT_INST_BUF(DST_IP_96_127, 0x00, 4),
+       MLXSW_AFK_ELEMENT_INST_BUF(DST_IP_64_95, 0x04, 4),
+       MLXSW_AFK_ELEMENT_INST_U32(IP_PROTO, 0x08, 0, 8),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_sip[] = {
+       MLXSW_AFK_ELEMENT_INST_BUF(SRC_IP_32_63, 0x00, 4),
+       MLXSW_AFK_ELEMENT_INST_BUF(SRC_IP_0_31, 0x04, 4),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_sip_ex[] = {
+       MLXSW_AFK_ELEMENT_INST_BUF(SRC_IP_96_127, 0x00, 4),
+       MLXSW_AFK_ELEMENT_INST_BUF(SRC_IP_64_95, 0x04, 4),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_packet_type[] = {
+       MLXSW_AFK_ELEMENT_INST_U32(ETHERTYPE, 0x00, 0, 16),
+};
+
+static const struct mlxsw_afk_block mlxsw_sp1_afk_blocks[] = {
+       MLXSW_AFK_BLOCK(0x10, mlxsw_sp_afk_element_info_l2_dmac),
+       MLXSW_AFK_BLOCK(0x11, mlxsw_sp_afk_element_info_l2_smac),
+       MLXSW_AFK_BLOCK(0x12, mlxsw_sp_afk_element_info_l2_smac_ex),
+       MLXSW_AFK_BLOCK(0x30, mlxsw_sp_afk_element_info_ipv4_sip),
+       MLXSW_AFK_BLOCK(0x31, mlxsw_sp_afk_element_info_ipv4_dip),
+       MLXSW_AFK_BLOCK(0x32, mlxsw_sp_afk_element_info_ipv4),
+       MLXSW_AFK_BLOCK(0x33, mlxsw_sp_afk_element_info_ipv4_ex),
+       MLXSW_AFK_BLOCK(0x60, mlxsw_sp_afk_element_info_ipv6_dip),
+       MLXSW_AFK_BLOCK(0x65, mlxsw_sp_afk_element_info_ipv6_ex1),
+       MLXSW_AFK_BLOCK(0x62, mlxsw_sp_afk_element_info_ipv6_sip),
+       MLXSW_AFK_BLOCK(0x63, mlxsw_sp_afk_element_info_ipv6_sip_ex),
+       MLXSW_AFK_BLOCK(0xB0, mlxsw_sp_afk_element_info_packet_type),
+};
+
+#define MLXSW_SP1_AFK_KEY_BLOCK_SIZE 16
+
+static void mlxsw_sp1_afk_encode_block(char *block, int block_index,
+                                      char *output)
+{
+       unsigned int offset = block_index * MLXSW_SP1_AFK_KEY_BLOCK_SIZE;
+       char *output_indexed = output + offset;
+
+       memcpy(output_indexed, block, MLXSW_SP1_AFK_KEY_BLOCK_SIZE);
+}
+
+const struct mlxsw_afk_ops mlxsw_sp1_afk_ops = {
+       .blocks         = mlxsw_sp1_afk_blocks,
+       .blocks_count   = ARRAY_SIZE(mlxsw_sp1_afk_blocks),
+       .encode_block   = mlxsw_sp1_afk_encode_block,
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_mac_0[] = {
+       MLXSW_AFK_ELEMENT_INST_BUF(DMAC_0_31, 0x04, 4),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_mac_1[] = {
+       MLXSW_AFK_ELEMENT_INST_BUF(SMAC_0_31, 0x04, 4),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_mac_2[] = {
+       MLXSW_AFK_ELEMENT_INST_BUF(SMAC_32_47, 0x04, 2),
+       MLXSW_AFK_ELEMENT_INST_BUF(DMAC_32_47, 0x06, 2),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_mac_3[] = {
+       MLXSW_AFK_ELEMENT_INST_U32(PCP, 0x00, 0, 3),
+       MLXSW_AFK_ELEMENT_INST_U32(VID, 0x04, 16, 12),
+       MLXSW_AFK_ELEMENT_INST_BUF(DMAC_32_47, 0x06, 2),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_mac_4[] = {
+       MLXSW_AFK_ELEMENT_INST_U32(PCP, 0x00, 0, 3),
+       MLXSW_AFK_ELEMENT_INST_U32(VID, 0x04, 16, 12),
+       MLXSW_AFK_ELEMENT_INST_U32(ETHERTYPE, 0x04, 0, 16),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_mac_5[] = {
+       MLXSW_AFK_ELEMENT_INST_U32(VID, 0x04, 16, 12),
+       MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x04, 0, 8), /* RX_ACL_SYSTEM_PORT */
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_0[] = {
+       MLXSW_AFK_ELEMENT_INST_BUF(DST_IP_0_31, 0x04, 4),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_1[] = {
+       MLXSW_AFK_ELEMENT_INST_BUF(SRC_IP_0_31, 0x04, 4),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_2[] = {
+       MLXSW_AFK_ELEMENT_INST_U32(IP_DSCP, 0x04, 0, 6),
+       MLXSW_AFK_ELEMENT_INST_U32(IP_ECN, 0x04, 6, 2),
+       MLXSW_AFK_ELEMENT_INST_U32(IP_TTL_, 0x04, 8, 8),
+       MLXSW_AFK_ELEMENT_INST_U32(IP_PROTO, 0x04, 16, 8),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_0[] = {
+       MLXSW_AFK_ELEMENT_INST_BUF(DST_IP_32_63, 0x04, 4),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_1[] = {
+       MLXSW_AFK_ELEMENT_INST_BUF(DST_IP_64_95, 0x04, 4),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_2[] = {
+       MLXSW_AFK_ELEMENT_INST_BUF(DST_IP_96_127, 0x04, 4),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_3[] = {
+       MLXSW_AFK_ELEMENT_INST_BUF(SRC_IP_32_63, 0x04, 4),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_4[] = {
+       MLXSW_AFK_ELEMENT_INST_BUF(SRC_IP_64_95, 0x04, 4),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_5[] = {
+       MLXSW_AFK_ELEMENT_INST_BUF(SRC_IP_96_127, 0x04, 4),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l4_0[] = {
+       MLXSW_AFK_ELEMENT_INST_U32(SRC_L4_PORT, 0x04, 16, 16),
+       MLXSW_AFK_ELEMENT_INST_U32(DST_L4_PORT, 0x04, 0, 16),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l4_2[] = {
+       MLXSW_AFK_ELEMENT_INST_U32(TCP_FLAGS, 0x04, 16, 9), /* TCP_CONTROL + TCP_ECN */
+};
+
+static const struct mlxsw_afk_block mlxsw_sp2_afk_blocks[] = {
+       MLXSW_AFK_BLOCK(0x10, mlxsw_sp_afk_element_info_mac_0),
+       MLXSW_AFK_BLOCK(0x11, mlxsw_sp_afk_element_info_mac_1),
+       MLXSW_AFK_BLOCK(0x12, mlxsw_sp_afk_element_info_mac_2),
+       MLXSW_AFK_BLOCK(0x13, mlxsw_sp_afk_element_info_mac_3),
+       MLXSW_AFK_BLOCK(0x14, mlxsw_sp_afk_element_info_mac_4),
+       MLXSW_AFK_BLOCK(0x15, mlxsw_sp_afk_element_info_mac_5),
+       MLXSW_AFK_BLOCK(0x38, mlxsw_sp_afk_element_info_ipv4_0),
+       MLXSW_AFK_BLOCK(0x39, mlxsw_sp_afk_element_info_ipv4_1),
+       MLXSW_AFK_BLOCK(0x3A, mlxsw_sp_afk_element_info_ipv4_2),
+       MLXSW_AFK_BLOCK(0x40, mlxsw_sp_afk_element_info_ipv6_0),
+       MLXSW_AFK_BLOCK(0x41, mlxsw_sp_afk_element_info_ipv6_1),
+       MLXSW_AFK_BLOCK(0x42, mlxsw_sp_afk_element_info_ipv6_2),
+       MLXSW_AFK_BLOCK(0x43, mlxsw_sp_afk_element_info_ipv6_3),
+       MLXSW_AFK_BLOCK(0x44, mlxsw_sp_afk_element_info_ipv6_4),
+       MLXSW_AFK_BLOCK(0x45, mlxsw_sp_afk_element_info_ipv6_5),
+       MLXSW_AFK_BLOCK(0x90, mlxsw_sp_afk_element_info_l4_0),
+       MLXSW_AFK_BLOCK(0x92, mlxsw_sp_afk_element_info_l4_2),
+};
+
+#define MLXSW_SP2_AFK_BITS_PER_BLOCK 36
+
+/* A block in Spectrum-2 is of the following form:
+ *
+ * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+ * |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |35|34|33|32|
+ * +-----------------------------------------------------------------------------------------------+
+ * |31|30|29|28|27|26|25|24|23|22|21|20|19|18|17|16|15|14|13|12|11|10| 9| 8| 7| 6| 5| 4| 3| 2| 1| 0|
+ * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+ */
+MLXSW_ITEM64(sp2_afk, block, value, 0x00, 0, MLXSW_SP2_AFK_BITS_PER_BLOCK);
+
+/* The key / mask block layout in Spectrum-2 is of the following form:
+ *
+ * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+ * |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |                block11_high                   |
+ * +-----------------------------------------------------------------------------------------------+
+ * |                    block11_low                               |         block10_high           |
+ * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+ * ...
+ */
+
+struct mlxsw_sp2_afk_block_layout {
+       unsigned short offset;
+       struct mlxsw_item item;
+};
+
+#define MLXSW_SP2_AFK_BLOCK_LAYOUT(_block, _offset, _shift)                    \
+       {                                                                       \
+               .offset = _offset,                                              \
+               {                                                               \
+                       .shift = _shift,                                        \
+                       .size = {.bits = MLXSW_SP2_AFK_BITS_PER_BLOCK},         \
+                       .name = #_block,                                        \
+               }                                                               \
+       }                                                                       \
+
+static const struct mlxsw_sp2_afk_block_layout mlxsw_sp2_afk_blocks_layout[] = {
+       MLXSW_SP2_AFK_BLOCK_LAYOUT(block0, 0x30, 0),
+       MLXSW_SP2_AFK_BLOCK_LAYOUT(block1, 0x2C, 4),
+       MLXSW_SP2_AFK_BLOCK_LAYOUT(block2, 0x28, 8),
+       MLXSW_SP2_AFK_BLOCK_LAYOUT(block3, 0x24, 12),
+       MLXSW_SP2_AFK_BLOCK_LAYOUT(block4, 0x20, 16),
+       MLXSW_SP2_AFK_BLOCK_LAYOUT(block5, 0x1C, 20),
+       MLXSW_SP2_AFK_BLOCK_LAYOUT(block6, 0x18, 24),
+       MLXSW_SP2_AFK_BLOCK_LAYOUT(block7, 0x14, 28),
+       MLXSW_SP2_AFK_BLOCK_LAYOUT(block8, 0x0C, 0),
+       MLXSW_SP2_AFK_BLOCK_LAYOUT(block9, 0x08, 4),
+       MLXSW_SP2_AFK_BLOCK_LAYOUT(block10, 0x04, 8),
+       MLXSW_SP2_AFK_BLOCK_LAYOUT(block11, 0x00, 12),
+};
+
+static void mlxsw_sp2_afk_encode_block(char *block, int block_index,
+                                      char *output)
+{
+       u64 block_value = mlxsw_sp2_afk_block_value_get(block);
+       const struct mlxsw_sp2_afk_block_layout *block_layout;
+
+       if (WARN_ON(block_index < 0 ||
+                   block_index >= ARRAY_SIZE(mlxsw_sp2_afk_blocks_layout)))
+               return;
+
+       block_layout = &mlxsw_sp2_afk_blocks_layout[block_index];
+       __mlxsw_item_set64(output + block_layout->offset,
+                          &block_layout->item, 0, block_value);
+}
+
+const struct mlxsw_afk_ops mlxsw_sp2_afk_ops = {
+       .blocks         = mlxsw_sp2_afk_blocks,
+       .blocks_count   = ARRAY_SIZE(mlxsw_sp2_afk_blocks),
+       .encode_block   = mlxsw_sp2_afk_encode_block,
+};
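+
Per the layout comments above, each Spectrum-2 key block is 36 bits wide and block N lands at blocks_layout[N].offset with a 4-bit-per-block shift, so consecutive blocks straddle 32-bit word boundaries. A standalone sketch of writing a 36-bit value into a byte buffer at a given byte offset and bit shift, approximating what __mlxsw_item_set64() does for one block (an approximation of the helper's semantics, not the driver's exact implementation):

#include <stdint.h>
#include <stddef.h>

/* Write the low 'bits' of 'value' into buf at 'offset' bytes, shifted
 * left by 'shift' bits within a big-endian 64-bit window.
 */
static void put_bits64(uint8_t *buf, size_t offset, unsigned int shift,
                       unsigned int bits, uint64_t value)
{
        uint64_t mask = ((1ULL << bits) - 1) << shift;
        uint64_t word = 0;
        int i;

        for (i = 0; i < 8; i++)         /* load the 64-bit window */
                word = (word << 8) | buf[offset + i];

        word = (word & ~mask) | ((value << shift) & mask);

        for (i = 7; i >= 0; i--) {      /* store it back */
                buf[offset + i] = word & 0xff;
                word >>= 8;
        }
}

For example, block 10 would be written as put_bits64(key, 0x04, 8, 36, block_value), matching the 0x04/8 entry in mlxsw_sp2_afk_blocks_layout[].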
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.h
deleted file mode 100644
index fb80318..0000000
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.h
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2017 Jiri Pirko <jiri@mellanox.com>
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the names of the copyright holders nor the names of its
- *    contributors may be used to endorse or promote products derived from
- *    this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _MLXSW_SPECTRUM_ACL_FLEX_KEYS_H
-#define _MLXSW_SPECTRUM_ACL_FLEX_KEYS_H
-
-#include "core_acl_flex_keys.h"
-
-static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l2_dmac[] = {
-       MLXSW_AFK_ELEMENT_INST_BUF(DMAC, 0x00, 6),
-       MLXSW_AFK_ELEMENT_INST_U32(PCP, 0x08, 13, 3),
-       MLXSW_AFK_ELEMENT_INST_U32(VID, 0x08, 0, 12),
-       MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 16),
-};
-
-static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l2_smac[] = {
-       MLXSW_AFK_ELEMENT_INST_BUF(SMAC, 0x00, 6),
-       MLXSW_AFK_ELEMENT_INST_U32(PCP, 0x08, 13, 3),
-       MLXSW_AFK_ELEMENT_INST_U32(VID, 0x08, 0, 12),
-       MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 16),
-};
-
-static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l2_smac_ex[] = {
-       MLXSW_AFK_ELEMENT_INST_BUF(SMAC, 0x02, 6),
-       MLXSW_AFK_ELEMENT_INST_U32(ETHERTYPE, 0x0C, 0, 16),
-};
-
-static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_sip[] = {
-       MLXSW_AFK_ELEMENT_INST_U32(SRC_IP4, 0x00, 0, 32),
-       MLXSW_AFK_ELEMENT_INST_U32(IP_PROTO, 0x08, 0, 8),
-       MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 16),
-};
-
-static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_dip[] = {
-       MLXSW_AFK_ELEMENT_INST_U32(DST_IP4, 0x00, 0, 32),
-       MLXSW_AFK_ELEMENT_INST_U32(IP_PROTO, 0x08, 0, 8),
-       MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 16),
-};
-
-static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4[] = {
-       MLXSW_AFK_ELEMENT_INST_U32(SRC_IP4, 0x00, 0, 32),
-       MLXSW_AFK_ELEMENT_INST_U32(IP_ECN, 0x04, 4, 2),
-       MLXSW_AFK_ELEMENT_INST_U32(IP_TTL_, 0x04, 24, 8),
-       MLXSW_AFK_ELEMENT_INST_U32(IP_DSCP, 0x08, 0, 6),
-       MLXSW_AFK_ELEMENT_INST_U32(TCP_FLAGS, 0x08, 8, 9), /* TCP_CONTROL+TCP_ECN */
-};
-
-static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_ex[] = {
-       MLXSW_AFK_ELEMENT_INST_U32(VID, 0x00, 0, 12),
-       MLXSW_AFK_ELEMENT_INST_U32(PCP, 0x08, 29, 3),
-       MLXSW_AFK_ELEMENT_INST_U32(SRC_L4_PORT, 0x08, 0, 16),
-       MLXSW_AFK_ELEMENT_INST_U32(DST_L4_PORT, 0x0C, 0, 16),
-};
-
-static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_dip[] = {
-       MLXSW_AFK_ELEMENT_INST_BUF(DST_IP6_LO, 0x00, 8),
-};
-
-static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_ex1[] = {
-       MLXSW_AFK_ELEMENT_INST_BUF(DST_IP6_HI, 0x00, 8),
-       MLXSW_AFK_ELEMENT_INST_U32(IP_PROTO, 0x08, 0, 8),
-};
-
-static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_sip[] = {
-       MLXSW_AFK_ELEMENT_INST_BUF(SRC_IP6_LO, 0x00, 8),
-};
-
-static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_sip_ex[] = {
-       MLXSW_AFK_ELEMENT_INST_BUF(SRC_IP6_HI, 0x00, 8),
-};
-
-static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_packet_type[] = {
-       MLXSW_AFK_ELEMENT_INST_U32(ETHERTYPE, 0x00, 0, 16),
-};
-
-static const struct mlxsw_afk_block mlxsw_sp_afk_blocks[] = {
-       MLXSW_AFK_BLOCK(0x10, mlxsw_sp_afk_element_info_l2_dmac),
-       MLXSW_AFK_BLOCK(0x11, mlxsw_sp_afk_element_info_l2_smac),
-       MLXSW_AFK_BLOCK(0x12, mlxsw_sp_afk_element_info_l2_smac_ex),
-       MLXSW_AFK_BLOCK(0x30, mlxsw_sp_afk_element_info_ipv4_sip),
-       MLXSW_AFK_BLOCK(0x31, mlxsw_sp_afk_element_info_ipv4_dip),
-       MLXSW_AFK_BLOCK(0x32, mlxsw_sp_afk_element_info_ipv4),
-       MLXSW_AFK_BLOCK(0x33, mlxsw_sp_afk_element_info_ipv4_ex),
-       MLXSW_AFK_BLOCK(0x60, mlxsw_sp_afk_element_info_ipv6_dip),
-       MLXSW_AFK_BLOCK(0x65, mlxsw_sp_afk_element_info_ipv6_ex1),
-       MLXSW_AFK_BLOCK(0x62, mlxsw_sp_afk_element_info_ipv6_sip),
-       MLXSW_AFK_BLOCK(0x63, mlxsw_sp_afk_element_info_ipv6_sip_ex),
-       MLXSW_AFK_BLOCK(0xB0, mlxsw_sp_afk_element_info_packet_type),
-};
-
-#define MLXSW_SP_AFK_BLOCKS_COUNT ARRAY_SIZE(mlxsw_sp_afk_blocks)
-
-#endif
index ad1b548e3cace26f149ec9b9ac7eea1e493f8b6e..245e2f473c6f512ba48bc69678394624e8ac25e1 100644
@@ -1,7 +1,7 @@
 /*
  * drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2017 Jiri Pirko <jiri@mellanox.com>
+ * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017-2018 Jiri Pirko <jiri@mellanox.com>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
 #include <linux/list.h>
 #include <linux/rhashtable.h>
 #include <linux/netdevice.h>
-#include <linux/parman.h>
 
 #include "reg.h"
 #include "core.h"
 #include "resources.h"
 #include "spectrum.h"
+#include "spectrum_acl_tcam.h"
 #include "core_acl_flex_keys.h"
 
-struct mlxsw_sp_acl_tcam {
-       unsigned long *used_regions; /* bit array */
-       unsigned int max_regions;
-       unsigned long *used_groups;  /* bit array */
-       unsigned int max_groups;
-       unsigned int max_group_size;
-};
+size_t mlxsw_sp_acl_tcam_priv_size(struct mlxsw_sp *mlxsw_sp)
+{
+       const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
+
+       return ops->priv_size;
+}
 
-static int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, void *priv)
+int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp,
+                          struct mlxsw_sp_acl_tcam *tcam)
 {
-       struct mlxsw_sp_acl_tcam *tcam = priv;
+       const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
        u64 max_tcam_regions;
        u64 max_regions;
        u64 max_groups;
@@ -88,21 +88,53 @@ static int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, void *priv)
        tcam->max_groups = max_groups;
        tcam->max_group_size = MLXSW_CORE_RES_GET(mlxsw_sp->core,
                                                 ACL_MAX_GROUP_SIZE);
+
+       err = ops->init(mlxsw_sp, tcam->priv, tcam);
+       if (err)
+               goto err_tcam_init;
+
        return 0;
 
+err_tcam_init:
+       kfree(tcam->used_groups);
 err_alloc_used_groups:
        kfree(tcam->used_regions);
        return err;
 }
 
-static void mlxsw_sp_acl_tcam_fini(struct mlxsw_sp *mlxsw_sp, void *priv)
+void mlxsw_sp_acl_tcam_fini(struct mlxsw_sp *mlxsw_sp,
+                           struct mlxsw_sp_acl_tcam *tcam)
 {
-       struct mlxsw_sp_acl_tcam *tcam = priv;
+       const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
 
+       ops->fini(mlxsw_sp, tcam->priv);
        kfree(tcam->used_groups);
        kfree(tcam->used_regions);
 }
 
+int mlxsw_sp_acl_tcam_priority_get(struct mlxsw_sp *mlxsw_sp,
+                                  struct mlxsw_sp_acl_rule_info *rulei,
+                                  u32 *priority, bool fillup_priority)
+{
+       u64 max_priority;
+
+       if (!fillup_priority) {
+               *priority = 0;
+               return 0;
+       }
+
+       if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, KVD_SIZE))
+               return -EIO;
+
+       max_priority = MLXSW_CORE_RES_GET(mlxsw_sp->core, KVD_SIZE);
+       if (rulei->priority > max_priority)
+               return -EINVAL;
+
+       /* Unlike in TC, in HW, higher number means higher priority. */
+       *priority = max_priority - rulei->priority;
+       return 0;
+}
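+
The helper inverts TC's convention (lower number wins) into the hardware's (higher number wins) by subtracting from the maximum, which preserves relative ordering. A tiny worked example, taking max_priority = 65536 purely for illustration:

#include <stdio.h>

int main(void)
{
        unsigned int max_priority = 65536;      /* e.g. KVD_SIZE */

        /* TC prio 1 must beat TC prio 10; after inversion it still does. */
        printf("tc 1  -> hw %u\n", max_priority - 1);   /* 65535 */
        printf("tc 10 -> hw %u\n", max_priority - 10);  /* 65526 */
        return 0;
}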
+
 static int mlxsw_sp_acl_tcam_region_id_get(struct mlxsw_sp_acl_tcam *tcam,
                                           u16 *p_id)
 {
@@ -157,37 +189,25 @@ struct mlxsw_sp_acl_tcam_group {
        struct mlxsw_sp_acl_tcam_group_ops *ops;
        const struct mlxsw_sp_acl_tcam_pattern *patterns;
        unsigned int patterns_count;
-};
-
-struct mlxsw_sp_acl_tcam_region {
-       struct list_head list; /* Member of a TCAM group */
-       struct list_head chunk_list; /* List of chunks under this region */
-       struct parman *parman;
-       struct mlxsw_sp *mlxsw_sp;
-       struct mlxsw_sp_acl_tcam_group *group;
-       u16 id; /* ACL ID and region ID - they are same */
-       char tcam_region_info[MLXSW_REG_PXXX_TCAM_REGION_INFO_LEN];
-       struct mlxsw_afk_key_info *key_info;
-       struct {
-               struct parman_prio parman_prio;
-               struct parman_item parman_item;
-               struct mlxsw_sp_acl_rule_info *rulei;
-       } catchall;
+       bool tmplt_elusage_set;
+       struct mlxsw_afk_element_usage tmplt_elusage;
 };
 
 struct mlxsw_sp_acl_tcam_chunk {
        struct list_head list; /* Member of a TCAM region */
        struct rhash_head ht_node; /* Member of a chunk HT */
        unsigned int priority; /* Priority within the region and group */
-       struct parman_prio parman_prio;
        struct mlxsw_sp_acl_tcam_group *group;
        struct mlxsw_sp_acl_tcam_region *region;
        unsigned int ref_count;
+       unsigned long priv[0];
+       /* priv always has to be the last item */
 };
 
 struct mlxsw_sp_acl_tcam_entry {
-       struct parman_item parman_item;
        struct mlxsw_sp_acl_tcam_chunk *chunk;
+       unsigned long priv[0];
+       /* priv always has to be the last item */
 };
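
Both structs now end in unsigned long priv[0], the kernel's trailing-array idiom for a backend-private area: the allocation sites later in this patch size it as sizeof(*chunk) + ops->chunk_priv_size (and likewise for entries), so the generic object and the per-ASIC state share one allocation. A user-space sketch with the standard C99 flexible array member:

#include <stdlib.h>

struct entry {
        int id;
        unsigned long priv[];   /* C99 flexible array member */
};

static struct entry *entry_alloc(size_t priv_size)
{
        /* One allocation covers the header and the backend's area. */
        return calloc(1, sizeof(struct entry) + priv_size);
}

int main(void)
{
        struct entry *e = entry_alloc(32);

        if (!e)
                return 1;
        e->priv[0] = 42;        /* backend-private scratch */
        free(e);
        return 0;
}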
 
 static const struct rhashtable_params mlxsw_sp_acl_tcam_chunk_ht_params = {
@@ -216,13 +236,19 @@ mlxsw_sp_acl_tcam_group_add(struct mlxsw_sp *mlxsw_sp,
                            struct mlxsw_sp_acl_tcam *tcam,
                            struct mlxsw_sp_acl_tcam_group *group,
                            const struct mlxsw_sp_acl_tcam_pattern *patterns,
-                           unsigned int patterns_count)
+                           unsigned int patterns_count,
+                           struct mlxsw_afk_element_usage *tmplt_elusage)
 {
        int err;
 
        group->tcam = tcam;
        group->patterns = patterns;
        group->patterns_count = patterns_count;
+       if (tmplt_elusage) {
+               group->tmplt_elusage_set = true;
+               memcpy(&group->tmplt_elusage, tmplt_elusage,
+                      sizeof(group->tmplt_elusage));
+       }
        INIT_LIST_HEAD(&group->region_list);
        err = mlxsw_sp_acl_tcam_group_id_get(tcam, &group->id);
        if (err)
@@ -431,6 +457,15 @@ mlxsw_sp_acl_tcam_group_use_patterns(struct mlxsw_sp_acl_tcam_group *group,
        const struct mlxsw_sp_acl_tcam_pattern *pattern;
        int i;
 
+       /* In case the template is set, we don't have to look up the
+        * pattern and can simply use the template.
+        */
+       if (group->tmplt_elusage_set) {
+               memcpy(out, &group->tmplt_elusage, sizeof(*out));
+               WARN_ON(!mlxsw_afk_element_usage_subset(elusage, out));
+               return;
+       }
+
        for (i = 0; i < group->patterns_count; i++) {
                pattern = &group->patterns[i];
                mlxsw_afk_element_usage_fill(out, pattern->elements,
@@ -441,9 +476,6 @@ mlxsw_sp_acl_tcam_group_use_patterns(struct mlxsw_sp_acl_tcam_group *group,
        memcpy(out, elusage, sizeof(*out));
 }
 
-#define MLXSW_SP_ACL_TCAM_REGION_BASE_COUNT 16
-#define MLXSW_SP_ACL_TCAM_REGION_RESIZE_STEP 16
-
 static int
 mlxsw_sp_acl_tcam_region_alloc(struct mlxsw_sp *mlxsw_sp,
                               struct mlxsw_sp_acl_tcam_region *region)
@@ -455,6 +487,7 @@ mlxsw_sp_acl_tcam_region_alloc(struct mlxsw_sp *mlxsw_sp,
        int err;
 
        mlxsw_reg_ptar_pack(ptar_pl, MLXSW_REG_PTAR_OP_ALLOC,
+                           region->key_type,
                            MLXSW_SP_ACL_TCAM_REGION_BASE_COUNT,
                            region->id, region->tcam_region_info);
        encodings_count = mlxsw_afk_key_info_blocks_count_get(key_info);
@@ -477,23 +510,12 @@ mlxsw_sp_acl_tcam_region_free(struct mlxsw_sp *mlxsw_sp,
 {
        char ptar_pl[MLXSW_REG_PTAR_LEN];
 
-       mlxsw_reg_ptar_pack(ptar_pl, MLXSW_REG_PTAR_OP_FREE, 0, region->id,
+       mlxsw_reg_ptar_pack(ptar_pl, MLXSW_REG_PTAR_OP_FREE,
+                           region->key_type, 0, region->id,
                            region->tcam_region_info);
        mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptar), ptar_pl);
 }
 
-static int
-mlxsw_sp_acl_tcam_region_resize(struct mlxsw_sp *mlxsw_sp,
-                               struct mlxsw_sp_acl_tcam_region *region,
-                               u16 new_size)
-{
-       char ptar_pl[MLXSW_REG_PTAR_LEN];
-
-       mlxsw_reg_ptar_pack(ptar_pl, MLXSW_REG_PTAR_OP_RESIZE,
-                           new_size, region->id, region->tcam_region_info);
-       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptar), ptar_pl);
-}
-
 static int
 mlxsw_sp_acl_tcam_region_enable(struct mlxsw_sp *mlxsw_sp,
                                struct mlxsw_sp_acl_tcam_region *region)
@@ -516,193 +538,22 @@ mlxsw_sp_acl_tcam_region_disable(struct mlxsw_sp *mlxsw_sp,
        mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pacl), pacl_pl);
 }
 
-static int
-mlxsw_sp_acl_tcam_region_entry_insert(struct mlxsw_sp *mlxsw_sp,
-                                     struct mlxsw_sp_acl_tcam_region *region,
-                                     unsigned int offset,
-                                     struct mlxsw_sp_acl_rule_info *rulei)
-{
-       char ptce2_pl[MLXSW_REG_PTCE2_LEN];
-       char *act_set;
-       char *mask;
-       char *key;
-
-       mlxsw_reg_ptce2_pack(ptce2_pl, true, MLXSW_REG_PTCE2_OP_WRITE_WRITE,
-                            region->tcam_region_info, offset);
-       key = mlxsw_reg_ptce2_flex_key_blocks_data(ptce2_pl);
-       mask = mlxsw_reg_ptce2_mask_data(ptce2_pl);
-       mlxsw_afk_encode(region->key_info, &rulei->values, key, mask);
-
-       /* Only the first action set belongs here, the rest is in KVD */
-       act_set = mlxsw_afa_block_first_set(rulei->act_block);
-       mlxsw_reg_ptce2_flex_action_set_memcpy_to(ptce2_pl, act_set);
-
-       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptce2), ptce2_pl);
-}
-
-static void
-mlxsw_sp_acl_tcam_region_entry_remove(struct mlxsw_sp *mlxsw_sp,
-                                     struct mlxsw_sp_acl_tcam_region *region,
-                                     unsigned int offset)
-{
-       char ptce2_pl[MLXSW_REG_PTCE2_LEN];
-
-       mlxsw_reg_ptce2_pack(ptce2_pl, false, MLXSW_REG_PTCE2_OP_WRITE_WRITE,
-                            region->tcam_region_info, offset);
-       mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptce2), ptce2_pl);
-}
-
-static int
-mlxsw_sp_acl_tcam_region_entry_activity_get(struct mlxsw_sp *mlxsw_sp,
-                                           struct mlxsw_sp_acl_tcam_region *region,
-                                           unsigned int offset,
-                                           bool *activity)
-{
-       char ptce2_pl[MLXSW_REG_PTCE2_LEN];
-       int err;
-
-       mlxsw_reg_ptce2_pack(ptce2_pl, true, MLXSW_REG_PTCE2_OP_QUERY_CLEAR_ON_READ,
-                            region->tcam_region_info, offset);
-       err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptce2), ptce2_pl);
-       if (err)
-               return err;
-       *activity = mlxsw_reg_ptce2_a_get(ptce2_pl);
-       return 0;
-}
-
-#define MLXSW_SP_ACL_TCAM_CATCHALL_PRIO (~0U)
-
-static int
-mlxsw_sp_acl_tcam_region_catchall_add(struct mlxsw_sp *mlxsw_sp,
-                                     struct mlxsw_sp_acl_tcam_region *region)
-{
-       struct parman_prio *parman_prio = &region->catchall.parman_prio;
-       struct parman_item *parman_item = &region->catchall.parman_item;
-       struct mlxsw_sp_acl_rule_info *rulei;
-       int err;
-
-       parman_prio_init(region->parman, parman_prio,
-                        MLXSW_SP_ACL_TCAM_CATCHALL_PRIO);
-       err = parman_item_add(region->parman, parman_prio, parman_item);
-       if (err)
-               goto err_parman_item_add;
-
-       rulei = mlxsw_sp_acl_rulei_create(mlxsw_sp->acl);
-       if (IS_ERR(rulei)) {
-               err = PTR_ERR(rulei);
-               goto err_rulei_create;
-       }
-
-       err = mlxsw_sp_acl_rulei_act_continue(rulei);
-       if (WARN_ON(err))
-               goto err_rulei_act_continue;
-
-       err = mlxsw_sp_acl_rulei_commit(rulei);
-       if (err)
-               goto err_rulei_commit;
-
-       err = mlxsw_sp_acl_tcam_region_entry_insert(mlxsw_sp, region,
-                                                   parman_item->index, rulei);
-       region->catchall.rulei = rulei;
-       if (err)
-               goto err_rule_insert;
-
-       return 0;
-
-err_rule_insert:
-err_rulei_commit:
-err_rulei_act_continue:
-       mlxsw_sp_acl_rulei_destroy(rulei);
-err_rulei_create:
-       parman_item_remove(region->parman, parman_prio, parman_item);
-err_parman_item_add:
-       parman_prio_fini(parman_prio);
-       return err;
-}
-
-static void
-mlxsw_sp_acl_tcam_region_catchall_del(struct mlxsw_sp *mlxsw_sp,
-                                     struct mlxsw_sp_acl_tcam_region *region)
-{
-       struct parman_prio *parman_prio = &region->catchall.parman_prio;
-       struct parman_item *parman_item = &region->catchall.parman_item;
-       struct mlxsw_sp_acl_rule_info *rulei = region->catchall.rulei;
-
-       mlxsw_sp_acl_tcam_region_entry_remove(mlxsw_sp, region,
-                                             parman_item->index);
-       mlxsw_sp_acl_rulei_destroy(rulei);
-       parman_item_remove(region->parman, parman_prio, parman_item);
-       parman_prio_fini(parman_prio);
-}
-
-static void
-mlxsw_sp_acl_tcam_region_move(struct mlxsw_sp *mlxsw_sp,
-                             struct mlxsw_sp_acl_tcam_region *region,
-                             u16 src_offset, u16 dst_offset, u16 size)
-{
-       char prcr_pl[MLXSW_REG_PRCR_LEN];
-
-       mlxsw_reg_prcr_pack(prcr_pl, MLXSW_REG_PRCR_OP_MOVE,
-                           region->tcam_region_info, src_offset,
-                           region->tcam_region_info, dst_offset, size);
-       mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(prcr), prcr_pl);
-}
-
-static int mlxsw_sp_acl_tcam_region_parman_resize(void *priv,
-                                                 unsigned long new_count)
-{
-       struct mlxsw_sp_acl_tcam_region *region = priv;
-       struct mlxsw_sp *mlxsw_sp = region->mlxsw_sp;
-       u64 max_tcam_rules;
-
-       max_tcam_rules = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_TCAM_RULES);
-       if (new_count > max_tcam_rules)
-               return -EINVAL;
-       return mlxsw_sp_acl_tcam_region_resize(mlxsw_sp, region, new_count);
-}
-
-static void mlxsw_sp_acl_tcam_region_parman_move(void *priv,
-                                                unsigned long from_index,
-                                                unsigned long to_index,
-                                                unsigned long count)
-{
-       struct mlxsw_sp_acl_tcam_region *region = priv;
-       struct mlxsw_sp *mlxsw_sp = region->mlxsw_sp;
-
-       mlxsw_sp_acl_tcam_region_move(mlxsw_sp, region,
-                                     from_index, to_index, count);
-}
-
-static const struct parman_ops mlxsw_sp_acl_tcam_region_parman_ops = {
-       .base_count     = MLXSW_SP_ACL_TCAM_REGION_BASE_COUNT,
-       .resize_step    = MLXSW_SP_ACL_TCAM_REGION_RESIZE_STEP,
-       .resize         = mlxsw_sp_acl_tcam_region_parman_resize,
-       .move           = mlxsw_sp_acl_tcam_region_parman_move,
-       .algo           = PARMAN_ALGO_TYPE_LSORT,
-};
-
 static struct mlxsw_sp_acl_tcam_region *
 mlxsw_sp_acl_tcam_region_create(struct mlxsw_sp *mlxsw_sp,
                                struct mlxsw_sp_acl_tcam *tcam,
                                struct mlxsw_afk_element_usage *elusage)
 {
+       const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
        struct mlxsw_afk *afk = mlxsw_sp_acl_afk(mlxsw_sp->acl);
        struct mlxsw_sp_acl_tcam_region *region;
        int err;
 
-       region = kzalloc(sizeof(*region), GFP_KERNEL);
+       region = kzalloc(sizeof(*region) + ops->region_priv_size, GFP_KERNEL);
        if (!region)
                return ERR_PTR(-ENOMEM);
        INIT_LIST_HEAD(&region->chunk_list);
        region->mlxsw_sp = mlxsw_sp;
 
-       region->parman = parman_create(&mlxsw_sp_acl_tcam_region_parman_ops,
-                                      region);
-       if (!region->parman) {
-               err = -ENOMEM;
-               goto err_parman_create;
-       }
-
        region->key_info = mlxsw_afk_key_info_get(afk, elusage);
        if (IS_ERR(region->key_info)) {
                err = PTR_ERR(region->key_info);
@@ -713,6 +564,11 @@ mlxsw_sp_acl_tcam_region_create(struct mlxsw_sp *mlxsw_sp,
        if (err)
                goto err_region_id_get;
 
+       err = ops->region_associate(mlxsw_sp, region);
+       if (err)
+               goto err_tcam_region_associate;
+
+       region->key_type = ops->key_type;
        err = mlxsw_sp_acl_tcam_region_alloc(mlxsw_sp, region);
        if (err)
                goto err_tcam_region_alloc;
@@ -721,23 +577,22 @@ mlxsw_sp_acl_tcam_region_create(struct mlxsw_sp *mlxsw_sp,
        if (err)
                goto err_tcam_region_enable;
 
-       err = mlxsw_sp_acl_tcam_region_catchall_add(mlxsw_sp, region);
+       err = ops->region_init(mlxsw_sp, region->priv, tcam->priv, region);
        if (err)
-               goto err_tcam_region_catchall_add;
+               goto err_tcam_region_init;
 
        return region;
 
-err_tcam_region_catchall_add:
+err_tcam_region_init:
        mlxsw_sp_acl_tcam_region_disable(mlxsw_sp, region);
 err_tcam_region_enable:
        mlxsw_sp_acl_tcam_region_free(mlxsw_sp, region);
 err_tcam_region_alloc:
+err_tcam_region_associate:
        mlxsw_sp_acl_tcam_region_id_put(tcam, region->id);
 err_region_id_get:
        mlxsw_afk_key_info_put(region->key_info);
 err_key_info_get:
-       parman_destroy(region->parman);
-err_parman_create:
        kfree(region);
        return ERR_PTR(err);
 }
@@ -746,12 +601,13 @@ static void
 mlxsw_sp_acl_tcam_region_destroy(struct mlxsw_sp *mlxsw_sp,
                                 struct mlxsw_sp_acl_tcam_region *region)
 {
-       mlxsw_sp_acl_tcam_region_catchall_del(mlxsw_sp, region);
+       const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
+
+       ops->region_fini(mlxsw_sp, region->priv);
        mlxsw_sp_acl_tcam_region_disable(mlxsw_sp, region);
        mlxsw_sp_acl_tcam_region_free(mlxsw_sp, region);
        mlxsw_sp_acl_tcam_region_id_put(region->group->tcam, region->id);
        mlxsw_afk_key_info_put(region->key_info);
-       parman_destroy(region->parman);
        kfree(region);
 }
 
@@ -826,13 +682,14 @@ mlxsw_sp_acl_tcam_chunk_create(struct mlxsw_sp *mlxsw_sp,
                               unsigned int priority,
                               struct mlxsw_afk_element_usage *elusage)
 {
+       const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
        struct mlxsw_sp_acl_tcam_chunk *chunk;
        int err;
 
        if (priority == MLXSW_SP_ACL_TCAM_CATCHALL_PRIO)
                return ERR_PTR(-EINVAL);
 
-       chunk = kzalloc(sizeof(*chunk), GFP_KERNEL);
+       chunk = kzalloc(sizeof(*chunk) + ops->chunk_priv_size, GFP_KERNEL);
        if (!chunk)
                return ERR_PTR(-ENOMEM);
        chunk->priority = priority;
@@ -844,7 +701,7 @@ mlxsw_sp_acl_tcam_chunk_create(struct mlxsw_sp *mlxsw_sp,
        if (err)
                goto err_chunk_assoc;
 
-       parman_prio_init(chunk->region->parman, &chunk->parman_prio, priority);
+       ops->chunk_init(chunk->region->priv, chunk->priv, priority);
 
        err = rhashtable_insert_fast(&group->chunk_ht, &chunk->ht_node,
                                     mlxsw_sp_acl_tcam_chunk_ht_params);
@@ -854,7 +711,7 @@ mlxsw_sp_acl_tcam_chunk_create(struct mlxsw_sp *mlxsw_sp,
        return chunk;
 
 err_rhashtable_insert:
-       parman_prio_fini(&chunk->parman_prio);
+       ops->chunk_fini(chunk->priv);
        mlxsw_sp_acl_tcam_chunk_deassoc(mlxsw_sp, chunk);
 err_chunk_assoc:
        kfree(chunk);
@@ -865,11 +722,12 @@ static void
 mlxsw_sp_acl_tcam_chunk_destroy(struct mlxsw_sp *mlxsw_sp,
                                struct mlxsw_sp_acl_tcam_chunk *chunk)
 {
+       const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
        struct mlxsw_sp_acl_tcam_group *group = chunk->group;
 
        rhashtable_remove_fast(&group->chunk_ht, &chunk->ht_node,
                               mlxsw_sp_acl_tcam_chunk_ht_params);
-       parman_prio_fini(&chunk->parman_prio);
+       ops->chunk_fini(chunk->priv);
        mlxsw_sp_acl_tcam_chunk_deassoc(mlxsw_sp, chunk);
        kfree(chunk);
 }
@@ -903,11 +761,19 @@ static void mlxsw_sp_acl_tcam_chunk_put(struct mlxsw_sp *mlxsw_sp,
        mlxsw_sp_acl_tcam_chunk_destroy(mlxsw_sp, chunk);
 }
 
+static size_t mlxsw_sp_acl_tcam_entry_priv_size(struct mlxsw_sp *mlxsw_sp)
+{
+       const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
+
+       return ops->entry_priv_size;
+}
+
 static int mlxsw_sp_acl_tcam_entry_add(struct mlxsw_sp *mlxsw_sp,
                                       struct mlxsw_sp_acl_tcam_group *group,
                                       struct mlxsw_sp_acl_tcam_entry *entry,
                                       struct mlxsw_sp_acl_rule_info *rulei)
 {
+       const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
        struct mlxsw_sp_acl_tcam_chunk *chunk;
        struct mlxsw_sp_acl_tcam_region *region;
        int err;
@@ -918,24 +784,16 @@ static int mlxsw_sp_acl_tcam_entry_add(struct mlxsw_sp *mlxsw_sp,
                return PTR_ERR(chunk);
 
        region = chunk->region;
-       err = parman_item_add(region->parman, &chunk->parman_prio,
-                             &entry->parman_item);
-       if (err)
-               goto err_parman_item_add;
 
-       err = mlxsw_sp_acl_tcam_region_entry_insert(mlxsw_sp, region,
-                                                   entry->parman_item.index,
-                                                   rulei);
+       err = ops->entry_add(mlxsw_sp, region->priv, chunk->priv,
+                            entry->priv, rulei);
        if (err)
-               goto err_rule_insert;
+               goto err_entry_add;
        entry->chunk = chunk;
 
        return 0;
 
-err_rule_insert:
-       parman_item_remove(region->parman, &chunk->parman_prio,
-                          &entry->parman_item);
-err_parman_item_add:
+err_entry_add:
        mlxsw_sp_acl_tcam_chunk_put(mlxsw_sp, chunk);
        return err;
 }
@@ -943,13 +801,11 @@ err_parman_item_add:
 static void mlxsw_sp_acl_tcam_entry_del(struct mlxsw_sp *mlxsw_sp,
                                        struct mlxsw_sp_acl_tcam_entry *entry)
 {
+       const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
        struct mlxsw_sp_acl_tcam_chunk *chunk = entry->chunk;
        struct mlxsw_sp_acl_tcam_region *region = chunk->region;
 
-       mlxsw_sp_acl_tcam_region_entry_remove(mlxsw_sp, region,
-                                             entry->parman_item.index);
-       parman_item_remove(region->parman, &chunk->parman_prio,
-                          &entry->parman_item);
+       ops->entry_del(mlxsw_sp, region->priv, chunk->priv, entry->priv);
        mlxsw_sp_acl_tcam_chunk_put(mlxsw_sp, chunk);
 }
 
@@ -958,22 +814,24 @@ mlxsw_sp_acl_tcam_entry_activity_get(struct mlxsw_sp *mlxsw_sp,
                                     struct mlxsw_sp_acl_tcam_entry *entry,
                                     bool *activity)
 {
+       const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
        struct mlxsw_sp_acl_tcam_chunk *chunk = entry->chunk;
        struct mlxsw_sp_acl_tcam_region *region = chunk->region;
 
-       return mlxsw_sp_acl_tcam_region_entry_activity_get(mlxsw_sp, region,
-                                                          entry->parman_item.index,
-                                                          activity);
+       return ops->entry_activity_get(mlxsw_sp, region->priv,
+                                      entry->priv, activity);
 }
 
 static const enum mlxsw_afk_element mlxsw_sp_acl_tcam_pattern_ipv4[] = {
        MLXSW_AFK_ELEMENT_SRC_SYS_PORT,
-       MLXSW_AFK_ELEMENT_DMAC,
-       MLXSW_AFK_ELEMENT_SMAC,
+       MLXSW_AFK_ELEMENT_DMAC_32_47,
+       MLXSW_AFK_ELEMENT_DMAC_0_31,
+       MLXSW_AFK_ELEMENT_SMAC_32_47,
+       MLXSW_AFK_ELEMENT_SMAC_0_31,
        MLXSW_AFK_ELEMENT_ETHERTYPE,
        MLXSW_AFK_ELEMENT_IP_PROTO,
-       MLXSW_AFK_ELEMENT_SRC_IP4,
-       MLXSW_AFK_ELEMENT_DST_IP4,
+       MLXSW_AFK_ELEMENT_SRC_IP_0_31,
+       MLXSW_AFK_ELEMENT_DST_IP_0_31,
        MLXSW_AFK_ELEMENT_DST_L4_PORT,
        MLXSW_AFK_ELEMENT_SRC_L4_PORT,
        MLXSW_AFK_ELEMENT_VID,
@@ -987,10 +845,14 @@ static const enum mlxsw_afk_element mlxsw_sp_acl_tcam_pattern_ipv4[] = {
 static const enum mlxsw_afk_element mlxsw_sp_acl_tcam_pattern_ipv6[] = {
        MLXSW_AFK_ELEMENT_ETHERTYPE,
        MLXSW_AFK_ELEMENT_IP_PROTO,
-       MLXSW_AFK_ELEMENT_SRC_IP6_HI,
-       MLXSW_AFK_ELEMENT_SRC_IP6_LO,
-       MLXSW_AFK_ELEMENT_DST_IP6_HI,
-       MLXSW_AFK_ELEMENT_DST_IP6_LO,
+       MLXSW_AFK_ELEMENT_SRC_IP_96_127,
+       MLXSW_AFK_ELEMENT_SRC_IP_64_95,
+       MLXSW_AFK_ELEMENT_SRC_IP_32_63,
+       MLXSW_AFK_ELEMENT_SRC_IP_0_31,
+       MLXSW_AFK_ELEMENT_DST_IP_96_127,
+       MLXSW_AFK_ELEMENT_DST_IP_64_95,
+       MLXSW_AFK_ELEMENT_DST_IP_32_63,
+       MLXSW_AFK_ELEMENT_DST_IP_0_31,
        MLXSW_AFK_ELEMENT_DST_L4_PORT,
        MLXSW_AFK_ELEMENT_SRC_L4_PORT,
 };
@@ -1019,14 +881,16 @@ struct mlxsw_sp_acl_tcam_flower_rule {
 
 static int
 mlxsw_sp_acl_tcam_flower_ruleset_add(struct mlxsw_sp *mlxsw_sp,
-                                    void *priv, void *ruleset_priv)
+                                    struct mlxsw_sp_acl_tcam *tcam,
+                                    void *ruleset_priv,
+                                    struct mlxsw_afk_element_usage *tmplt_elusage)
 {
        struct mlxsw_sp_acl_tcam_flower_ruleset *ruleset = ruleset_priv;
-       struct mlxsw_sp_acl_tcam *tcam = priv;
 
        return mlxsw_sp_acl_tcam_group_add(mlxsw_sp, tcam, &ruleset->group,
                                           mlxsw_sp_acl_tcam_patterns,
-                                          MLXSW_SP_ACL_TCAM_PATTERNS_COUNT);
+                                          MLXSW_SP_ACL_TCAM_PATTERNS_COUNT,
+                                          tmplt_elusage);
 }
 
 static void
@@ -1070,6 +934,12 @@ mlxsw_sp_acl_tcam_flower_ruleset_group_id(void *ruleset_priv)
        return mlxsw_sp_acl_tcam_group_id(&ruleset->group);
 }
 
+static size_t mlxsw_sp_acl_tcam_flower_rule_priv_size(struct mlxsw_sp *mlxsw_sp)
+{
+       return sizeof(struct mlxsw_sp_acl_tcam_flower_rule) +
+              mlxsw_sp_acl_tcam_entry_priv_size(mlxsw_sp);
+}
+
 static int
 mlxsw_sp_acl_tcam_flower_rule_add(struct mlxsw_sp *mlxsw_sp,
                                  void *ruleset_priv, void *rule_priv,
@@ -1107,7 +977,7 @@ static const struct mlxsw_sp_acl_profile_ops mlxsw_sp_acl_tcam_flower_ops = {
        .ruleset_bind           = mlxsw_sp_acl_tcam_flower_ruleset_bind,
        .ruleset_unbind         = mlxsw_sp_acl_tcam_flower_ruleset_unbind,
        .ruleset_group_id       = mlxsw_sp_acl_tcam_flower_ruleset_group_id,
-       .rule_priv_size         = sizeof(struct mlxsw_sp_acl_tcam_flower_rule),
+       .rule_priv_size         = mlxsw_sp_acl_tcam_flower_rule_priv_size,
        .rule_add               = mlxsw_sp_acl_tcam_flower_rule_add,
        .rule_del               = mlxsw_sp_acl_tcam_flower_rule_del,
        .rule_activity_get      = mlxsw_sp_acl_tcam_flower_rule_activity_get,
@@ -1118,7 +988,7 @@ mlxsw_sp_acl_tcam_profile_ops_arr[] = {
        [MLXSW_SP_ACL_PROFILE_FLOWER] = &mlxsw_sp_acl_tcam_flower_ops,
 };
 
-static const struct mlxsw_sp_acl_profile_ops *
+const struct mlxsw_sp_acl_profile_ops *
 mlxsw_sp_acl_tcam_profile_ops(struct mlxsw_sp *mlxsw_sp,
                              enum mlxsw_sp_acl_profile profile)
 {
@@ -1131,10 +1001,3 @@ mlxsw_sp_acl_tcam_profile_ops(struct mlxsw_sp *mlxsw_sp,
                return NULL;
        return ops;
 }
-
-const struct mlxsw_sp_acl_ops mlxsw_sp_acl_tcam_ops = {
-       .priv_size              = sizeof(struct mlxsw_sp_acl_tcam),
-       .init                   = mlxsw_sp_acl_tcam_init,
-       .fini                   = mlxsw_sp_acl_tcam_fini,
-       .profile_ops            = mlxsw_sp_acl_tcam_profile_ops,
-};
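
/* Editor's note: the hunks above replace direct parman calls with an
 * indirection through mlxsw_sp->acl_tcam_ops, and size each chunk and
 * entry allocation as sizeof(struct) + ops->*_priv_size so that each
 * TCAM flavor can keep private state behind the generic structs.  A
 * minimal, self-contained userspace sketch of that pattern follows;
 * all names are illustrative, none of this is mlxsw code.
 */
#include <stdio.h>
#include <stdlib.h>

struct flavor_ops {
	size_t chunk_priv_size;
	void (*chunk_init)(void *priv, unsigned int priority);
	void (*chunk_fini)(void *priv);
};

struct chunk {
	unsigned int priority;
	const struct flavor_ops *ops;
	unsigned long priv[];	/* flavor-private area, must stay last */
};

struct demo_priv {
	unsigned int prio_copy;
};

static void demo_chunk_init(void *priv, unsigned int priority)
{
	((struct demo_priv *)priv)->prio_copy = priority;
}

static void demo_chunk_fini(void *priv)
{
	(void)priv;	/* nothing to tear down in this toy flavor */
}

static const struct flavor_ops demo_ops = {
	.chunk_priv_size	= sizeof(struct demo_priv),
	.chunk_init		= demo_chunk_init,
	.chunk_fini		= demo_chunk_fini,
};

static struct chunk *chunk_create(const struct flavor_ops *ops,
				  unsigned int priority)
{
	/* One allocation covers the generic header plus the private area. */
	struct chunk *chunk = calloc(1, sizeof(*chunk) + ops->chunk_priv_size);

	if (!chunk)
		return NULL;
	chunk->priority = priority;
	chunk->ops = ops;
	ops->chunk_init(chunk->priv, priority);
	return chunk;
}

static void chunk_destroy(struct chunk *chunk)
{
	chunk->ops->chunk_fini(chunk->priv);
	free(chunk);
}

int main(void)
{
	struct chunk *chunk = chunk_create(&demo_ops, 42);

	if (!chunk)
		return 1;
	printf("priority stored in priv: %u\n",
	       ((struct demo_priv *)chunk->priv)->prio_copy);
	chunk_destroy(chunk);
	return 0;
}
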
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h
new file mode 100644 (file)
index 0000000..881ade7
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h
@@ -0,0 +1,259 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h
+ * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017-2018 Jiri Pirko <jiri@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _MLXSW_SPECTRUM_ACL_TCAM_H
+#define _MLXSW_SPECTRUM_ACL_TCAM_H
+
+#include <linux/list.h>
+#include <linux/parman.h>
+
+#include "reg.h"
+#include "spectrum.h"
+#include "core_acl_flex_keys.h"
+
+struct mlxsw_sp_acl_tcam {
+       unsigned long *used_regions; /* bit array */
+       unsigned int max_regions;
+       unsigned long *used_groups;  /* bit array */
+       unsigned int max_groups;
+       unsigned int max_group_size;
+       unsigned long priv[0];
+       /* priv always has to be the last item */
+};
+
+size_t mlxsw_sp_acl_tcam_priv_size(struct mlxsw_sp *mlxsw_sp);
+int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp,
+                          struct mlxsw_sp_acl_tcam *tcam);
+void mlxsw_sp_acl_tcam_fini(struct mlxsw_sp *mlxsw_sp,
+                           struct mlxsw_sp_acl_tcam *tcam);
+int mlxsw_sp_acl_tcam_priority_get(struct mlxsw_sp *mlxsw_sp,
+                                  struct mlxsw_sp_acl_rule_info *rulei,
+                                  u32 *priority, bool fillup_priority);
+
+struct mlxsw_sp_acl_profile_ops {
+       size_t ruleset_priv_size;
+       int (*ruleset_add)(struct mlxsw_sp *mlxsw_sp,
+                          struct mlxsw_sp_acl_tcam *tcam, void *ruleset_priv,
+                          struct mlxsw_afk_element_usage *tmplt_elusage);
+       void (*ruleset_del)(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv);
+       int (*ruleset_bind)(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv,
+                           struct mlxsw_sp_port *mlxsw_sp_port,
+                           bool ingress);
+       void (*ruleset_unbind)(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv,
+                              struct mlxsw_sp_port *mlxsw_sp_port,
+                              bool ingress);
+       u16 (*ruleset_group_id)(void *ruleset_priv);
+       size_t (*rule_priv_size)(struct mlxsw_sp *mlxsw_sp);
+       int (*rule_add)(struct mlxsw_sp *mlxsw_sp,
+                       void *ruleset_priv, void *rule_priv,
+                       struct mlxsw_sp_acl_rule_info *rulei);
+       void (*rule_del)(struct mlxsw_sp *mlxsw_sp, void *rule_priv);
+       int (*rule_activity_get)(struct mlxsw_sp *mlxsw_sp, void *rule_priv,
+                                bool *activity);
+};
+
+const struct mlxsw_sp_acl_profile_ops *
+mlxsw_sp_acl_tcam_profile_ops(struct mlxsw_sp *mlxsw_sp,
+                             enum mlxsw_sp_acl_profile profile);
+
+#define MLXSW_SP_ACL_TCAM_REGION_BASE_COUNT 16
+#define MLXSW_SP_ACL_TCAM_REGION_RESIZE_STEP 16
+
+#define MLXSW_SP_ACL_TCAM_CATCHALL_PRIO (~0U)
+
+#define MLXSW_SP_ACL_TCAM_MASK_LEN \
+       (MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN * BITS_PER_BYTE)
+
+struct mlxsw_sp_acl_tcam_group;
+
+struct mlxsw_sp_acl_tcam_region {
+       struct list_head list; /* Member of a TCAM group */
+       struct list_head chunk_list; /* List of chunks under this region */
+       struct mlxsw_sp_acl_tcam_group *group;
+       enum mlxsw_reg_ptar_key_type key_type;
+       u16 id; /* ACL ID and region ID - they are the same */
+       char tcam_region_info[MLXSW_REG_PXXX_TCAM_REGION_INFO_LEN];
+       struct mlxsw_afk_key_info *key_info;
+       struct mlxsw_sp *mlxsw_sp;
+       unsigned long priv[0];
+       /* priv always has to be the last item */
+};
+
+struct mlxsw_sp_acl_ctcam_region {
+       struct parman *parman;
+       const struct mlxsw_sp_acl_ctcam_region_ops *ops;
+       struct mlxsw_sp_acl_tcam_region *region;
+};
+
+struct mlxsw_sp_acl_ctcam_chunk {
+       struct parman_prio parman_prio;
+};
+
+struct mlxsw_sp_acl_ctcam_entry {
+       struct parman_item parman_item;
+};
+
+struct mlxsw_sp_acl_ctcam_region_ops {
+       int (*entry_insert)(struct mlxsw_sp_acl_ctcam_region *cregion,
+                           struct mlxsw_sp_acl_ctcam_entry *centry,
+                           const char *mask);
+       void (*entry_remove)(struct mlxsw_sp_acl_ctcam_region *cregion,
+                            struct mlxsw_sp_acl_ctcam_entry *centry);
+};
+
+int
+mlxsw_sp_acl_ctcam_region_init(struct mlxsw_sp *mlxsw_sp,
+                              struct mlxsw_sp_acl_ctcam_region *cregion,
+                              struct mlxsw_sp_acl_tcam_region *region,
+                              const struct mlxsw_sp_acl_ctcam_region_ops *ops);
+void mlxsw_sp_acl_ctcam_region_fini(struct mlxsw_sp_acl_ctcam_region *cregion);
+void mlxsw_sp_acl_ctcam_chunk_init(struct mlxsw_sp_acl_ctcam_region *cregion,
+                                  struct mlxsw_sp_acl_ctcam_chunk *cchunk,
+                                  unsigned int priority);
+void mlxsw_sp_acl_ctcam_chunk_fini(struct mlxsw_sp_acl_ctcam_chunk *cchunk);
+int mlxsw_sp_acl_ctcam_entry_add(struct mlxsw_sp *mlxsw_sp,
+                                struct mlxsw_sp_acl_ctcam_region *cregion,
+                                struct mlxsw_sp_acl_ctcam_chunk *cchunk,
+                                struct mlxsw_sp_acl_ctcam_entry *centry,
+                                struct mlxsw_sp_acl_rule_info *rulei,
+                                bool fillup_priority);
+void mlxsw_sp_acl_ctcam_entry_del(struct mlxsw_sp *mlxsw_sp,
+                                 struct mlxsw_sp_acl_ctcam_region *cregion,
+                                 struct mlxsw_sp_acl_ctcam_chunk *cchunk,
+                                 struct mlxsw_sp_acl_ctcam_entry *centry);
+static inline unsigned int
+mlxsw_sp_acl_ctcam_entry_offset(struct mlxsw_sp_acl_ctcam_entry *centry)
+{
+       return centry->parman_item.index;
+}
+
+enum mlxsw_sp_acl_atcam_region_type {
+       MLXSW_SP_ACL_ATCAM_REGION_TYPE_2KB,
+       MLXSW_SP_ACL_ATCAM_REGION_TYPE_4KB,
+       MLXSW_SP_ACL_ATCAM_REGION_TYPE_8KB,
+       MLXSW_SP_ACL_ATCAM_REGION_TYPE_12KB,
+       __MLXSW_SP_ACL_ATCAM_REGION_TYPE_MAX,
+};
+
+#define MLXSW_SP_ACL_ATCAM_REGION_TYPE_MAX \
+       (__MLXSW_SP_ACL_ATCAM_REGION_TYPE_MAX - 1)
+
+struct mlxsw_sp_acl_atcam {
+       struct mlxsw_sp_acl_erp_core *erp_core;
+};
+
+struct mlxsw_sp_acl_atcam_region {
+       struct rhashtable entries_ht; /* A-TCAM only */
+       struct mlxsw_sp_acl_ctcam_region cregion;
+       const struct mlxsw_sp_acl_atcam_region_ops *ops;
+       struct mlxsw_sp_acl_tcam_region *region;
+       struct mlxsw_sp_acl_atcam *atcam;
+       enum mlxsw_sp_acl_atcam_region_type type;
+       struct mlxsw_sp_acl_erp_table *erp_table;
+       void *priv;
+};
+
+struct mlxsw_sp_acl_atcam_entry_ht_key {
+       char enc_key[MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN]; /* Encoded key */
+       u8 erp_id;
+};
+
+struct mlxsw_sp_acl_atcam_chunk {
+       struct mlxsw_sp_acl_ctcam_chunk cchunk;
+};
+
+struct mlxsw_sp_acl_atcam_entry {
+       struct rhash_head ht_node;
+       struct mlxsw_sp_acl_atcam_entry_ht_key ht_key;
+       struct mlxsw_sp_acl_ctcam_entry centry;
+       struct mlxsw_sp_acl_atcam_lkey_id *lkey_id;
+       struct mlxsw_sp_acl_erp *erp;
+};
+
+static inline struct mlxsw_sp_acl_atcam_region *
+mlxsw_sp_acl_tcam_cregion_aregion(struct mlxsw_sp_acl_ctcam_region *cregion)
+{
+       return container_of(cregion, struct mlxsw_sp_acl_atcam_region, cregion);
+}
+
+static inline struct mlxsw_sp_acl_atcam_entry *
+mlxsw_sp_acl_tcam_centry_aentry(struct mlxsw_sp_acl_ctcam_entry *centry)
+{
+       return container_of(centry, struct mlxsw_sp_acl_atcam_entry, centry);
+}
+
+int mlxsw_sp_acl_atcam_region_associate(struct mlxsw_sp *mlxsw_sp,
+                                       u16 region_id);
+int
+mlxsw_sp_acl_atcam_region_init(struct mlxsw_sp *mlxsw_sp,
+                              struct mlxsw_sp_acl_atcam *atcam,
+                              struct mlxsw_sp_acl_atcam_region *aregion,
+                              struct mlxsw_sp_acl_tcam_region *region,
+                              const struct mlxsw_sp_acl_ctcam_region_ops *ops);
+void mlxsw_sp_acl_atcam_region_fini(struct mlxsw_sp_acl_atcam_region *aregion);
+void mlxsw_sp_acl_atcam_chunk_init(struct mlxsw_sp_acl_atcam_region *aregion,
+                                  struct mlxsw_sp_acl_atcam_chunk *achunk,
+                                  unsigned int priority);
+void mlxsw_sp_acl_atcam_chunk_fini(struct mlxsw_sp_acl_atcam_chunk *achunk);
+int mlxsw_sp_acl_atcam_entry_add(struct mlxsw_sp *mlxsw_sp,
+                                struct mlxsw_sp_acl_atcam_region *aregion,
+                                struct mlxsw_sp_acl_atcam_chunk *achunk,
+                                struct mlxsw_sp_acl_atcam_entry *aentry,
+                                struct mlxsw_sp_acl_rule_info *rulei);
+void mlxsw_sp_acl_atcam_entry_del(struct mlxsw_sp *mlxsw_sp,
+                                 struct mlxsw_sp_acl_atcam_region *aregion,
+                                 struct mlxsw_sp_acl_atcam_chunk *achunk,
+                                 struct mlxsw_sp_acl_atcam_entry *aentry);
+int mlxsw_sp_acl_atcam_init(struct mlxsw_sp *mlxsw_sp,
+                           struct mlxsw_sp_acl_atcam *atcam);
+void mlxsw_sp_acl_atcam_fini(struct mlxsw_sp *mlxsw_sp,
+                            struct mlxsw_sp_acl_atcam *atcam);
+
+struct mlxsw_sp_acl_erp;
+
+bool mlxsw_sp_acl_erp_is_ctcam_erp(const struct mlxsw_sp_acl_erp *erp);
+u8 mlxsw_sp_acl_erp_id(const struct mlxsw_sp_acl_erp *erp);
+struct mlxsw_sp_acl_erp *
+mlxsw_sp_acl_erp_get(struct mlxsw_sp_acl_atcam_region *aregion,
+                    const char *mask, bool ctcam);
+void mlxsw_sp_acl_erp_put(struct mlxsw_sp_acl_atcam_region *aregion,
+                         struct mlxsw_sp_acl_erp *erp);
+int mlxsw_sp_acl_erp_region_init(struct mlxsw_sp_acl_atcam_region *aregion);
+void mlxsw_sp_acl_erp_region_fini(struct mlxsw_sp_acl_atcam_region *aregion);
+int mlxsw_sp_acl_erps_init(struct mlxsw_sp *mlxsw_sp,
+                          struct mlxsw_sp_acl_atcam *atcam);
+void mlxsw_sp_acl_erps_fini(struct mlxsw_sp *mlxsw_sp,
+                           struct mlxsw_sp_acl_atcam *atcam);
+
+#endif
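
/* Editor's note: the header above embeds struct mlxsw_sp_acl_ctcam_region
 * by value inside struct mlxsw_sp_acl_atcam_region, and the two inline
 * helpers recover the outer A-TCAM structs from inner C-TCAM pointers
 * with container_of().  A standalone sketch of that embed-and-upcast
 * idiom, with illustrative names (the container_of() here is the usual
 * offsetof() construction, not the kernel's macro):
 */
#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct cregion {
	int parman_slots;
};

struct aregion {
	int erp_count;
	struct cregion cregion;	/* embedded by value, not a pointer */
};

static struct aregion *cregion_aregion(struct cregion *cregion)
{
	return container_of(cregion, struct aregion, cregion);
}

int main(void)
{
	struct aregion aregion = { .erp_count = 3 };

	/* A callback that only sees the inner struct can still reach
	 * the outer one.
	 */
	printf("erp_count via upcast: %d\n",
	       cregion_aregion(&aregion.cregion)->erp_count);
	return 0;
}
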
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c
index b6ed7f7c531eec666dfe6ee8343fdd0eb9aa3e1e..c31aeb25ab5aa11082f6980a98d950959c131343 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c
@@ -1,6 +1,6 @@
 /*
  * drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c
- * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved.
  * Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com>
  *
  * Redistribution and use in source and binary forms, with or without
@@ -255,6 +255,270 @@ static int mlxsw_sp_dcbnl_ieee_setets(struct net_device *dev,
        return 0;
 }
 
+static int mlxsw_sp_dcbnl_app_validate(struct net_device *dev,
+                                      struct dcb_app *app)
+{
+       int prio;
+
+       if (app->priority >= IEEE_8021QAZ_MAX_TCS) {
+               netdev_err(dev, "APP entry with priority value %u is invalid\n",
+                          app->priority);
+               return -EINVAL;
+       }
+
+       switch (app->selector) {
+       case IEEE_8021QAZ_APP_SEL_DSCP:
+               if (app->protocol >= 64) {
+                       netdev_err(dev, "DSCP APP entry with protocol value %u is invalid\n",
+                                  app->protocol);
+                       return -EINVAL;
+               }
+
+       /* Warn about any DSCP APP entries with the same PID (protocol ID). */
+               prio = fls(dcb_ieee_getapp_mask(dev, app));
+               if (prio--) {
+                       if (prio < app->priority)
+                               netdev_warn(dev, "Choosing priority %d for DSCP %d in favor of previously-active value of %d\n",
+                                           app->priority, app->protocol, prio);
+                       else if (prio > app->priority)
+                               netdev_warn(dev, "Ignoring new priority %d for DSCP %d in favor of current value of %d\n",
+                                           app->priority, app->protocol, prio);
+               }
+               break;
+
+       case IEEE_8021QAZ_APP_SEL_ETHERTYPE:
+               if (app->protocol) {
+                       netdev_err(dev, "EtherType APP entries with protocol value != 0 not supported\n");
+                       return -EINVAL;
+               }
+               break;
+
+       default:
+               netdev_err(dev, "APP entries with selector %u not supported\n",
+                          app->selector);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+static u8
+mlxsw_sp_port_dcb_app_default_prio(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+       u8 prio_mask;
+
+       prio_mask = dcb_ieee_getapp_default_prio_mask(mlxsw_sp_port->dev);
+       if (prio_mask)
+               /* Take the highest configured priority. */
+               return fls(prio_mask) - 1;
+
+       return 0;
+}
+
+static void
+mlxsw_sp_port_dcb_app_dscp_prio_map(struct mlxsw_sp_port *mlxsw_sp_port,
+                                   u8 default_prio,
+                                   struct dcb_ieee_app_dscp_map *map)
+{
+       int i;
+
+       dcb_ieee_getapp_dscp_prio_mask_map(mlxsw_sp_port->dev, map);
+       for (i = 0; i < ARRAY_SIZE(map->map); ++i) {
+               if (map->map[i])
+                       map->map[i] = fls(map->map[i]) - 1;
+               else
+                       map->map[i] = default_prio;
+       }
+}
+
+static bool
+mlxsw_sp_port_dcb_app_prio_dscp_map(struct mlxsw_sp_port *mlxsw_sp_port,
+                                   struct dcb_ieee_app_prio_map *map)
+{
+       bool have_dscp = false;
+       int i;
+
+       dcb_ieee_getapp_prio_dscp_mask_map(mlxsw_sp_port->dev, map);
+       for (i = 0; i < ARRAY_SIZE(map->map); ++i) {
+               if (map->map[i]) {
+                       map->map[i] = fls64(map->map[i]) - 1;
+                       have_dscp = true;
+               }
+       }
+
+       return have_dscp;
+}
+
+static int
+mlxsw_sp_port_dcb_app_update_qpts(struct mlxsw_sp_port *mlxsw_sp_port,
+                                 enum mlxsw_reg_qpts_trust_state ts)
+{
+       struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+       char qpts_pl[MLXSW_REG_QPTS_LEN];
+
+       mlxsw_reg_qpts_pack(qpts_pl, mlxsw_sp_port->local_port, ts);
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qpts), qpts_pl);
+}
+
+static int
+mlxsw_sp_port_dcb_app_update_qrwe(struct mlxsw_sp_port *mlxsw_sp_port,
+                                 bool rewrite_dscp)
+{
+       struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+       char qrwe_pl[MLXSW_REG_QRWE_LEN];
+
+       mlxsw_reg_qrwe_pack(qrwe_pl, mlxsw_sp_port->local_port,
+                           false, rewrite_dscp);
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qrwe), qrwe_pl);
+}
+
+static int
+mlxsw_sp_port_dcb_toggle_trust(struct mlxsw_sp_port *mlxsw_sp_port,
+                              enum mlxsw_reg_qpts_trust_state ts)
+{
+       bool rewrite_dscp = ts == MLXSW_REG_QPTS_TRUST_STATE_DSCP;
+       int err;
+
+       if (mlxsw_sp_port->dcb.trust_state == ts)
+               return 0;
+
+       err = mlxsw_sp_port_dcb_app_update_qpts(mlxsw_sp_port, ts);
+       if (err)
+               return err;
+
+       err = mlxsw_sp_port_dcb_app_update_qrwe(mlxsw_sp_port, rewrite_dscp);
+       if (err)
+               goto err_update_qrwe;
+
+       mlxsw_sp_port->dcb.trust_state = ts;
+       return 0;
+
+err_update_qrwe:
+       mlxsw_sp_port_dcb_app_update_qpts(mlxsw_sp_port,
+                                         mlxsw_sp_port->dcb.trust_state);
+       return err;
+}
+
+static int
+mlxsw_sp_port_dcb_app_update_qpdpm(struct mlxsw_sp_port *mlxsw_sp_port,
+                                  struct dcb_ieee_app_dscp_map *map)
+{
+       struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+       char qpdpm_pl[MLXSW_REG_QPDPM_LEN];
+       short int i;
+
+       mlxsw_reg_qpdpm_pack(qpdpm_pl, mlxsw_sp_port->local_port);
+       for (i = 0; i < ARRAY_SIZE(map->map); ++i)
+               mlxsw_reg_qpdpm_dscp_pack(qpdpm_pl, i, map->map[i]);
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qpdpm), qpdpm_pl);
+}
+
+static int
+mlxsw_sp_port_dcb_app_update_qpdsm(struct mlxsw_sp_port *mlxsw_sp_port,
+                                  struct dcb_ieee_app_prio_map *map)
+{
+       struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+       char qpdsm_pl[MLXSW_REG_QPDSM_LEN];
+       short int i;
+
+       mlxsw_reg_qpdsm_pack(qpdsm_pl, mlxsw_sp_port->local_port);
+       for (i = 0; i < ARRAY_SIZE(map->map); ++i)
+               mlxsw_reg_qpdsm_prio_pack(qpdsm_pl, i, map->map[i]);
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qpdsm), qpdsm_pl);
+}
+
+static int mlxsw_sp_port_dcb_app_update(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+       struct dcb_ieee_app_prio_map prio_map;
+       struct dcb_ieee_app_dscp_map dscp_map;
+       u8 default_prio;
+       bool have_dscp;
+       int err;
+
+       default_prio = mlxsw_sp_port_dcb_app_default_prio(mlxsw_sp_port);
+       have_dscp = mlxsw_sp_port_dcb_app_prio_dscp_map(mlxsw_sp_port,
+                                                       &prio_map);
+
+       if (!have_dscp) {
+               err = mlxsw_sp_port_dcb_toggle_trust(mlxsw_sp_port,
+                                       MLXSW_REG_QPTS_TRUST_STATE_PCP);
+               if (err)
+                       netdev_err(mlxsw_sp_port->dev, "Couldn't switch to trust L2\n");
+               return err;
+       }
+
+       mlxsw_sp_port_dcb_app_dscp_prio_map(mlxsw_sp_port, default_prio,
+                                           &dscp_map);
+       err = mlxsw_sp_port_dcb_app_update_qpdpm(mlxsw_sp_port,
+                                                &dscp_map);
+       if (err) {
+               netdev_err(mlxsw_sp_port->dev, "Couldn't configure priority map\n");
+               return err;
+       }
+
+       err = mlxsw_sp_port_dcb_app_update_qpdsm(mlxsw_sp_port,
+                                                &prio_map);
+       if (err) {
+               netdev_err(mlxsw_sp_port->dev, "Couldn't configure DSCP rewrite map\n");
+               return err;
+       }
+
+       err = mlxsw_sp_port_dcb_toggle_trust(mlxsw_sp_port,
+                                            MLXSW_REG_QPTS_TRUST_STATE_DSCP);
+       if (err) {
+               /* A failure to set trust DSCP means that the QPDPM and QPDSM
+                * maps installed above are not in effect. And since we are here
+                * attempting to set trust DSCP, we couldn't have attempted to
+                * switch trust to PCP. Thus no cleanup is necessary.
+                */
+               netdev_err(mlxsw_sp_port->dev, "Couldn't switch to trust L3\n");
+               return err;
+       }
+
+       return 0;
+}
+
+static int mlxsw_sp_dcbnl_ieee_setapp(struct net_device *dev,
+                                     struct dcb_app *app)
+{
+       struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+       int err;
+
+       err = mlxsw_sp_dcbnl_app_validate(dev, app);
+       if (err)
+               return err;
+
+       err = dcb_ieee_setapp(dev, app);
+       if (err)
+               return err;
+
+       err = mlxsw_sp_port_dcb_app_update(mlxsw_sp_port);
+       if (err)
+               goto err_update;
+
+       return 0;
+
+err_update:
+       dcb_ieee_delapp(dev, app);
+       return err;
+}
+
+static int mlxsw_sp_dcbnl_ieee_delapp(struct net_device *dev,
+                                     struct dcb_app *app)
+{
+       struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+       int err;
+
+       err = dcb_ieee_delapp(dev, app);
+       if (err)
+               return err;
+
+       err = mlxsw_sp_port_dcb_app_update(mlxsw_sp_port);
+       if (err)
+               netdev_err(dev, "Failed to update DCB APP configuration\n");
+       return 0;
+}
+
 static int mlxsw_sp_dcbnl_ieee_getmaxrate(struct net_device *dev,
                                          struct ieee_maxrate *maxrate)
 {
@@ -394,6 +658,8 @@ static const struct dcbnl_rtnl_ops mlxsw_sp_dcbnl_ops = {
        .ieee_setmaxrate        = mlxsw_sp_dcbnl_ieee_setmaxrate,
        .ieee_getpfc            = mlxsw_sp_dcbnl_ieee_getpfc,
        .ieee_setpfc            = mlxsw_sp_dcbnl_ieee_setpfc,
+       .ieee_setapp            = mlxsw_sp_dcbnl_ieee_setapp,
+       .ieee_delapp            = mlxsw_sp_dcbnl_ieee_delapp,
 
        .getdcbx                = mlxsw_sp_dcbnl_getdcbx,
        .setdcbx                = mlxsw_sp_dcbnl_setdcbx,
@@ -467,6 +733,7 @@ int mlxsw_sp_port_dcb_init(struct mlxsw_sp_port *mlxsw_sp_port)
        if (err)
                goto err_port_pfc_init;
 
+       mlxsw_sp_port->dcb.trust_state = MLXSW_REG_QPTS_TRUST_STATE_PCP;
        mlxsw_sp_port->dev->dcbnl_ops = &mlxsw_sp_dcbnl_ops;
 
        return 0;
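
/* Editor's note: the DCB APP code added above repeatedly converts a
 * per-priority bitmask from dcbnl into a single priority by taking
 * fls(mask) - 1, i.e. the highest set bit wins.  A tiny standalone
 * illustration of that convention, with a local stand-in for the
 * kernel's fls():
 */
#include <stdio.h>

/* Find last set bit, 1-based; returns 0 when no bit is set, matching
 * the contract the code above relies on.
 */
static int fls_demo(unsigned int x)
{
	int r = 0;

	while (x) {
		x >>= 1;
		r++;
	}
	return r;
}

int main(void)
{
	/* Say priorities 1 and 5 are configured for some DSCP value. */
	unsigned int prio_mask = (1u << 1) | (1u << 5);
	int prio = fls_demo(prio_mask);

	if (prio--)	/* non-empty mask; prio is now the highest priority */
		printf("active priority: %d\n", prio);	/* prints 5 */
	else
		printf("no priority configured\n");
	return 0;
}
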
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
index 89dbf569dff50c0db7d97d3b4e80e8bd7cf494d6..8213cb7190fa2ecc482217eaf2f1860a2c4353b5 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
@@ -48,7 +48,8 @@
 static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp,
                                         struct mlxsw_sp_acl_block *block,
                                         struct mlxsw_sp_acl_rule_info *rulei,
-                                        struct tcf_exts *exts)
+                                        struct tcf_exts *exts,
+                                        struct netlink_ext_ack *extack)
 {
        const struct tc_action *a;
        LIST_HEAD(actions);
@@ -58,7 +59,7 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp,
                return 0;
 
        /* Count action is inserted first */
-       err = mlxsw_sp_acl_rulei_act_count(mlxsw_sp, rulei);
+       err = mlxsw_sp_acl_rulei_act_count(mlxsw_sp, rulei, extack);
        if (err)
                return err;
 
@@ -66,16 +67,22 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp,
        list_for_each_entry(a, &actions, list) {
                if (is_tcf_gact_ok(a)) {
                        err = mlxsw_sp_acl_rulei_act_terminate(rulei);
-                       if (err)
+                       if (err) {
+                               NL_SET_ERR_MSG_MOD(extack, "Cannot append terminate action");
                                return err;
+                       }
                } else if (is_tcf_gact_shot(a)) {
                        err = mlxsw_sp_acl_rulei_act_drop(rulei);
-                       if (err)
+                       if (err) {
+                               NL_SET_ERR_MSG_MOD(extack, "Cannot append drop action");
                                return err;
+                       }
                } else if (is_tcf_gact_trap(a)) {
                        err = mlxsw_sp_acl_rulei_act_trap(rulei);
-                       if (err)
+                       if (err) {
+                               NL_SET_ERR_MSG_MOD(extack, "Cannot append trap action");
                                return err;
+                       }
                } else if (is_tcf_gact_goto_chain(a)) {
                        u32 chain_index = tcf_gact_goto_chain_index(a);
                        struct mlxsw_sp_acl_ruleset *ruleset;
@@ -89,8 +96,10 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp,
 
                        group_id = mlxsw_sp_acl_ruleset_group_id(ruleset);
                        err = mlxsw_sp_acl_rulei_act_jump(rulei, group_id);
-                       if (err)
+                       if (err) {
+                               NL_SET_ERR_MSG_MOD(extack, "Cannot append jump action");
                                return err;
+                       }
                } else if (is_tcf_mirred_egress_redirect(a)) {
                        struct net_device *out_dev;
                        struct mlxsw_sp_fid *fid;
@@ -99,20 +108,21 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp,
                        fid = mlxsw_sp_acl_dummy_fid(mlxsw_sp);
                        fid_index = mlxsw_sp_fid_index(fid);
                        err = mlxsw_sp_acl_rulei_act_fid_set(mlxsw_sp, rulei,
-                                                            fid_index);
+                                                            fid_index, extack);
                        if (err)
                                return err;
 
                        out_dev = tcf_mirred_dev(a);
                        err = mlxsw_sp_acl_rulei_act_fwd(mlxsw_sp, rulei,
-                                                        out_dev);
+                                                        out_dev, extack);
                        if (err)
                                return err;
                } else if (is_tcf_mirred_egress_mirror(a)) {
                        struct net_device *out_dev = tcf_mirred_dev(a);
 
                        err = mlxsw_sp_acl_rulei_act_mirror(mlxsw_sp, rulei,
-                                                           block, out_dev);
+                                                           block, out_dev,
+                                                           extack);
                        if (err)
                                return err;
                } else if (is_tcf_vlan(a)) {
@@ -123,8 +133,9 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp,
 
                        return mlxsw_sp_acl_rulei_act_vlan(mlxsw_sp, rulei,
                                                           action, vid,
-                                                          proto, prio);
+                                                          proto, prio, extack);
                } else {
+                       NL_SET_ERR_MSG_MOD(extack, "Unsupported action");
                        dev_err(mlxsw_sp->bus_info->dev, "Unsupported action\n");
                        return -EOPNOTSUPP;
                }
@@ -144,10 +155,12 @@ static void mlxsw_sp_flower_parse_ipv4(struct mlxsw_sp_acl_rule_info *rulei,
                                          FLOW_DISSECTOR_KEY_IPV4_ADDRS,
                                          f->mask);
 
-       mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_SRC_IP4,
-                                      ntohl(key->src), ntohl(mask->src));
-       mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_DST_IP4,
-                                      ntohl(key->dst), ntohl(mask->dst));
+       mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP_0_31,
+                                      (char *) &key->src,
+                                      (char *) &mask->src, 4);
+       mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP_0_31,
+                                      (char *) &key->dst,
+                                      (char *) &mask->dst, 4);
 }
 
 static void mlxsw_sp_flower_parse_ipv6(struct mlxsw_sp_acl_rule_info *rulei,
@@ -161,24 +174,31 @@ static void mlxsw_sp_flower_parse_ipv6(struct mlxsw_sp_acl_rule_info *rulei,
                skb_flow_dissector_target(f->dissector,
                                          FLOW_DISSECTOR_KEY_IPV6_ADDRS,
                                          f->mask);
-       size_t addr_half_size = sizeof(key->src) / 2;
-
-       mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP6_HI,
-                                      &key->src.s6_addr[0],
-                                      &mask->src.s6_addr[0],
-                                      addr_half_size);
-       mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP6_LO,
-                                      &key->src.s6_addr[addr_half_size],
-                                      &mask->src.s6_addr[addr_half_size],
-                                      addr_half_size);
-       mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP6_HI,
-                                      &key->dst.s6_addr[0],
-                                      &mask->dst.s6_addr[0],
-                                      addr_half_size);
-       mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP6_LO,
-                                      &key->dst.s6_addr[addr_half_size],
-                                      &mask->dst.s6_addr[addr_half_size],
-                                      addr_half_size);
+
+       mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP_96_127,
+                                      &key->src.s6_addr[0x0],
+                                      &mask->src.s6_addr[0x0], 4);
+       mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP_64_95,
+                                      &key->src.s6_addr[0x4],
+                                      &mask->src.s6_addr[0x4], 4);
+       mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP_32_63,
+                                      &key->src.s6_addr[0x8],
+                                      &mask->src.s6_addr[0x8], 4);
+       mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP_0_31,
+                                      &key->src.s6_addr[0xC],
+                                      &mask->src.s6_addr[0xC], 4);
+       mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP_96_127,
+                                      &key->dst.s6_addr[0x0],
+                                      &mask->dst.s6_addr[0x0], 4);
+       mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP_64_95,
+                                      &key->dst.s6_addr[0x4],
+                                      &mask->dst.s6_addr[0x4], 4);
+       mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP_32_63,
+                                      &key->dst.s6_addr[0x8],
+                                      &mask->dst.s6_addr[0x8], 4);
+       mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP_0_31,
+                                      &key->dst.s6_addr[0xC],
+                                      &mask->dst.s6_addr[0xC], 4);
 }
 
 static int mlxsw_sp_flower_parse_ports(struct mlxsw_sp *mlxsw_sp,
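
/* Editor's note: with the flex-key rework, wide fields are no longer
 * single key elements; the parse functions above feed IPv6 addresses
 * (and, further below, MACs) to mlxsw_sp_acl_rulei_keymask_buf() in
 * 32-bit slices whose element names encode the bit range, highest bits
 * first.  A standalone sketch of that slicing for one 16-byte IPv6
 * address; only the element names are real, the rest is illustrative:
 */
#include <stdio.h>
#include <string.h>

static void keymask_buf(const char *element, const unsigned char *key,
			const unsigned char *mask, size_t len)
{
	size_t i;

	printf("%-14s key=", element);
	for (i = 0; i < len; i++)
		printf("%02x", key[i]);
	printf(" mask=");
	for (i = 0; i < len; i++)
		printf("%02x", mask[i]);
	printf("\n");
}

int main(void)
{
	/* 2001:db8::1 in network byte order. */
	unsigned char addr[16] = { 0x20, 0x01, 0x0d, 0xb8, 0, 0, 0, 0,
				   0, 0, 0, 0, 0, 0, 0, 0x01 };
	unsigned char mask[16];
	static const char * const names[4] = {
		"SRC_IP_96_127", "SRC_IP_64_95",
		"SRC_IP_32_63", "SRC_IP_0_31",
	};
	int i;

	memset(mask, 0xff, sizeof(mask));	/* exact-match mask */
	/* Four 4-byte slices at offsets 0x0, 0x4, 0x8 and 0xC. */
	for (i = 0; i < 4; i++)
		keymask_buf(names[i], addr + 4 * i, mask + 4 * i, 4);
	return 0;
}
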
@@ -192,6 +212,7 @@ static int mlxsw_sp_flower_parse_ports(struct mlxsw_sp *mlxsw_sp,
                return 0;
 
        if (ip_proto != IPPROTO_TCP && ip_proto != IPPROTO_UDP) {
+               NL_SET_ERR_MSG_MOD(f->common.extack, "Only UDP and TCP keys are supported");
                dev_err(mlxsw_sp->bus_info->dev, "Only UDP and TCP keys are supported\n");
                return -EINVAL;
        }
@@ -220,6 +241,7 @@ static int mlxsw_sp_flower_parse_tcp(struct mlxsw_sp *mlxsw_sp,
                return 0;
 
        if (ip_proto != IPPROTO_TCP) {
+               NL_SET_ERR_MSG_MOD(f->common.extack, "TCP keys supported only for TCP");
                dev_err(mlxsw_sp->bus_info->dev, "TCP keys supported only for TCP\n");
                return -EINVAL;
        }
@@ -246,6 +268,7 @@ static int mlxsw_sp_flower_parse_ip(struct mlxsw_sp *mlxsw_sp,
                return 0;
 
        if (n_proto != ETH_P_IP && n_proto != ETH_P_IPV6) {
+               NL_SET_ERR_MSG_MOD(f->common.extack, "IP keys supported only for IPv4/6");
                dev_err(mlxsw_sp->bus_info->dev, "IP keys supported only for IPv4/6\n");
                return -EINVAL;
        }
@@ -290,6 +313,7 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp,
              BIT(FLOW_DISSECTOR_KEY_IP) |
              BIT(FLOW_DISSECTOR_KEY_VLAN))) {
                dev_err(mlxsw_sp->bus_info->dev, "Unsupported key\n");
+               NL_SET_ERR_MSG_MOD(f->common.extack, "Unsupported key");
                return -EOPNOTSUPP;
        }
 
@@ -340,13 +364,17 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp,
                                                  f->mask);
 
                mlxsw_sp_acl_rulei_keymask_buf(rulei,
-                                              MLXSW_AFK_ELEMENT_DMAC,
-                                              key->dst, mask->dst,
-                                              sizeof(key->dst));
+                                              MLXSW_AFK_ELEMENT_DMAC_32_47,
+                                              key->dst, mask->dst, 2);
                mlxsw_sp_acl_rulei_keymask_buf(rulei,
-                                              MLXSW_AFK_ELEMENT_SMAC,
-                                              key->src, mask->src,
-                                              sizeof(key->src));
+                                              MLXSW_AFK_ELEMENT_DMAC_0_31,
+                                              key->dst + 2, mask->dst + 2, 4);
+               mlxsw_sp_acl_rulei_keymask_buf(rulei,
+                                              MLXSW_AFK_ELEMENT_SMAC_32_47,
+                                              key->src, mask->src, 2);
+               mlxsw_sp_acl_rulei_keymask_buf(rulei,
+                                              MLXSW_AFK_ELEMENT_SMAC_0_31,
+                                              key->src + 2, mask->src + 2, 4);
        }
 
        if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) {
@@ -387,7 +415,8 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp,
        if (err)
                return err;
 
-       return mlxsw_sp_flower_parse_actions(mlxsw_sp, block, rulei, f->exts);
+       return mlxsw_sp_flower_parse_actions(mlxsw_sp, block, rulei, f->exts,
+                                            f->common.extack);
 }
 
 int mlxsw_sp_flower_replace(struct mlxsw_sp *mlxsw_sp,
@@ -401,11 +430,12 @@ int mlxsw_sp_flower_replace(struct mlxsw_sp *mlxsw_sp,
 
        ruleset = mlxsw_sp_acl_ruleset_get(mlxsw_sp, block,
                                           f->common.chain_index,
-                                          MLXSW_SP_ACL_PROFILE_FLOWER);
+                                          MLXSW_SP_ACL_PROFILE_FLOWER, NULL);
        if (IS_ERR(ruleset))
                return PTR_ERR(ruleset);
 
-       rule = mlxsw_sp_acl_rule_create(mlxsw_sp, ruleset, f->cookie);
+       rule = mlxsw_sp_acl_rule_create(mlxsw_sp, ruleset, f->cookie,
+                                       f->common.extack);
        if (IS_ERR(rule)) {
                err = PTR_ERR(rule);
                goto err_rule_create;
@@ -445,7 +475,7 @@ void mlxsw_sp_flower_destroy(struct mlxsw_sp *mlxsw_sp,
 
        ruleset = mlxsw_sp_acl_ruleset_get(mlxsw_sp, block,
                                           f->common.chain_index,
-                                          MLXSW_SP_ACL_PROFILE_FLOWER);
+                                          MLXSW_SP_ACL_PROFILE_FLOWER, NULL);
        if (IS_ERR(ruleset))
                return;
 
@@ -471,7 +501,7 @@ int mlxsw_sp_flower_stats(struct mlxsw_sp *mlxsw_sp,
 
        ruleset = mlxsw_sp_acl_ruleset_get(mlxsw_sp, block,
                                           f->common.chain_index,
-                                          MLXSW_SP_ACL_PROFILE_FLOWER);
+                                          MLXSW_SP_ACL_PROFILE_FLOWER, NULL);
        if (WARN_ON(IS_ERR(ruleset)))
                return -EINVAL;
 
@@ -493,3 +523,41 @@ err_rule_get_stats:
        mlxsw_sp_acl_ruleset_put(mlxsw_sp, ruleset);
        return err;
 }
+
+int mlxsw_sp_flower_tmplt_create(struct mlxsw_sp *mlxsw_sp,
+                                struct mlxsw_sp_acl_block *block,
+                                struct tc_cls_flower_offload *f)
+{
+       struct mlxsw_sp_acl_ruleset *ruleset;
+       struct mlxsw_sp_acl_rule_info rulei;
+       int err;
+
+       memset(&rulei, 0, sizeof(rulei));
+       err = mlxsw_sp_flower_parse(mlxsw_sp, block, &rulei, f);
+       if (err)
+               return err;
+       ruleset = mlxsw_sp_acl_ruleset_get(mlxsw_sp, block,
+                                          f->common.chain_index,
+                                          MLXSW_SP_ACL_PROFILE_FLOWER,
+                                          &rulei.values.elusage);
+       if (IS_ERR(ruleset))
+               return PTR_ERR(ruleset);
+       /* keep the reference to the ruleset; it is put in tmplt_destroy */
+       return 0;
+}
+
+void mlxsw_sp_flower_tmplt_destroy(struct mlxsw_sp *mlxsw_sp,
+                                  struct mlxsw_sp_acl_block *block,
+                                  struct tc_cls_flower_offload *f)
+{
+       struct mlxsw_sp_acl_ruleset *ruleset;
+
+       ruleset = mlxsw_sp_acl_ruleset_get(mlxsw_sp, block,
+                                          f->common.chain_index,
+                                          MLXSW_SP_ACL_PROFILE_FLOWER, NULL);
+       if (IS_ERR(ruleset))
+               return;
+       /* Put once for the reference taken by the get above and once
+        * more for the reference kept since tmplt_create.
+        */
+       mlxsw_sp_acl_ruleset_put(mlxsw_sp, ruleset);
+       mlxsw_sp_acl_ruleset_put(mlxsw_sp, ruleset);
+}
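
/* Editor's note: mlxsw_sp_flower_tmplt_create() above intentionally
 * returns without putting the reference taken by
 * mlxsw_sp_acl_ruleset_get(), so the ruleset created for the template
 * stays alive with zero rules bound; tmplt_destroy() therefore puts
 * twice, once for its own get and once for the reference kept at
 * create time.  A minimal refcounting sketch of that keep/put pairing
 * (illustrative userspace code, not the driver's):
 */
#include <stdio.h>
#include <stdlib.h>

struct ruleset {
	int refcount;
};

/* Look up the cached ruleset, creating it on first use. */
static struct ruleset *ruleset_get(struct ruleset **cache)
{
	if (!*cache) {
		*cache = calloc(1, sizeof(**cache));
		if (!*cache)
			return NULL;
	}
	(*cache)->refcount++;
	return *cache;
}

static void ruleset_put(struct ruleset **cache)
{
	if (--(*cache)->refcount == 0) {
		free(*cache);
		*cache = NULL;
		printf("ruleset destroyed\n");
	}
}

int main(void)
{
	struct ruleset *cache = NULL;

	/* tmplt_create: take a reference and keep it. */
	if (!ruleset_get(&cache))
		return 1;

	/* tmplt_destroy: get, then put twice. */
	ruleset_get(&cache);
	ruleset_put(&cache);	/* balances the get just above */
	ruleset_put(&cache);	/* drops the reference kept at create */
	return 0;
}
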
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
index fe4327f547d23b1caa5138cfe1544f05af2511d3..fd557585514d50def8762283dd3afa86c10d0194 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
@@ -1,7 +1,7 @@
 /*
  * drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
- * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
+ * Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016-2018 Jiri Pirko <jiri@mellanox.com>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
  */
 
 #include <linux/kernel.h>
-#include <linux/bitops.h>
+#include <linux/slab.h>
 
 #include "spectrum.h"
 
-#define MLXSW_SP_KVDL_SINGLE_BASE 0
-#define MLXSW_SP_KVDL_SINGLE_SIZE 16384
-#define MLXSW_SP_KVDL_SINGLE_END \
-       (MLXSW_SP_KVDL_SINGLE_SIZE + MLXSW_SP_KVDL_SINGLE_BASE - 1)
-
-#define MLXSW_SP_KVDL_CHUNKS_BASE \
-       (MLXSW_SP_KVDL_SINGLE_BASE + MLXSW_SP_KVDL_SINGLE_SIZE)
-#define MLXSW_SP_KVDL_CHUNKS_SIZE 49152
-#define MLXSW_SP_KVDL_CHUNKS_END \
-       (MLXSW_SP_KVDL_CHUNKS_SIZE + MLXSW_SP_KVDL_CHUNKS_BASE - 1)
-
-#define MLXSW_SP_KVDL_LARGE_CHUNKS_BASE \
-       (MLXSW_SP_KVDL_CHUNKS_BASE + MLXSW_SP_KVDL_CHUNKS_SIZE)
-#define MLXSW_SP_KVDL_LARGE_CHUNKS_SIZE \
-       (MLXSW_SP_KVD_LINEAR_SIZE - MLXSW_SP_KVDL_LARGE_CHUNKS_BASE)
-#define MLXSW_SP_KVDL_LARGE_CHUNKS_END \
-       (MLXSW_SP_KVDL_LARGE_CHUNKS_SIZE + MLXSW_SP_KVDL_LARGE_CHUNKS_BASE - 1)
-
-#define MLXSW_SP_KVDL_SINGLE_ALLOC_SIZE 1
-#define MLXSW_SP_KVDL_CHUNKS_ALLOC_SIZE 32
-#define MLXSW_SP_KVDL_LARGE_CHUNKS_ALLOC_SIZE 512
-
-struct mlxsw_sp_kvdl_part_info {
-       unsigned int part_index;
-       unsigned int start_index;
-       unsigned int end_index;
-       unsigned int alloc_size;
-       enum mlxsw_sp_resource_id resource_id;
-};
-
-enum mlxsw_sp_kvdl_part_id {
-       MLXSW_SP_KVDL_PART_ID_SINGLE,
-       MLXSW_SP_KVDL_PART_ID_CHUNKS,
-       MLXSW_SP_KVDL_PART_ID_LARGE_CHUNKS,
-};
-
-#define MLXSW_SP_KVDL_PART_INFO(id)                            \
-[MLXSW_SP_KVDL_PART_ID_##id] = {                               \
-       .start_index = MLXSW_SP_KVDL_##id##_BASE,               \
-       .end_index = MLXSW_SP_KVDL_##id##_END,                  \
-       .alloc_size = MLXSW_SP_KVDL_##id##_ALLOC_SIZE,          \
-       .resource_id = MLXSW_SP_RESOURCE_KVD_LINEAR_##id,       \
-}
-
-static const struct mlxsw_sp_kvdl_part_info mlxsw_sp_kvdl_parts_info[] = {
-       MLXSW_SP_KVDL_PART_INFO(SINGLE),
-       MLXSW_SP_KVDL_PART_INFO(CHUNKS),
-       MLXSW_SP_KVDL_PART_INFO(LARGE_CHUNKS),
-};
-
-#define MLXSW_SP_KVDL_PARTS_INFO_LEN ARRAY_SIZE(mlxsw_sp_kvdl_parts_info)
-
-struct mlxsw_sp_kvdl_part {
-       struct mlxsw_sp_kvdl_part_info info;
-       unsigned long usage[0]; /* Entries */
-};
-
 struct mlxsw_sp_kvdl {
-       struct mlxsw_sp_kvdl_part *parts[MLXSW_SP_KVDL_PARTS_INFO_LEN];
+       const struct mlxsw_sp_kvdl_ops *kvdl_ops;
+       unsigned long priv[0];
+       /* priv always has to be the last item */
 };
 
-static struct mlxsw_sp_kvdl_part *
-mlxsw_sp_kvdl_alloc_size_part(struct mlxsw_sp_kvdl *kvdl,
-                             unsigned int alloc_size)
-{
-       struct mlxsw_sp_kvdl_part *part, *min_part = NULL;
-       int i;
-
-       for (i = 0; i < MLXSW_SP_KVDL_PARTS_INFO_LEN; i++) {
-               part = kvdl->parts[i];
-               if (alloc_size <= part->info.alloc_size &&
-                   (!min_part ||
-                    part->info.alloc_size <= min_part->info.alloc_size))
-                       min_part = part;
-       }
-
-       return min_part ?: ERR_PTR(-ENOBUFS);
-}
-
-static struct mlxsw_sp_kvdl_part *
-mlxsw_sp_kvdl_index_part(struct mlxsw_sp_kvdl *kvdl, u32 kvdl_index)
-{
-       struct mlxsw_sp_kvdl_part *part;
-       int i;
-
-       for (i = 0; i < MLXSW_SP_KVDL_PARTS_INFO_LEN; i++) {
-               part = kvdl->parts[i];
-               if (kvdl_index >= part->info.start_index &&
-                   kvdl_index <= part->info.end_index)
-                       return part;
-       }
-
-       return ERR_PTR(-EINVAL);
-}
-
-static u32
-mlxsw_sp_entry_index_kvdl_index(const struct mlxsw_sp_kvdl_part_info *info,
-                               unsigned int entry_index)
-{
-       return info->start_index + entry_index * info->alloc_size;
-}
-
-static unsigned int
-mlxsw_sp_kvdl_index_entry_index(const struct mlxsw_sp_kvdl_part_info *info,
-                               u32 kvdl_index)
-{
-       return (kvdl_index - info->start_index) / info->alloc_size;
-}
-
-static int mlxsw_sp_kvdl_part_alloc(struct mlxsw_sp_kvdl_part *part,
-                                   u32 *p_kvdl_index)
-{
-       const struct mlxsw_sp_kvdl_part_info *info = &part->info;
-       unsigned int entry_index, nr_entries;
-
-       nr_entries = (info->end_index - info->start_index + 1) /
-                    info->alloc_size;
-       entry_index = find_first_zero_bit(part->usage, nr_entries);
-       if (entry_index == nr_entries)
-               return -ENOBUFS;
-       __set_bit(entry_index, part->usage);
-
-       *p_kvdl_index = mlxsw_sp_entry_index_kvdl_index(info, entry_index);
-
-       return 0;
-}
-
-static void mlxsw_sp_kvdl_part_free(struct mlxsw_sp_kvdl_part *part,
-                                   u32 kvdl_index)
-{
-       const struct mlxsw_sp_kvdl_part_info *info = &part->info;
-       unsigned int entry_index;
-
-       entry_index = mlxsw_sp_kvdl_index_entry_index(info, kvdl_index);
-       __clear_bit(entry_index, part->usage);
-}
-
-int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count,
-                       u32 *p_entry_index)
-{
-       struct mlxsw_sp_kvdl_part *part;
-
-       /* Find partition with smallest allocation size satisfying the
-        * requested size.
-        */
-       part = mlxsw_sp_kvdl_alloc_size_part(mlxsw_sp->kvdl, entry_count);
-       if (IS_ERR(part))
-               return PTR_ERR(part);
-
-       return mlxsw_sp_kvdl_part_alloc(part, p_entry_index);
-}
-
-void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, int entry_index)
-{
-       struct mlxsw_sp_kvdl_part *part;
-
-       part = mlxsw_sp_kvdl_index_part(mlxsw_sp->kvdl, entry_index);
-       if (IS_ERR(part))
-               return;
-       mlxsw_sp_kvdl_part_free(part, entry_index);
-}
-
-int mlxsw_sp_kvdl_alloc_size_query(struct mlxsw_sp *mlxsw_sp,
-                                  unsigned int entry_count,
-                                  unsigned int *p_alloc_size)
-{
-       struct mlxsw_sp_kvdl_part *part;
-
-       part = mlxsw_sp_kvdl_alloc_size_part(mlxsw_sp->kvdl, entry_count);
-       if (IS_ERR(part))
-               return PTR_ERR(part);
-
-       *p_alloc_size = part->info.alloc_size;
-
-       return 0;
-}
-
-static void mlxsw_sp_kvdl_part_update(struct mlxsw_sp_kvdl_part *part,
-                                     struct mlxsw_sp_kvdl_part *part_prev,
-                                     unsigned int size)
-{
-
-       if (!part_prev) {
-               part->info.end_index = size - 1;
-       } else {
-               part->info.start_index = part_prev->info.end_index + 1;
-               part->info.end_index = part->info.start_index + size - 1;
-       }
-}
-
-static struct mlxsw_sp_kvdl_part *
-mlxsw_sp_kvdl_part_init(struct mlxsw_sp *mlxsw_sp,
-                       const struct mlxsw_sp_kvdl_part_info *info,
-                       struct mlxsw_sp_kvdl_part *part_prev)
+int mlxsw_sp_kvdl_init(struct mlxsw_sp *mlxsw_sp)
 {
-       struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
-       struct mlxsw_sp_kvdl_part *part;
-       bool need_update = true;
-       unsigned int nr_entries;
-       size_t usage_size;
-       u64 resource_size;
+       const struct mlxsw_sp_kvdl_ops *kvdl_ops = mlxsw_sp->kvdl_ops;
+       struct mlxsw_sp_kvdl *kvdl;
        int err;
 
-       err = devlink_resource_size_get(devlink, info->resource_id,
-                                       &resource_size);
-       if (err) {
-               need_update = false;
-               resource_size = info->end_index - info->start_index + 1;
-       }
-
-       nr_entries = div_u64(resource_size, info->alloc_size);
-       usage_size = BITS_TO_LONGS(nr_entries) * sizeof(unsigned long);
-       part = kzalloc(sizeof(*part) + usage_size, GFP_KERNEL);
-       if (!part)
-               return ERR_PTR(-ENOMEM);
-
-       memcpy(&part->info, info, sizeof(part->info));
-
-       if (need_update)
-               mlxsw_sp_kvdl_part_update(part, part_prev, resource_size);
-       return part;
-}
-
-static void mlxsw_sp_kvdl_part_fini(struct mlxsw_sp_kvdl_part *part)
-{
-       kfree(part);
-}
-
-static int mlxsw_sp_kvdl_parts_init(struct mlxsw_sp *mlxsw_sp)
-{
-       struct mlxsw_sp_kvdl *kvdl = mlxsw_sp->kvdl;
-       const struct mlxsw_sp_kvdl_part_info *info;
-       struct mlxsw_sp_kvdl_part *part_prev = NULL;
-       int err, i;
+       kvdl = kzalloc(sizeof(*mlxsw_sp->kvdl) + kvdl_ops->priv_size,
+                      GFP_KERNEL);
+       if (!kvdl)
+               return -ENOMEM;
+       kvdl->kvdl_ops = kvdl_ops;
+       mlxsw_sp->kvdl = kvdl;
 
-       for (i = 0; i < MLXSW_SP_KVDL_PARTS_INFO_LEN; i++) {
-               info = &mlxsw_sp_kvdl_parts_info[i];
-               kvdl->parts[i] = mlxsw_sp_kvdl_part_init(mlxsw_sp, info,
-                                                        part_prev);
-               if (IS_ERR(kvdl->parts[i])) {
-                       err = PTR_ERR(kvdl->parts[i]);
-                       goto err_kvdl_part_init;
-               }
-               part_prev = kvdl->parts[i];
-       }
+       err = kvdl_ops->init(mlxsw_sp, kvdl->priv);
+       if (err)
+               goto err_init;
        return 0;
 
-err_kvdl_part_init:
-       for (i--; i >= 0; i--)
-               mlxsw_sp_kvdl_part_fini(kvdl->parts[i]);
+err_init:
+       kfree(kvdl);
        return err;
 }
 
-static void mlxsw_sp_kvdl_parts_fini(struct mlxsw_sp *mlxsw_sp)
+void mlxsw_sp_kvdl_fini(struct mlxsw_sp *mlxsw_sp)
 {
        struct mlxsw_sp_kvdl *kvdl = mlxsw_sp->kvdl;
-       int i;
-
-       for (i = 0; i < MLXSW_SP_KVDL_PARTS_INFO_LEN; i++)
-               mlxsw_sp_kvdl_part_fini(kvdl->parts[i]);
-}
-
-static u64 mlxsw_sp_kvdl_part_occ(struct mlxsw_sp_kvdl_part *part)
-{
-       const struct mlxsw_sp_kvdl_part_info *info = &part->info;
-       unsigned int nr_entries;
-       int bit = -1;
-       u64 occ = 0;
-
-       nr_entries = (info->end_index -
-                     info->start_index + 1) /
-                     info->alloc_size;
-       while ((bit = find_next_bit(part->usage, nr_entries, bit + 1))
-               < nr_entries)
-               occ += info->alloc_size;
-       return occ;
-}
-
-static u64 mlxsw_sp_kvdl_occ_get(void *priv)
-{
-       const struct mlxsw_sp *mlxsw_sp = priv;
-       u64 occ = 0;
-       int i;
-
-       for (i = 0; i < MLXSW_SP_KVDL_PARTS_INFO_LEN; i++)
-               occ += mlxsw_sp_kvdl_part_occ(mlxsw_sp->kvdl->parts[i]);
-
-       return occ;
-}
-
-static u64 mlxsw_sp_kvdl_single_occ_get(void *priv)
-{
-       const struct mlxsw_sp *mlxsw_sp = priv;
-       struct mlxsw_sp_kvdl_part *part;
-
-       part = mlxsw_sp->kvdl->parts[MLXSW_SP_KVDL_PART_ID_SINGLE];
-       return mlxsw_sp_kvdl_part_occ(part);
-}
-
-static u64 mlxsw_sp_kvdl_chunks_occ_get(void *priv)
-{
-       const struct mlxsw_sp *mlxsw_sp = priv;
-       struct mlxsw_sp_kvdl_part *part;
-
-       part = mlxsw_sp->kvdl->parts[MLXSW_SP_KVDL_PART_ID_CHUNKS];
-       return mlxsw_sp_kvdl_part_occ(part);
-}
-
-static u64 mlxsw_sp_kvdl_large_chunks_occ_get(void *priv)
-{
-       const struct mlxsw_sp *mlxsw_sp = priv;
-       struct mlxsw_sp_kvdl_part *part;
 
-       part = mlxsw_sp->kvdl->parts[MLXSW_SP_KVDL_PART_ID_LARGE_CHUNKS];
-       return mlxsw_sp_kvdl_part_occ(part);
+       kvdl->kvdl_ops->fini(mlxsw_sp, kvdl->priv);
+       kfree(kvdl);
 }
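
/* Editor's note: the Spectrum-1-specific code being removed above (and
 * moved behind mlxsw_sp_kvdl_ops) carved the linear KVD into three
 * partitions with allocation granularities of 1, 32 and 512 entries,
 * and served each request from the smallest partition whose
 * granularity still fit.  A standalone sketch of that selection rule:
 */
#include <stdio.h>

struct part {
	const char *name;
	unsigned int alloc_size;
};

static const struct part parts[] = {
	{ "singles",	  1 },
	{ "chunks",	 32 },
	{ "large chunks", 512 },
};

static const struct part *pick_part(unsigned int entry_count)
{
	const struct part *min_part = NULL;
	size_t i;

	for (i = 0; i < sizeof(parts) / sizeof(parts[0]); i++) {
		if (entry_count <= parts[i].alloc_size &&
		    (!min_part ||
		     parts[i].alloc_size <= min_part->alloc_size))
			min_part = &parts[i];
	}
	return min_part;	/* NULL stands in for -ENOBUFS */
}

int main(void)
{
	static const unsigned int requests[] = { 1, 2, 33, 600 };
	size_t i;

	for (i = 0; i < sizeof(requests) / sizeof(requests[0]); i++) {
		const struct part *part = pick_part(requests[i]);

		printf("%u entries -> %s\n", requests[i],
		       part ? part->name : "ENOBUFS");
	}
	return 0;
}
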
 
-int mlxsw_sp_kvdl_resources_register(struct mlxsw_core *mlxsw_core)
+int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp,
+                       enum mlxsw_sp_kvdl_entry_type type,
+                       unsigned int entry_count, u32 *p_entry_index)
 {
-       struct devlink *devlink = priv_to_devlink(mlxsw_core);
-       static struct devlink_resource_size_params size_params;
-       u32 kvdl_max_size;
-       int err;
-
-       kvdl_max_size = MLXSW_CORE_RES_GET(mlxsw_core, KVD_SIZE) -
-                       MLXSW_CORE_RES_GET(mlxsw_core, KVD_SINGLE_MIN_SIZE) -
-                       MLXSW_CORE_RES_GET(mlxsw_core, KVD_DOUBLE_MIN_SIZE);
-
-       devlink_resource_size_params_init(&size_params, 0, kvdl_max_size,
-                                         MLXSW_SP_KVDL_SINGLE_ALLOC_SIZE,
-                                         DEVLINK_RESOURCE_UNIT_ENTRY);
-       err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_SINGLES,
-                                       MLXSW_SP_KVDL_SINGLE_SIZE,
-                                       MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE,
-                                       MLXSW_SP_RESOURCE_KVD_LINEAR,
-                                       &size_params);
-       if (err)
-               return err;
-
-       devlink_resource_size_params_init(&size_params, 0, kvdl_max_size,
-                                         MLXSW_SP_KVDL_CHUNKS_ALLOC_SIZE,
-                                         DEVLINK_RESOURCE_UNIT_ENTRY);
-       err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_CHUNKS,
-                                       MLXSW_SP_KVDL_CHUNKS_SIZE,
-                                       MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS,
-                                       MLXSW_SP_RESOURCE_KVD_LINEAR,
-                                       &size_params);
-       if (err)
-               return err;
+       struct mlxsw_sp_kvdl *kvdl = mlxsw_sp->kvdl;
 
-       devlink_resource_size_params_init(&size_params, 0, kvdl_max_size,
-                                         MLXSW_SP_KVDL_LARGE_CHUNKS_ALLOC_SIZE,
-                                         DEVLINK_RESOURCE_UNIT_ENTRY);
-       err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_LARGE_CHUNKS,
-                                       MLXSW_SP_KVDL_LARGE_CHUNKS_SIZE,
-                                       MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS,
-                                       MLXSW_SP_RESOURCE_KVD_LINEAR,
-                                       &size_params);
-       return err;
+       return kvdl->kvdl_ops->alloc(mlxsw_sp, kvdl->priv, type,
+                                    entry_count, p_entry_index);
 }
 
-int mlxsw_sp_kvdl_init(struct mlxsw_sp *mlxsw_sp)
+void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp,
+                       enum mlxsw_sp_kvdl_entry_type type,
+                       unsigned int entry_count, int entry_index)
 {
-       struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
-       struct mlxsw_sp_kvdl *kvdl;
-       int err;
-
-       kvdl = kzalloc(sizeof(*mlxsw_sp->kvdl), GFP_KERNEL);
-       if (!kvdl)
-               return -ENOMEM;
-       mlxsw_sp->kvdl = kvdl;
-
-       err = mlxsw_sp_kvdl_parts_init(mlxsw_sp);
-       if (err)
-               goto err_kvdl_parts_init;
-
-       devlink_resource_occ_get_register(devlink,
-                                         MLXSW_SP_RESOURCE_KVD_LINEAR,
-                                         mlxsw_sp_kvdl_occ_get,
-                                         mlxsw_sp);
-       devlink_resource_occ_get_register(devlink,
-                                         MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE,
-                                         mlxsw_sp_kvdl_single_occ_get,
-                                         mlxsw_sp);
-       devlink_resource_occ_get_register(devlink,
-                                         MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS,
-                                         mlxsw_sp_kvdl_chunks_occ_get,
-                                         mlxsw_sp);
-       devlink_resource_occ_get_register(devlink,
-                                         MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS,
-                                         mlxsw_sp_kvdl_large_chunks_occ_get,
-                                         mlxsw_sp);
-
-       return 0;
+       struct mlxsw_sp_kvdl *kvdl = mlxsw_sp->kvdl;
 
-err_kvdl_parts_init:
-       kfree(mlxsw_sp->kvdl);
-       return err;
+       kvdl->kvdl_ops->free(mlxsw_sp, kvdl->priv, type,
+                            entry_count, entry_index);
 }
 
-void mlxsw_sp_kvdl_fini(struct mlxsw_sp *mlxsw_sp)
+int mlxsw_sp_kvdl_alloc_count_query(struct mlxsw_sp *mlxsw_sp,
+                                   enum mlxsw_sp_kvdl_entry_type type,
+                                   unsigned int entry_count,
+                                   unsigned int *p_alloc_count)
 {
-       struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
+       struct mlxsw_sp_kvdl *kvdl = mlxsw_sp->kvdl;
 
-       devlink_resource_occ_get_unregister(devlink,
-                                           MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS);
-       devlink_resource_occ_get_unregister(devlink,
-                                           MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS);
-       devlink_resource_occ_get_unregister(devlink,
-                                           MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE);
-       devlink_resource_occ_get_unregister(devlink,
-                                           MLXSW_SP_RESOURCE_KVD_LINEAR);
-       mlxsw_sp_kvdl_parts_fini(mlxsw_sp);
-       kfree(mlxsw_sp->kvdl);
+       return kvdl->kvdl_ops->alloc_size_query(mlxsw_sp, kvdl->priv, type,
+                                               entry_count, p_alloc_count);
 }
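
The KVD linear code above is reduced to a thin dispatcher: allocation
policy moves behind a per-ASIC ops struct, and this file now only
allocates the container and forwards calls. A minimal sketch of the
interface those wrappers imply, reconstructed from the call sites; the
authoritative definitions live in headers this diff does not show:

	struct mlxsw_sp_kvdl_ops {
		size_t priv_size;	/* backend state, co-allocated below */
		int (*init)(struct mlxsw_sp *mlxsw_sp, void *priv);
		void (*fini)(struct mlxsw_sp *mlxsw_sp, void *priv);
		int (*alloc)(struct mlxsw_sp *mlxsw_sp, void *priv,
			     enum mlxsw_sp_kvdl_entry_type type,
			     unsigned int entry_count, u32 *p_entry_index);
		void (*free)(struct mlxsw_sp *mlxsw_sp, void *priv,
			     enum mlxsw_sp_kvdl_entry_type type,
			     unsigned int entry_count, int entry_index);
		int (*alloc_size_query)(struct mlxsw_sp *mlxsw_sp, void *priv,
					enum mlxsw_sp_kvdl_entry_type type,
					unsigned int entry_count,
					unsigned int *p_alloc_count);
	};

	/* One allocation holds dispatcher and backend state, hence
	 * kzalloc(sizeof(*kvdl) + kvdl_ops->priv_size, GFP_KERNEL).
	 */
	struct mlxsw_sp_kvdl {
		const struct mlxsw_sp_kvdl_ops *kvdl_ops;
		unsigned char priv[];
	};

The devlink resource registration and occupancy getters deleted above
presumably move along with the parts logic into the ASIC-specific
backend, which is outside this hunk.
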
index a82539609d492733332c2a6a18eed0952e4615c0..98dcaf78365c2cf8ad5539c1c11809d9e8e089e1 100644
@@ -1075,6 +1075,6 @@ void mlxsw_sp_mr_fini(struct mlxsw_sp *mlxsw_sp)
        struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
 
        cancel_delayed_work_sync(&mr->stats_update_dw);
-       mr->mr_ops->fini(mr->priv);
+       mr->mr_ops->fini(mlxsw_sp, mr->priv);
        kfree(mr);
 }
index 7c864a86811d5321a8f822b438c56d43cdfb8342..c92fa90dca31e4c49f839e96051aef8cc9fd63b7 100644
@@ -46,15 +46,6 @@ enum mlxsw_sp_mr_route_action {
        MLXSW_SP_MR_ROUTE_ACTION_TRAP_AND_FORWARD,
 };
 
-enum mlxsw_sp_mr_route_prio {
-       MLXSW_SP_MR_ROUTE_PRIO_SG,
-       MLXSW_SP_MR_ROUTE_PRIO_STARG,
-       MLXSW_SP_MR_ROUTE_PRIO_CATCHALL,
-       __MLXSW_SP_MR_ROUTE_PRIO_MAX
-};
-
-#define MLXSW_SP_MR_ROUTE_PRIO_MAX (__MLXSW_SP_MR_ROUTE_PRIO_MAX - 1)
-
 struct mlxsw_sp_mr_route_key {
        int vrid;
        enum mlxsw_sp_l3proto proto;
@@ -101,7 +92,7 @@ struct mlxsw_sp_mr_ops {
                              u16 erif_index);
        void (*route_destroy)(struct mlxsw_sp *mlxsw_sp, void *priv,
                              void *route_priv);
-       void (*fini)(void *priv);
+       void (*fini)(struct mlxsw_sp *mlxsw_sp, void *priv);
 };
 
 struct mlxsw_sp_mr;
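
The fini signature change here matches the call-site update in
spectrum_mr.c above: on teardown the TCAM backend now needs mlxsw_sp to
reach per-ASIC state (mlxsw_sp->mr_tcam_ops), not just its own priv.
The route priority enum leaves this header, but route_params->prio is
still consumed in spectrum_mr_tcam.c below, so the enum itself must
survive in a header shared with the pluggable TCAM backends.
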
index 4f4c0d31188364a9f9821a5dbcd6653622ac3461..e9c9f1f45b9da0afaf8fe367293259ba49fe9345 100644
@@ -1,7 +1,8 @@
 /*
  * drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
  * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com>
+ * Copyright (c) 2018 Jiri Pirko <jiri@mellanox.com>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -35,7 +36,6 @@
 #include <linux/kernel.h>
 #include <linux/list.h>
 #include <linux/netdevice.h>
-#include <linux/parman.h>
 
 #include "spectrum_mr_tcam.h"
 #include "reg.h"
 #include "core_acl_flex_actions.h"
 #include "spectrum_mr.h"
 
-struct mlxsw_sp_mr_tcam_region {
-       struct mlxsw_sp *mlxsw_sp;
-       enum mlxsw_reg_rtar_key_type rtar_key_type;
-       struct parman *parman;
-       struct parman_prio *parman_prios;
-};
-
 struct mlxsw_sp_mr_tcam {
-       struct mlxsw_sp_mr_tcam_region tcam_regions[MLXSW_SP_L3_PROTO_MAX];
+       void *priv;
 };
 
 /* This struct maps to one RIGR2 register entry */
@@ -84,8 +77,6 @@ mlxsw_sp_mr_erif_list_init(struct mlxsw_sp_mr_tcam_erif_list *erif_list)
        INIT_LIST_HEAD(&erif_list->erif_sublists);
 }
 
-#define MLXSW_SP_KVDL_RIGR2_SIZE 1
-
 static struct mlxsw_sp_mr_erif_sublist *
 mlxsw_sp_mr_erif_sublist_create(struct mlxsw_sp *mlxsw_sp,
                                struct mlxsw_sp_mr_tcam_erif_list *erif_list)
@@ -96,8 +87,8 @@ mlxsw_sp_mr_erif_sublist_create(struct mlxsw_sp *mlxsw_sp,
        erif_sublist = kzalloc(sizeof(*erif_sublist), GFP_KERNEL);
        if (!erif_sublist)
                return ERR_PTR(-ENOMEM);
-       err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_RIGR2_SIZE,
-                                 &erif_sublist->rigr2_kvdl_index);
+       err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_MCRIGR,
+                                 1, &erif_sublist->rigr2_kvdl_index);
        if (err) {
                kfree(erif_sublist);
                return ERR_PTR(err);
@@ -112,7 +103,8 @@ mlxsw_sp_mr_erif_sublist_destroy(struct mlxsw_sp *mlxsw_sp,
                                 struct mlxsw_sp_mr_erif_sublist *erif_sublist)
 {
        list_del(&erif_sublist->list);
-       mlxsw_sp_kvdl_free(mlxsw_sp, erif_sublist->rigr2_kvdl_index);
+       mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_MCRIGR,
+                          1, erif_sublist->rigr2_kvdl_index);
        kfree(erif_sublist);
 }
 
@@ -221,12 +213,11 @@ struct mlxsw_sp_mr_tcam_route {
        struct mlxsw_sp_mr_tcam_erif_list erif_list;
        struct mlxsw_afa_block *afa_block;
        u32 counter_index;
-       struct parman_item parman_item;
-       struct parman_prio *parman_prio;
        enum mlxsw_sp_mr_route_action action;
        struct mlxsw_sp_mr_route_key key;
        u16 irif_index;
        u16 min_mtu;
+       void *priv;
 };
 
 static struct mlxsw_afa_block *
@@ -297,60 +288,6 @@ mlxsw_sp_mr_tcam_afa_block_destroy(struct mlxsw_afa_block *afa_block)
        mlxsw_afa_block_destroy(afa_block);
 }
 
-static int mlxsw_sp_mr_tcam_route_replace(struct mlxsw_sp *mlxsw_sp,
-                                         struct parman_item *parman_item,
-                                         struct mlxsw_sp_mr_route_key *key,
-                                         struct mlxsw_afa_block *afa_block)
-{
-       char rmft2_pl[MLXSW_REG_RMFT2_LEN];
-
-       switch (key->proto) {
-       case MLXSW_SP_L3_PROTO_IPV4:
-               mlxsw_reg_rmft2_ipv4_pack(rmft2_pl, true, parman_item->index,
-                                         key->vrid,
-                                         MLXSW_REG_RMFT2_IRIF_MASK_IGNORE, 0,
-                                         ntohl(key->group.addr4),
-                                         ntohl(key->group_mask.addr4),
-                                         ntohl(key->source.addr4),
-                                         ntohl(key->source_mask.addr4),
-                                         mlxsw_afa_block_first_set(afa_block));
-               break;
-       case MLXSW_SP_L3_PROTO_IPV6:
-               mlxsw_reg_rmft2_ipv6_pack(rmft2_pl, true, parman_item->index,
-                                         key->vrid,
-                                         MLXSW_REG_RMFT2_IRIF_MASK_IGNORE, 0,
-                                         key->group.addr6,
-                                         key->group_mask.addr6,
-                                         key->source.addr6,
-                                         key->source_mask.addr6,
-                                         mlxsw_afa_block_first_set(afa_block));
-       }
-
-       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rmft2), rmft2_pl);
-}
-
-static int mlxsw_sp_mr_tcam_route_remove(struct mlxsw_sp *mlxsw_sp, int vrid,
-                                        struct mlxsw_sp_mr_route_key *key,
-                                        struct parman_item *parman_item)
-{
-       struct in6_addr zero_addr = IN6ADDR_ANY_INIT;
-       char rmft2_pl[MLXSW_REG_RMFT2_LEN];
-
-       switch (key->proto) {
-       case MLXSW_SP_L3_PROTO_IPV4:
-               mlxsw_reg_rmft2_ipv4_pack(rmft2_pl, false, parman_item->index,
-                                         vrid, 0, 0, 0, 0, 0, 0, NULL);
-               break;
-       case MLXSW_SP_L3_PROTO_IPV6:
-               mlxsw_reg_rmft2_ipv6_pack(rmft2_pl, false, parman_item->index,
-                                         vrid, 0, 0, zero_addr, zero_addr,
-                                         zero_addr, zero_addr, NULL);
-               break;
-       }
-
-       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rmft2), rmft2_pl);
-}
-
 static int
 mlxsw_sp_mr_tcam_erif_populate(struct mlxsw_sp *mlxsw_sp,
                               struct mlxsw_sp_mr_tcam_erif_list *erif_list,
@@ -370,51 +307,12 @@ mlxsw_sp_mr_tcam_erif_populate(struct mlxsw_sp *mlxsw_sp,
        return 0;
 }
 
-static struct mlxsw_sp_mr_tcam_region *
-mlxsw_sp_mr_tcam_protocol_region(struct mlxsw_sp_mr_tcam *mr_tcam,
-                                enum mlxsw_sp_l3proto proto)
-{
-       return &mr_tcam->tcam_regions[proto];
-}
-
-static int
-mlxsw_sp_mr_tcam_route_parman_item_add(struct mlxsw_sp_mr_tcam *mr_tcam,
-                                      struct mlxsw_sp_mr_tcam_route *route,
-                                      enum mlxsw_sp_mr_route_prio prio)
-{
-       struct mlxsw_sp_mr_tcam_region *tcam_region;
-       int err;
-
-       tcam_region = mlxsw_sp_mr_tcam_protocol_region(mr_tcam,
-                                                      route->key.proto);
-       err = parman_item_add(tcam_region->parman,
-                             &tcam_region->parman_prios[prio],
-                             &route->parman_item);
-       if (err)
-               return err;
-
-       route->parman_prio = &tcam_region->parman_prios[prio];
-       return 0;
-}
-
-static void
-mlxsw_sp_mr_tcam_route_parman_item_remove(struct mlxsw_sp_mr_tcam *mr_tcam,
-                                         struct mlxsw_sp_mr_tcam_route *route)
-{
-       struct mlxsw_sp_mr_tcam_region *tcam_region;
-
-       tcam_region = mlxsw_sp_mr_tcam_protocol_region(mr_tcam,
-                                                      route->key.proto);
-
-       parman_item_remove(tcam_region->parman,
-                          route->parman_prio, &route->parman_item);
-}
-
 static int
 mlxsw_sp_mr_tcam_route_create(struct mlxsw_sp *mlxsw_sp, void *priv,
                              void *route_priv,
                              struct mlxsw_sp_mr_route_params *route_params)
 {
+       const struct mlxsw_sp_mr_tcam_ops *ops = mlxsw_sp->mr_tcam_ops;
        struct mlxsw_sp_mr_tcam_route *route = route_priv;
        struct mlxsw_sp_mr_tcam *mr_tcam = priv;
        int err;
@@ -448,22 +346,23 @@ mlxsw_sp_mr_tcam_route_create(struct mlxsw_sp *mlxsw_sp, void *priv,
                goto err_afa_block_create;
        }
 
-       /* Allocate place in the TCAM */
-       err = mlxsw_sp_mr_tcam_route_parman_item_add(mr_tcam, route,
-                                                    route_params->prio);
-       if (err)
-               goto err_parman_item_add;
+       route->priv = kzalloc(ops->route_priv_size, GFP_KERNEL);
+       if (!route->priv) {
+               err = -ENOMEM;
+               goto err_route_priv_alloc;
+       }
 
        /* Write the route to the TCAM */
-       err = mlxsw_sp_mr_tcam_route_replace(mlxsw_sp, &route->parman_item,
-                                            &route->key, route->afa_block);
+       err = ops->route_create(mlxsw_sp, mr_tcam->priv, route->priv,
+                               &route->key, route->afa_block,
+                               route_params->prio);
        if (err)
-               goto err_route_replace;
+               goto err_route_create;
        return 0;
 
-err_route_replace:
-       mlxsw_sp_mr_tcam_route_parman_item_remove(mr_tcam, route);
-err_parman_item_add:
+err_route_create:
+       kfree(route->priv);
+err_route_priv_alloc:
        mlxsw_sp_mr_tcam_afa_block_destroy(route->afa_block);
 err_afa_block_create:
        mlxsw_sp_flow_counter_free(mlxsw_sp, route->counter_index);
@@ -476,12 +375,12 @@ err_counter_alloc:
 static void mlxsw_sp_mr_tcam_route_destroy(struct mlxsw_sp *mlxsw_sp,
                                           void *priv, void *route_priv)
 {
+       const struct mlxsw_sp_mr_tcam_ops *ops = mlxsw_sp->mr_tcam_ops;
        struct mlxsw_sp_mr_tcam_route *route = route_priv;
        struct mlxsw_sp_mr_tcam *mr_tcam = priv;
 
-       mlxsw_sp_mr_tcam_route_remove(mlxsw_sp, route->key.vrid,
-                                     &route->key, &route->parman_item);
-       mlxsw_sp_mr_tcam_route_parman_item_remove(mr_tcam, route);
+       ops->route_destroy(mlxsw_sp, mr_tcam->priv, route->priv, &route->key);
+       kfree(route->priv);
        mlxsw_sp_mr_tcam_afa_block_destroy(route->afa_block);
        mlxsw_sp_flow_counter_free(mlxsw_sp, route->counter_index);
        mlxsw_sp_mr_erif_list_flush(mlxsw_sp, &route->erif_list);
@@ -502,6 +401,7 @@ mlxsw_sp_mr_tcam_route_action_update(struct mlxsw_sp *mlxsw_sp,
                                     void *route_priv,
                                     enum mlxsw_sp_mr_route_action route_action)
 {
+       const struct mlxsw_sp_mr_tcam_ops *ops = mlxsw_sp->mr_tcam_ops;
        struct mlxsw_sp_mr_tcam_route *route = route_priv;
        struct mlxsw_afa_block *afa_block;
        int err;
@@ -516,8 +416,7 @@ mlxsw_sp_mr_tcam_route_action_update(struct mlxsw_sp *mlxsw_sp,
                return PTR_ERR(afa_block);
 
        /* Update the TCAM route entry */
-       err = mlxsw_sp_mr_tcam_route_replace(mlxsw_sp, &route->parman_item,
-                                            &route->key, afa_block);
+       err = ops->route_update(mlxsw_sp, route->priv, &route->key, afa_block);
        if (err)
                goto err;
 
@@ -534,6 +433,7 @@ err:
 static int mlxsw_sp_mr_tcam_route_min_mtu_update(struct mlxsw_sp *mlxsw_sp,
                                                 void *route_priv, u16 min_mtu)
 {
+       const struct mlxsw_sp_mr_tcam_ops *ops = mlxsw_sp->mr_tcam_ops;
        struct mlxsw_sp_mr_tcam_route *route = route_priv;
        struct mlxsw_afa_block *afa_block;
        int err;
@@ -549,8 +449,7 @@ static int mlxsw_sp_mr_tcam_route_min_mtu_update(struct mlxsw_sp *mlxsw_sp,
                return PTR_ERR(afa_block);
 
        /* Update the TCAM route entry */
-       err = mlxsw_sp_mr_tcam_route_replace(mlxsw_sp, &route->parman_item,
-                                            &route->key, afa_block);
+       err = ops->route_update(mlxsw_sp, route->priv, &route->key, afa_block);
        if (err)
                goto err;
 
@@ -596,6 +495,7 @@ static int mlxsw_sp_mr_tcam_route_erif_add(struct mlxsw_sp *mlxsw_sp,
 static int mlxsw_sp_mr_tcam_route_erif_del(struct mlxsw_sp *mlxsw_sp,
                                           void *route_priv, u16 erif_index)
 {
+       const struct mlxsw_sp_mr_tcam_ops *ops = mlxsw_sp->mr_tcam_ops;
        struct mlxsw_sp_mr_tcam_route *route = route_priv;
        struct mlxsw_sp_mr_erif_sublist *erif_sublist;
        struct mlxsw_sp_mr_tcam_erif_list erif_list;
@@ -630,8 +530,7 @@ static int mlxsw_sp_mr_tcam_route_erif_del(struct mlxsw_sp *mlxsw_sp,
        }
 
        /* Update the TCAM route entry */
-       err = mlxsw_sp_mr_tcam_route_replace(mlxsw_sp, &route->parman_item,
-                                            &route->key, afa_block);
+       err = ops->route_update(mlxsw_sp, route->priv, &route->key, afa_block);
        if (err)
                goto err_route_write;
 
@@ -653,6 +552,7 @@ static int
 mlxsw_sp_mr_tcam_route_update(struct mlxsw_sp *mlxsw_sp, void *route_priv,
                              struct mlxsw_sp_mr_route_info *route_info)
 {
+       const struct mlxsw_sp_mr_tcam_ops *ops = mlxsw_sp->mr_tcam_ops;
        struct mlxsw_sp_mr_tcam_route *route = route_priv;
        struct mlxsw_sp_mr_tcam_erif_list erif_list;
        struct mlxsw_afa_block *afa_block;
@@ -677,8 +577,7 @@ mlxsw_sp_mr_tcam_route_update(struct mlxsw_sp *mlxsw_sp, void *route_priv,
        }
 
        /* Update the TCAM route entry */
-       err = mlxsw_sp_mr_tcam_route_replace(mlxsw_sp, &route->parman_item,
-                                            &route->key, afa_block);
+       err = ops->route_update(mlxsw_sp, route->priv, &route->key, afa_block);
        if (err)
                goto err_route_write;
 
@@ -699,167 +598,36 @@ err_erif_populate:
        return err;
 }
 
-#define MLXSW_SP_MR_TCAM_REGION_BASE_COUNT 16
-#define MLXSW_SP_MR_TCAM_REGION_RESIZE_STEP 16
-
-static int
-mlxsw_sp_mr_tcam_region_alloc(struct mlxsw_sp_mr_tcam_region *mr_tcam_region)
-{
-       struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp;
-       char rtar_pl[MLXSW_REG_RTAR_LEN];
-
-       mlxsw_reg_rtar_pack(rtar_pl, MLXSW_REG_RTAR_OP_ALLOCATE,
-                           mr_tcam_region->rtar_key_type,
-                           MLXSW_SP_MR_TCAM_REGION_BASE_COUNT);
-       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtar), rtar_pl);
-}
-
-static void
-mlxsw_sp_mr_tcam_region_free(struct mlxsw_sp_mr_tcam_region *mr_tcam_region)
-{
-       struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp;
-       char rtar_pl[MLXSW_REG_RTAR_LEN];
-
-       mlxsw_reg_rtar_pack(rtar_pl, MLXSW_REG_RTAR_OP_DEALLOCATE,
-                           mr_tcam_region->rtar_key_type, 0);
-       mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtar), rtar_pl);
-}
-
-static int mlxsw_sp_mr_tcam_region_parman_resize(void *priv,
-                                                unsigned long new_count)
-{
-       struct mlxsw_sp_mr_tcam_region *mr_tcam_region = priv;
-       struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp;
-       char rtar_pl[MLXSW_REG_RTAR_LEN];
-       u64 max_tcam_rules;
-
-       max_tcam_rules = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_TCAM_RULES);
-       if (new_count > max_tcam_rules)
-               return -EINVAL;
-       mlxsw_reg_rtar_pack(rtar_pl, MLXSW_REG_RTAR_OP_RESIZE,
-                           mr_tcam_region->rtar_key_type, new_count);
-       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtar), rtar_pl);
-}
-
-static void mlxsw_sp_mr_tcam_region_parman_move(void *priv,
-                                               unsigned long from_index,
-                                               unsigned long to_index,
-                                               unsigned long count)
-{
-       struct mlxsw_sp_mr_tcam_region *mr_tcam_region = priv;
-       struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp;
-       char rrcr_pl[MLXSW_REG_RRCR_LEN];
-
-       mlxsw_reg_rrcr_pack(rrcr_pl, MLXSW_REG_RRCR_OP_MOVE,
-                           from_index, count,
-                           mr_tcam_region->rtar_key_type, to_index);
-       mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rrcr), rrcr_pl);
-}
-
-static const struct parman_ops mlxsw_sp_mr_tcam_region_parman_ops = {
-       .base_count     = MLXSW_SP_MR_TCAM_REGION_BASE_COUNT,
-       .resize_step    = MLXSW_SP_MR_TCAM_REGION_RESIZE_STEP,
-       .resize         = mlxsw_sp_mr_tcam_region_parman_resize,
-       .move           = mlxsw_sp_mr_tcam_region_parman_move,
-       .algo           = PARMAN_ALGO_TYPE_LSORT,
-};
-
-static int
-mlxsw_sp_mr_tcam_region_init(struct mlxsw_sp *mlxsw_sp,
-                            struct mlxsw_sp_mr_tcam_region *mr_tcam_region,
-                            enum mlxsw_reg_rtar_key_type rtar_key_type)
-{
-       struct parman_prio *parman_prios;
-       struct parman *parman;
-       int err;
-       int i;
-
-       mr_tcam_region->rtar_key_type = rtar_key_type;
-       mr_tcam_region->mlxsw_sp = mlxsw_sp;
-
-       err = mlxsw_sp_mr_tcam_region_alloc(mr_tcam_region);
-       if (err)
-               return err;
-
-       parman = parman_create(&mlxsw_sp_mr_tcam_region_parman_ops,
-                              mr_tcam_region);
-       if (!parman) {
-               err = -ENOMEM;
-               goto err_parman_create;
-       }
-       mr_tcam_region->parman = parman;
-
-       parman_prios = kmalloc_array(MLXSW_SP_MR_ROUTE_PRIO_MAX + 1,
-                                    sizeof(*parman_prios), GFP_KERNEL);
-       if (!parman_prios) {
-               err = -ENOMEM;
-               goto err_parman_prios_alloc;
-       }
-       mr_tcam_region->parman_prios = parman_prios;
-
-       for (i = 0; i < MLXSW_SP_MR_ROUTE_PRIO_MAX + 1; i++)
-               parman_prio_init(mr_tcam_region->parman,
-                                &mr_tcam_region->parman_prios[i], i);
-       return 0;
-
-err_parman_prios_alloc:
-       parman_destroy(parman);
-err_parman_create:
-       mlxsw_sp_mr_tcam_region_free(mr_tcam_region);
-       return err;
-}
-
-static void
-mlxsw_sp_mr_tcam_region_fini(struct mlxsw_sp_mr_tcam_region *mr_tcam_region)
-{
-       int i;
-
-       for (i = 0; i < MLXSW_SP_MR_ROUTE_PRIO_MAX + 1; i++)
-               parman_prio_fini(&mr_tcam_region->parman_prios[i]);
-       kfree(mr_tcam_region->parman_prios);
-       parman_destroy(mr_tcam_region->parman);
-       mlxsw_sp_mr_tcam_region_free(mr_tcam_region);
-}
-
 static int mlxsw_sp_mr_tcam_init(struct mlxsw_sp *mlxsw_sp, void *priv)
 {
+       const struct mlxsw_sp_mr_tcam_ops *ops = mlxsw_sp->mr_tcam_ops;
        struct mlxsw_sp_mr_tcam *mr_tcam = priv;
-       struct mlxsw_sp_mr_tcam_region *region = &mr_tcam->tcam_regions[0];
-       u32 rtar_key;
        int err;
 
-       if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MC_ERIF_LIST_ENTRIES) ||
-           !MLXSW_CORE_RES_VALID(mlxsw_sp->core, ACL_MAX_TCAM_RULES))
+       if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MC_ERIF_LIST_ENTRIES))
                return -EIO;
 
-       rtar_key = MLXSW_REG_RTAR_KEY_TYPE_IPV4_MULTICAST;
-       err = mlxsw_sp_mr_tcam_region_init(mlxsw_sp,
-                                          &region[MLXSW_SP_L3_PROTO_IPV4],
-                                          rtar_key);
-       if (err)
-               return err;
+       mr_tcam->priv = kzalloc(ops->priv_size, GFP_KERNEL);
+       if (!mr_tcam->priv)
+               return -ENOMEM;
 
-       rtar_key = MLXSW_REG_RTAR_KEY_TYPE_IPV6_MULTICAST;
-       err = mlxsw_sp_mr_tcam_region_init(mlxsw_sp,
-                                          &region[MLXSW_SP_L3_PROTO_IPV6],
-                                          rtar_key);
+       err = ops->init(mlxsw_sp, mr_tcam->priv);
        if (err)
-               goto err_ipv6_region_init;
-
+               goto err_init;
        return 0;
 
-err_ipv6_region_init:
-       mlxsw_sp_mr_tcam_region_fini(&region[MLXSW_SP_L3_PROTO_IPV4]);
+err_init:
+       kfree(mr_tcam->priv);
        return err;
 }
 
-static void mlxsw_sp_mr_tcam_fini(void *priv)
+static void mlxsw_sp_mr_tcam_fini(struct mlxsw_sp *mlxsw_sp, void *priv)
 {
+       const struct mlxsw_sp_mr_tcam_ops *ops = mlxsw_sp->mr_tcam_ops;
        struct mlxsw_sp_mr_tcam *mr_tcam = priv;
-       struct mlxsw_sp_mr_tcam_region *region = &mr_tcam->tcam_regions[0];
 
-       mlxsw_sp_mr_tcam_region_fini(&region[MLXSW_SP_L3_PROTO_IPV6]);
-       mlxsw_sp_mr_tcam_region_fini(&region[MLXSW_SP_L3_PROTO_IPV4]);
+       ops->fini(mr_tcam->priv);
+       kfree(mr_tcam->priv);
 }
 
 const struct mlxsw_sp_mr_ops mlxsw_sp_mr_tcam_ops = {
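
As with KVDL, this file keeps only the protocol-neutral glue (flow
counters, flexible actions, eRIF lists) and delegates TCAM region and
rule handling to mlxsw_sp->mr_tcam_ops. A sketch of the ops shape the
call sites in this file imply; treat it as a reconstruction, not the
authoritative definition:

	struct mlxsw_sp_mr_tcam_ops {
		size_t priv_size;
		int (*init)(struct mlxsw_sp *mlxsw_sp, void *priv);
		void (*fini)(void *priv);
		size_t route_priv_size;
		int (*route_create)(struct mlxsw_sp *mlxsw_sp, void *priv,
				    void *route_priv,
				    struct mlxsw_sp_mr_route_key *key,
				    struct mlxsw_afa_block *afa_block,
				    enum mlxsw_sp_mr_route_prio prio);
		void (*route_destroy)(struct mlxsw_sp *mlxsw_sp, void *priv,
				      void *route_priv,
				      struct mlxsw_sp_mr_route_key *key);
		int (*route_update)(struct mlxsw_sp *mlxsw_sp,
				    void *route_priv,
				    struct mlxsw_sp_mr_route_key *key,
				    struct mlxsw_afa_block *afa_block);
	};

All the parman region plumbing removed above (RTAR allocation, LSORT
priorities, RRCR moves) is the sort of state a Spectrum-specific
implementation of these ops would carry.
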
index 77b2adb293415a9de16caaabbd203b397cd12a4a..eec7166fad62036f2c3de3fe73c79034b872338b 100644
@@ -48,6 +48,7 @@
 #include <linux/route.h>
 #include <linux/gcd.h>
 #include <linux/random.h>
+#include <linux/if_macvlan.h>
 #include <net/netevent.h>
 #include <net/neighbour.h>
 #include <net/arp.h>
@@ -60,6 +61,7 @@
 #include <net/ndisc.h>
 #include <net/ipv6.h>
 #include <net/fib_notifier.h>
+#include <net/switchdev.h>
 
 #include "spectrum.h"
 #include "core.h"
@@ -163,7 +165,9 @@ struct mlxsw_sp_rif_ops {
                      const struct mlxsw_sp_rif_params *params);
        int (*configure)(struct mlxsw_sp_rif *rif);
        void (*deconfigure)(struct mlxsw_sp_rif *rif);
-       struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_rif *rif);
+       struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_rif *rif,
+                                        struct netlink_ext_ack *extack);
+       void (*fdb_del)(struct mlxsw_sp_rif *rif, const char *mac);
 };
 
 static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree);
@@ -342,10 +346,6 @@ static void mlxsw_sp_rif_counters_free(struct mlxsw_sp_rif *rif)
        mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
 }
 
-static struct mlxsw_sp_rif *
-mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
-                        const struct net_device *dev);
-
 #define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE + 1)
 
 struct mlxsw_sp_prefix_usage {
@@ -1109,7 +1109,8 @@ mlxsw_sp_fib_entry_decap_init(struct mlxsw_sp *mlxsw_sp,
        u32 tunnel_index;
        int err;
 
-       err = mlxsw_sp_kvdl_alloc(mlxsw_sp, 1, &tunnel_index);
+       err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
+                                 1, &tunnel_index);
        if (err)
                return err;
 
@@ -1125,7 +1126,8 @@ static void mlxsw_sp_fib_entry_decap_fini(struct mlxsw_sp *mlxsw_sp,
        /* Unlink this node from the IPIP entry that it's the decap entry of. */
        fib_entry->decap.ipip_entry->decap_fib_entry = NULL;
        fib_entry->decap.ipip_entry = NULL;
-       mlxsw_sp_kvdl_free(mlxsw_sp, fib_entry->decap.tunnel_index);
+       mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
+                          1, fib_entry->decap.tunnel_index);
 }
 
 static struct mlxsw_sp_fib_node *
@@ -2434,17 +2436,48 @@ static void mlxsw_sp_router_mp_hash_event_work(struct work_struct *work)
        kfree(net_work);
 }
 
+static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp);
+
+static void mlxsw_sp_router_update_priority_work(struct work_struct *work)
+{
+       struct mlxsw_sp_netevent_work *net_work =
+               container_of(work, struct mlxsw_sp_netevent_work, work);
+       struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
+
+       __mlxsw_sp_router_init(mlxsw_sp);
+       kfree(net_work);
+}
+
+static int mlxsw_sp_router_schedule_work(struct net *net,
+                                        struct notifier_block *nb,
+                                        void (*cb)(struct work_struct *))
+{
+       struct mlxsw_sp_netevent_work *net_work;
+       struct mlxsw_sp_router *router;
+
+       if (!net_eq(net, &init_net))
+               return NOTIFY_DONE;
+
+       net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
+       if (!net_work)
+               return NOTIFY_BAD;
+
+       router = container_of(nb, struct mlxsw_sp_router, netevent_nb);
+       INIT_WORK(&net_work->work, cb);
+       net_work->mlxsw_sp = router->mlxsw_sp;
+       mlxsw_core_schedule_work(&net_work->work);
+       return NOTIFY_DONE;
+}
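
Factoring these two netevent cases into mlxsw_sp_router_schedule_work()
keeps the notifier constraints in one place: netevent notifiers may run
in atomic context, hence the GFP_ATOMIC allocation, and the actual
device reconfiguration is deferred to a work item via
mlxsw_core_schedule_work().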
+
 static int mlxsw_sp_router_netevent_event(struct notifier_block *nb,
                                          unsigned long event, void *ptr)
 {
        struct mlxsw_sp_netevent_work *net_work;
        struct mlxsw_sp_port *mlxsw_sp_port;
-       struct mlxsw_sp_router *router;
        struct mlxsw_sp *mlxsw_sp;
        unsigned long interval;
        struct neigh_parms *p;
        struct neighbour *n;
-       struct net *net;
 
        switch (event) {
        case NETEVENT_DELAY_PROBE_TIME_UPDATE:
@@ -2498,20 +2531,12 @@ static int mlxsw_sp_router_netevent_event(struct notifier_block *nb,
                break;
        case NETEVENT_IPV4_MPATH_HASH_UPDATE:
        case NETEVENT_IPV6_MPATH_HASH_UPDATE:
-               net = ptr;
-
-               if (!net_eq(net, &init_net))
-                       return NOTIFY_DONE;
-
-               net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
-               if (!net_work)
-                       return NOTIFY_BAD;
+               return mlxsw_sp_router_schedule_work(ptr, nb,
+                               mlxsw_sp_router_mp_hash_event_work);
 
-               router = container_of(nb, struct mlxsw_sp_router, netevent_nb);
-               INIT_WORK(&net_work->work, mlxsw_sp_router_mp_hash_event_work);
-               net_work->mlxsw_sp = router->mlxsw_sp;
-               mlxsw_core_schedule_work(&net_work->work);
-               break;
+       case NETEVENT_IPV4_FWD_UPDATE_PRIORITY_UPDATE:
+               return mlxsw_sp_router_schedule_work(ptr, nb,
+                               mlxsw_sp_router_update_priority_work);
        }
 
        return NOTIFY_DONE;
@@ -3165,8 +3190,9 @@ static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp,
         * by the device and make sure the request can be satisfied.
         */
        mlxsw_sp_adj_grp_size_round_up(p_adj_grp_size);
-       err = mlxsw_sp_kvdl_alloc_size_query(mlxsw_sp, *p_adj_grp_size,
-                                            &alloc_size);
+       err = mlxsw_sp_kvdl_alloc_count_query(mlxsw_sp,
+                                             MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
+                                             *p_adj_grp_size, &alloc_size);
        if (err)
                return err;
        /* It is possible the allocation results in more allocated
@@ -3278,7 +3304,8 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
                /* No valid allocation size available. */
                goto set_trap;
 
-       err = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size, &adj_index);
+       err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
+                                 ecmp_size, &adj_index);
        if (err) {
                /* We ran out of KVD linear space, just set the
                 * trap and let everything flow through kernel.
@@ -3313,7 +3340,8 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
 
        err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
                                             old_adj_index, old_ecmp_size);
-       mlxsw_sp_kvdl_free(mlxsw_sp, old_adj_index);
+       mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
+                          old_ecmp_size, old_adj_index);
        if (err) {
                dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
                goto set_trap;
@@ -3335,7 +3363,8 @@ set_trap:
        if (err)
                dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
        if (old_adj_index_valid)
-               mlxsw_sp_kvdl_free(mlxsw_sp, nh_grp->adj_index);
+               mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
+                                  nh_grp->ecmp_size, nh_grp->adj_index);
 }
 
 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
@@ -5967,7 +5996,7 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
        return NOTIFY_DONE;
 }
 
-static struct mlxsw_sp_rif *
+struct mlxsw_sp_rif *
 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
                         const struct net_device *dev)
 {
@@ -6024,6 +6053,12 @@ mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev,
                    !list_empty(&inet6_dev->addr_list))
                        addr_list_empty = false;
 
+               /* macvlans do not have a RIF, but rather piggy back on the
+                * RIF of their lower device.
+                */
+               if (netif_is_macvlan(dev) && addr_list_empty)
+                       return true;
+
                if (rif && addr_list_empty &&
                    !netif_is_l3_slave(rif->dev))
                        return true;
@@ -6125,6 +6160,11 @@ const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif)
        return rif->dev;
 }
 
+struct mlxsw_sp_fid *mlxsw_sp_rif_fid(const struct mlxsw_sp_rif *rif)
+{
+       return rif->fid;
+}
+
 static struct mlxsw_sp_rif *
 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
                    const struct mlxsw_sp_rif_params *params,
@@ -6162,7 +6202,7 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
        rif->ops = ops;
 
        if (ops->fid_get) {
-               fid = ops->fid_get(rif);
+               fid = ops->fid_get(rif, extack);
                if (IS_ERR(fid)) {
                        err = PTR_ERR(fid);
                        goto err_fid_get;
@@ -6267,7 +6307,7 @@ mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
        }
 
        /* FID was already created, just take a reference */
-       fid = rif->ops->fid_get(rif);
+       fid = rif->ops->fid_get(rif, extack);
        err = mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port, vid);
        if (err)
                goto err_fid_port_vid_map;
@@ -6432,6 +6472,123 @@ static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev,
        return 0;
 }
 
+static bool mlxsw_sp_rif_macvlan_is_vrrp4(const u8 *mac)
+{
+       u8 vrrp4[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x01, 0x00 };
+       u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
+
+       return ether_addr_equal_masked(mac, vrrp4, mask);
+}
+
+static bool mlxsw_sp_rif_macvlan_is_vrrp6(const u8 *mac)
+{
+       u8 vrrp6[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x02, 0x00 };
+       u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
+
+       return ether_addr_equal_masked(mac, vrrp6, mask);
+}
+
+static int mlxsw_sp_rif_vrrp_op(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
+                               const u8 *mac, bool adding)
+{
+       char ritr_pl[MLXSW_REG_RITR_LEN];
+       u8 vrrp_id = adding ? mac[5] : 0;
+       int err;
+
+       if (!mlxsw_sp_rif_macvlan_is_vrrp4(mac) &&
+           !mlxsw_sp_rif_macvlan_is_vrrp6(mac))
+               return 0;
+
+       mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
+       err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
+       if (err)
+               return err;
+
+       if (mlxsw_sp_rif_macvlan_is_vrrp4(mac))
+               mlxsw_reg_ritr_if_vrrp_id_ipv4_set(ritr_pl, vrrp_id);
+       else
+               mlxsw_reg_ritr_if_vrrp_id_ipv6_set(ritr_pl, vrrp_id);
+
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
+}
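
The masked compares encode the VRRP virtual router MAC format (RFC
3768/5798): 00:00:5e:00:01:{VRID} for IPv4 and 00:00:5e:00:02:{VRID}
for IPv6, with the final byte masked out. That is also why vrrp_id is
simply mac[5] on the add path: the last octet of a VRRP macvlan's MAC
is the virtual router ID that RITR needs programmed.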
+
+static int mlxsw_sp_rif_macvlan_add(struct mlxsw_sp *mlxsw_sp,
+                                   const struct net_device *macvlan_dev,
+                                   struct netlink_ext_ack *extack)
+{
+       struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
+       struct mlxsw_sp_rif *rif;
+       int err;
+
+       rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
+       if (!rif) {
+               NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces");
+               return -EOPNOTSUPP;
+       }
+
+       err = mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
+                                 mlxsw_sp_fid_index(rif->fid), true);
+       if (err)
+               return err;
+
+       err = mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index,
+                                  macvlan_dev->dev_addr, true);
+       if (err)
+               goto err_rif_vrrp_add;
+
+       /* Make sure the bridge driver does not have this MAC pointing at
+        * some other port.
+        */
+       if (rif->ops->fdb_del)
+               rif->ops->fdb_del(rif, macvlan_dev->dev_addr);
+
+       return 0;
+
+err_rif_vrrp_add:
+       mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
+                           mlxsw_sp_fid_index(rif->fid), false);
+       return err;
+}
+
+void mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp,
+                             const struct net_device *macvlan_dev)
+{
+       struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
+       struct mlxsw_sp_rif *rif;
+
+       rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
+       /* If we do not have a RIF, then we already took care of
+        * removing the macvlan's MAC during RIF deletion.
+        */
+       if (!rif)
+               return;
+       mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index, macvlan_dev->dev_addr,
+                            false);
+       mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
+                           mlxsw_sp_fid_index(rif->fid), false);
+}
+
+static int mlxsw_sp_inetaddr_macvlan_event(struct net_device *macvlan_dev,
+                                          unsigned long event,
+                                          struct netlink_ext_ack *extack)
+{
+       struct mlxsw_sp *mlxsw_sp;
+
+       mlxsw_sp = mlxsw_sp_lower_get(macvlan_dev);
+       if (!mlxsw_sp)
+               return 0;
+
+       switch (event) {
+       case NETDEV_UP:
+               return mlxsw_sp_rif_macvlan_add(mlxsw_sp, macvlan_dev, extack);
+       case NETDEV_DOWN:
+               mlxsw_sp_rif_macvlan_del(mlxsw_sp, macvlan_dev);
+               break;
+       }
+
+       return 0;
+}
+
 static int __mlxsw_sp_inetaddr_event(struct net_device *dev,
                                     unsigned long event,
                                     struct netlink_ext_ack *extack)
@@ -6444,6 +6601,8 @@ static int __mlxsw_sp_inetaddr_event(struct net_device *dev,
                return mlxsw_sp_inetaddr_bridge_event(dev, event, extack);
        else if (is_vlan_dev(dev))
                return mlxsw_sp_inetaddr_vlan_event(dev, event, extack);
+       else if (netif_is_macvlan(dev))
+               return mlxsw_sp_inetaddr_macvlan_event(dev, event, extack);
        else
                return 0;
 }
@@ -6684,7 +6843,10 @@ int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
        struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
        int err = 0;
 
-       if (!mlxsw_sp)
+       /* We do not create a RIF for a macvlan, but only use it to
+        * direct more MAC addresses to the router.
+        */
+       if (!mlxsw_sp || netif_is_macvlan(l3_dev))
                return 0;
 
        switch (event) {
@@ -6705,6 +6867,27 @@ int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
        return err;
 }
 
+static int __mlxsw_sp_rif_macvlan_flush(struct net_device *dev, void *data)
+{
+       struct mlxsw_sp_rif *rif = data;
+
+       if (!netif_is_macvlan(dev))
+               return 0;
+
+       return mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
+                                  mlxsw_sp_fid_index(rif->fid), false);
+}
+
+static int mlxsw_sp_rif_macvlan_flush(struct mlxsw_sp_rif *rif)
+{
+       if (!netif_is_macvlan_port(rif->dev))
+               return 0;
+
+       netdev_warn(rif->dev, "Router interface is deleted. Upper macvlans will not work\n");
+       return netdev_walk_all_upper_dev_rcu(rif->dev,
+                                            __mlxsw_sp_rif_macvlan_flush, rif);
+}
+
 static struct mlxsw_sp_rif_subport *
 mlxsw_sp_rif_subport_rif(const struct mlxsw_sp_rif *rif)
 {
@@ -6771,11 +6954,13 @@ static void mlxsw_sp_rif_subport_deconfigure(struct mlxsw_sp_rif *rif)
        mlxsw_sp_fid_rif_set(fid, NULL);
        mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
                            mlxsw_sp_fid_index(fid), false);
+       mlxsw_sp_rif_macvlan_flush(rif);
        mlxsw_sp_rif_subport_op(rif, false);
 }
 
 static struct mlxsw_sp_fid *
-mlxsw_sp_rif_subport_fid_get(struct mlxsw_sp_rif *rif)
+mlxsw_sp_rif_subport_fid_get(struct mlxsw_sp_rif *rif,
+                            struct netlink_ext_ack *extack)
 {
        return mlxsw_sp_fid_rfid_get(rif->mlxsw_sp, rif->rif_index);
 }
@@ -6857,6 +7042,7 @@ static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif)
        mlxsw_sp_fid_rif_set(fid, NULL);
        mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
                            mlxsw_sp_fid_index(fid), false);
+       mlxsw_sp_rif_macvlan_flush(rif);
        mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
                               mlxsw_sp_router_port(mlxsw_sp), false);
        mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
@@ -6865,19 +7051,49 @@ static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif)
 }
 
 static struct mlxsw_sp_fid *
-mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif)
+mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif,
+                         struct netlink_ext_ack *extack)
 {
-       u16 vid = is_vlan_dev(rif->dev) ? vlan_dev_vlan_id(rif->dev) : 1;
+       u16 vid;
+       int err;
+
+       if (is_vlan_dev(rif->dev)) {
+               vid = vlan_dev_vlan_id(rif->dev);
+       } else {
+               err = br_vlan_get_pvid(rif->dev, &vid);
+               if (err < 0 || !vid) {
+                       NL_SET_ERR_MSG_MOD(extack, "Couldn't determine bridge PVID");
+                       return ERR_PTR(-EINVAL);
+               }
+       }
 
        return mlxsw_sp_fid_8021q_get(rif->mlxsw_sp, vid);
 }
 
+static void mlxsw_sp_rif_vlan_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
+{
+       u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
+       struct switchdev_notifier_fdb_info info;
+       struct net_device *br_dev;
+       struct net_device *dev;
+
+       br_dev = is_vlan_dev(rif->dev) ? vlan_dev_real_dev(rif->dev) : rif->dev;
+       dev = br_fdb_find_port(br_dev, mac, vid);
+       if (!dev)
+               return;
+
+       info.addr = mac;
+       info.vid = vid;
+       call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info);
+}
+
 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_ops = {
        .type                   = MLXSW_SP_RIF_TYPE_VLAN,
        .rif_size               = sizeof(struct mlxsw_sp_rif),
        .configure              = mlxsw_sp_rif_vlan_configure,
        .deconfigure            = mlxsw_sp_rif_vlan_deconfigure,
        .fid_get                = mlxsw_sp_rif_vlan_fid_get,
+       .fdb_del                = mlxsw_sp_rif_vlan_fdb_del,
 };
 
 static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif)
@@ -6929,6 +7145,7 @@ static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif)
        mlxsw_sp_fid_rif_set(fid, NULL);
        mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
                            mlxsw_sp_fid_index(fid), false);
+       mlxsw_sp_rif_macvlan_flush(rif);
        mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
                               mlxsw_sp_router_port(mlxsw_sp), false);
        mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
@@ -6937,17 +7154,33 @@ static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif)
 }
 
 static struct mlxsw_sp_fid *
-mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif)
+mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif,
+                        struct netlink_ext_ack *extack)
 {
        return mlxsw_sp_fid_8021d_get(rif->mlxsw_sp, rif->dev->ifindex);
 }
 
+static void mlxsw_sp_rif_fid_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
+{
+       struct switchdev_notifier_fdb_info info;
+       struct net_device *dev;
+
+       dev = br_fdb_find_port(rif->dev, mac, 0);
+       if (!dev)
+               return;
+
+       info.addr = mac;
+       info.vid = 0;
+       call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info);
+}
+
 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = {
        .type                   = MLXSW_SP_RIF_TYPE_FID,
        .rif_size               = sizeof(struct mlxsw_sp_rif),
        .configure              = mlxsw_sp_rif_fid_configure,
        .deconfigure            = mlxsw_sp_rif_fid_deconfigure,
        .fid_get                = mlxsw_sp_rif_fid_fid_get,
+       .fdb_del                = mlxsw_sp_rif_fid_fdb_del,
 };
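
Both fdb_del implementations close the race called out in
mlxsw_sp_rif_macvlan_add() above: once a macvlan MAC is programmed to
the router FID, any stale bridge FDB entry for it must go, so the RIF
looks up the owning bridge port with br_fdb_find_port() and raises
SWITCHDEV_FDB_DEL_TO_BRIDGE so the bridge driver forgets the entry. The
VLAN flavor resolves the 802.1Q FID's VID first; the FID flavor passes
vid 0, as 802.1D bridges are VLAN-unaware.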
 
 static struct mlxsw_sp_rif_ipip_lb *
@@ -7172,6 +7405,7 @@ static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp)
 
 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
 {
+       bool usp = init_net.ipv4.sysctl_ip_fwd_update_priority;
        char rgcr_pl[MLXSW_REG_RGCR_LEN];
        u64 max_rifs;
        int err;
@@ -7182,7 +7416,7 @@ static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
 
        mlxsw_reg_rgcr_pack(rgcr_pl, true, true);
        mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
-       mlxsw_reg_rgcr_usp_set(rgcr_pl, true);
+       mlxsw_reg_rgcr_usp_set(rgcr_pl, usp);
        err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
        if (err)
                return err;
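
With this, the RGCR "update switch priority" bit tracks the
net.ipv4.ip_fwd_update_priority sysctl instead of being hard-coded on,
and the NETEVENT_IPV4_FWD_UPDATE_PRIORITY_UPDATE handler added earlier
re-runs __mlxsw_sp_router_init() from a work item so the register is
rewritten whenever the sysctl changes.
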
index a01edcf567971f860f59f9b8339d75b8aba786ec..52e25695625c6eb815b2dd4c23ae5e85417180cf 100644
@@ -66,6 +66,8 @@ struct mlxsw_sp_neigh_entry;
 struct mlxsw_sp_nexthop;
 struct mlxsw_sp_ipip_entry;
 
+struct mlxsw_sp_rif *mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
+                                             const struct net_device *dev);
 struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
                                           u16 rif_index);
 u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif);
@@ -75,6 +77,7 @@ u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev);
 int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif);
 u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp);
 const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif);
+struct mlxsw_sp_fid *mlxsw_sp_rif_fid(const struct mlxsw_sp_rif *rif);
 int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
                                   struct mlxsw_sp_rif *rif,
                                   enum mlxsw_sp_rif_counter_dir dir,
index 3d187d88cc7c5c9025cc5f445cc3fea588ca9a5d..e42d640cddab811fbeff2111309b8d2ad47bb280 100644
@@ -36,6 +36,7 @@
 #include <linux/list.h>
 #include <net/arp.h>
 #include <net/gre.h>
+#include <net/lag.h>
 #include <net/ndisc.h>
 #include <net/ip6_tunnel.h>
 
@@ -254,7 +255,9 @@ mlxsw_sp_span_entry_lag(struct net_device *lag_dev)
        struct list_head *iter;
 
        netdev_for_each_lower_dev(lag_dev, dev, iter)
-               if ((dev->flags & IFF_UP) && mlxsw_sp_port_dev_check(dev))
+               if (netif_carrier_ok(dev) &&
+                   net_lag_port_dev_txable(dev) &&
+                   mlxsw_sp_port_dev_check(dev))
                        return dev;
 
        return NULL;
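
For mirroring to a LAG device, an administratively up slave is no
longer considered good enough: the chosen port must have carrier and be
reported txable by the LAG driver (net_lag_port_dev_txable() consults
the bonding/team state), so mirrored traffic is not steered to a member
the aggregate will not actually transmit on.
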
index eea5666a86b2ae341524710e678d4caf5776a18b..da94e1eb9e1693bfed9d0cb73827013c1fbea9db 100644
@@ -1135,6 +1135,39 @@ err_port_vlan_set:
        return err;
 }
 
+static int
+mlxsw_sp_br_ban_rif_pvid_change(struct mlxsw_sp *mlxsw_sp,
+                               const struct net_device *br_dev,
+                               const struct switchdev_obj_port_vlan *vlan)
+{
+       struct mlxsw_sp_rif *rif;
+       struct mlxsw_sp_fid *fid;
+       u16 pvid;
+       u16 vid;
+
+       rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, br_dev);
+       if (!rif)
+               return 0;
+       fid = mlxsw_sp_rif_fid(rif);
+       pvid = mlxsw_sp_fid_8021q_vid(fid);
+
+       for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) {
+               if (vlan->flags & BRIDGE_VLAN_INFO_PVID) {
+                       if (vid != pvid) {
+                               netdev_err(br_dev, "Can't change PVID, it's used by router interface\n");
+                               return -EBUSY;
+                       }
+               } else {
+                       if (vid == pvid) {
+                               netdev_err(br_dev, "Can't remove PVID, it's used by router interface\n");
+                               return -EBUSY;
+                       }
+               }
+       }
+
+       return 0;
+}
+
 static int mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port,
                                   const struct switchdev_obj_port_vlan *vlan,
                                   struct switchdev_trans *trans)
@@ -1146,8 +1179,18 @@ static int mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port,
        struct mlxsw_sp_bridge_port *bridge_port;
        u16 vid;
 
-       if (netif_is_bridge_master(orig_dev))
-               return -EOPNOTSUPP;
+       if (netif_is_bridge_master(orig_dev)) {
+               int err = 0;
+
+               if ((vlan->flags & BRIDGE_VLAN_INFO_BRENTRY) &&
+                   br_vlan_enabled(orig_dev) &&
+                   switchdev_trans_ph_prepare(trans))
+                       err = mlxsw_sp_br_ban_rif_pvid_change(mlxsw_sp,
+                                                             orig_dev, vlan);
+               if (!err)
+                       err = -EOPNOTSUPP;
+               return err;
+       }
 
        if (switchdev_trans_ph_prepare(trans))
                return 0;
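
Note the deliberate error flow for bridge-master VLAN objects: even
when the PVID check passes, the function still returns -EOPNOTSUPP,
since mlxsw does not offload VLANs configured on the bridge device
itself. The check runs only in the prepare phase of the switchdev
transaction, so a PVID change that would pull the FID out from under an
existing router interface is vetoed before anything is committed.
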
index 399e9d6993f72500bc2339b285f7f0d55de69c3e..eb437f59640daa7ddadb74e61e8203dfb2971b6a 100644
@@ -63,6 +63,7 @@ enum {
        MLXSW_TRAP_ID_LBERROR = 0x54,
        MLXSW_TRAP_ID_IPV4_OSPF = 0x55,
        MLXSW_TRAP_ID_IPV4_PIM = 0x58,
+       MLXSW_TRAP_ID_IPV4_VRRP = 0x59,
        MLXSW_TRAP_ID_RPF = 0x5C,
        MLXSW_TRAP_ID_IP2ME = 0x5F,
        MLXSW_TRAP_ID_IPV6_UNSPECIFIED_ADDRESS = 0x60,
@@ -78,6 +79,7 @@ enum {
        MLXSW_TRAP_ID_IPV6_ALL_ROUTERS_LINK = 0x6F,
        MLXSW_TRAP_ID_RTR_INGRESS0 = 0x70,
        MLXSW_TRAP_ID_IPV6_PIM = 0x79,
+       MLXSW_TRAP_ID_IPV6_VRRP = 0x7A,
        MLXSW_TRAP_ID_IPV4_BGP = 0x88,
        MLXSW_TRAP_ID_IPV6_BGP = 0x89,
        MLXSW_TRAP_ID_L3_IPV6_ROUTER_SOLICITATION = 0x8A,
index b72d1bd11296bba36c83e1252fcfa0a397ee5c84..ebbdfb908745105470aa49950835da692bb7eba9 100644
@@ -3373,7 +3373,6 @@ static void port_get_link_speed(struct ksz_port *port)
  */
 static void port_set_link_speed(struct ksz_port *port)
 {
-       struct ksz_port_info *info;
        struct ksz_hw *hw = port->hw;
        u16 data;
        u16 cfg;
@@ -3382,8 +3381,6 @@ static void port_set_link_speed(struct ksz_port *port)
        int p;
 
        for (i = 0, p = port->first_port; i < port->port_cnt; i++, p++) {
-               info = &hw->port_info[p];
-
                port_r16(hw, p, KS884X_PORT_CTRL_4_OFFSET, &data);
                port_r8(hw, p, KS884X_PORT_STATUS_OFFSET, &status);
 
index 2e982cc249fbb59b76b54057576f989d7be7ee0b..43f47cb45fe2cee55e372d2381cd5be8a8767284 100644
@@ -6,4 +6,4 @@ obj-$(CONFIG_ENC28J60) += enc28j60.o
 obj-$(CONFIG_ENCX24J600) += encx24j600.o encx24j600-regmap.o
 obj-$(CONFIG_LAN743X) += lan743x.o
 
-lan743x-objs := lan743x_main.o
+lan743x-objs := lan743x_main.o lan743x_ethtool.o
diff --git a/drivers/net/ethernet/microchip/lan743x_ethtool.c b/drivers/net/ethernet/microchip/lan743x_ethtool.c
new file mode 100644
index 0000000..c25b3e9
--- /dev/null
+++ b/drivers/net/ethernet/microchip/lan743x_ethtool.c
@@ -0,0 +1,696 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/* Copyright (C) 2018 Microchip Technology Inc. */
+
+#include <linux/netdevice.h>
+#include "lan743x_main.h"
+#include "lan743x_ethtool.h"
+#include <linux/pci.h>
+#include <linux/phy.h>
+
+/* eeprom */
+#define LAN743X_EEPROM_MAGIC               (0x74A5)
+#define LAN743X_OTP_MAGIC                  (0x74F3)
+#define EEPROM_INDICATOR_1                 (0xA5)
+#define EEPROM_INDICATOR_2                 (0xAA)
+#define EEPROM_MAC_OFFSET                  (0x01)
+#define MAX_EEPROM_SIZE                            512
+#define OTP_INDICATOR_1                            (0xF3)
+#define OTP_INDICATOR_2                            (0xF7)
+
+static int lan743x_otp_write(struct lan743x_adapter *adapter, u32 offset,
+                            u32 length, u8 *data)
+{
+       unsigned long timeout;
+       u32 buf;
+       int i;
+
+       buf = lan743x_csr_read(adapter, OTP_PWR_DN);
+
+       if (buf & OTP_PWR_DN_PWRDN_N_) {
+               /* clear it and wait to be cleared */
+               lan743x_csr_write(adapter, OTP_PWR_DN, 0);
+
+               timeout = jiffies + HZ;
+               do {
+                       udelay(1);
+                       buf = lan743x_csr_read(adapter, OTP_PWR_DN);
+                       if (time_after(jiffies, timeout)) {
+                               netif_warn(adapter, drv, adapter->netdev,
+                                          "timeout on OTP_PWR_DN completion\n");
+                               return -EIO;
+                       }
+               } while (buf & OTP_PWR_DN_PWRDN_N_);
+       }
+
+       /* set to BYTE program mode */
+       lan743x_csr_write(adapter, OTP_PRGM_MODE, OTP_PRGM_MODE_BYTE_);
+
+       for (i = 0; i < length; i++) {
+               lan743x_csr_write(adapter, OTP_ADDR1,
+                                 ((offset + i) >> 8) &
+                                 OTP_ADDR1_15_11_MASK_);
+               lan743x_csr_write(adapter, OTP_ADDR2,
+                                 ((offset + i) &
+                                 OTP_ADDR2_10_3_MASK_));
+               lan743x_csr_write(adapter, OTP_PRGM_DATA, data[i]);
+               lan743x_csr_write(adapter, OTP_TST_CMD, OTP_TST_CMD_PRGVRFY_);
+               lan743x_csr_write(adapter, OTP_CMD_GO, OTP_CMD_GO_GO_);
+
+               timeout = jiffies + HZ;
+               do {
+                       udelay(1);
+                       buf = lan743x_csr_read(adapter, OTP_STATUS);
+                       if (time_after(jiffies, timeout)) {
+                               netif_warn(adapter, drv, adapter->netdev,
+                                          "Timeout on OTP_STATUS completion\n");
+                               return -EIO;
+                       }
+               } while (buf & OTP_STATUS_BUSY_);
+       }
+
+       return 0;
+}
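+/* A worked example of the address split above (illustrative): for
+ * offset 0x123, OTP_ADDR1 receives (0x123 >> 8) & 0x1F = 0x01 and
+ * OTP_ADDR2 receives 0x123 & 0xFF = 0x23, so the two registers
+ * together select the byte being programmed.
+ */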
+
+static int lan743x_eeprom_wait(struct lan743x_adapter *adapter)
+{
+       unsigned long start_time = jiffies;
+       u32 val;
+
+       do {
+               val = lan743x_csr_read(adapter, E2P_CMD);
+
+               if (!(val & E2P_CMD_EPC_BUSY_) ||
+                   (val & E2P_CMD_EPC_TIMEOUT_))
+                       break;
+               usleep_range(40, 100);
+       } while (!time_after(jiffies, start_time + HZ));
+
+       if (val & (E2P_CMD_EPC_TIMEOUT_ | E2P_CMD_EPC_BUSY_)) {
+               netif_warn(adapter, drv, adapter->netdev,
+                          "EEPROM operation timeout\n");
+               return -EIO;
+       }
+
+       return 0;
+}
+
+static int lan743x_eeprom_confirm_not_busy(struct lan743x_adapter *adapter)
+{
+       unsigned long start_time = jiffies;
+       u32 val;
+
+       do {
+               val = lan743x_csr_read(adapter, E2P_CMD);
+
+               if (!(val & E2P_CMD_EPC_BUSY_))
+                       return 0;
+
+               usleep_range(40, 100);
+       } while (!time_after(jiffies, start_time + HZ));
+
+       netif_warn(adapter, drv, adapter->netdev, "EEPROM is busy\n");
+       return -EIO;
+}
+
+static int lan743x_eeprom_read(struct lan743x_adapter *adapter,
+                              u32 offset, u32 length, u8 *data)
+{
+       int retval;
+       u32 val;
+       int i;
+
+       retval = lan743x_eeprom_confirm_not_busy(adapter);
+       if (retval)
+               return retval;
+
+       for (i = 0; i < length; i++) {
+               val = E2P_CMD_EPC_BUSY_ | E2P_CMD_EPC_CMD_READ_;
+               val |= (offset & E2P_CMD_EPC_ADDR_MASK_);
+               lan743x_csr_write(adapter, E2P_CMD, val);
+
+               retval = lan743x_eeprom_wait(adapter);
+               if (retval < 0)
+                       return retval;
+
+               val = lan743x_csr_read(adapter, E2P_DATA);
+               data[i] = val & 0xFF;
+               offset++;
+       }
+
+       return 0;
+}
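+/* Usage sketch (illustrative, not part of the driver flow): reading
+ * the stored MAC bytes through the helper above, assuming a probed
+ * "adapter" and its "netdev":
+ *
+ *     u8 mac[ETH_ALEN];
+ *
+ *     if (!lan743x_eeprom_read(adapter, EEPROM_MAC_OFFSET, ETH_ALEN, mac))
+ *             ether_addr_copy(netdev->dev_addr, mac);
+ */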
+
+static int lan743x_eeprom_write(struct lan743x_adapter *adapter,
+                               u32 offset, u32 length, u8 *data)
+{
+       int retval;
+       u32 val;
+       int i;
+
+       retval = lan743x_eeprom_confirm_not_busy(adapter);
+       if (retval)
+               return retval;
+
+       /* Issue write/erase enable command */
+       val = E2P_CMD_EPC_BUSY_ | E2P_CMD_EPC_CMD_EWEN_;
+       lan743x_csr_write(adapter, E2P_CMD, val);
+
+       retval = lan743x_eeprom_wait(adapter);
+       if (retval < 0)
+               return retval;
+
+       for (i = 0; i < length; i++) {
+               /* Fill data register */
+               val = data[i];
+               lan743x_csr_write(adapter, E2P_DATA, val);
+
+               /* Send "write" command */
+               val = E2P_CMD_EPC_BUSY_ | E2P_CMD_EPC_CMD_WRITE_;
+               val |= (offset & E2P_CMD_EPC_ADDR_MASK_);
+               lan743x_csr_write(adapter, E2P_CMD, val);
+
+               retval = lan743x_eeprom_wait(adapter);
+               if (retval < 0)
+                       return retval;
+
+               offset++;
+       }
+
+       return 0;
+}
+
+static void lan743x_ethtool_get_drvinfo(struct net_device *netdev,
+                                       struct ethtool_drvinfo *info)
+{
+       struct lan743x_adapter *adapter = netdev_priv(netdev);
+
+       strlcpy(info->driver, DRIVER_NAME, sizeof(info->driver));
+       strlcpy(info->bus_info,
+               pci_name(adapter->pdev), sizeof(info->bus_info));
+}
+
+static u32 lan743x_ethtool_get_msglevel(struct net_device *netdev)
+{
+       struct lan743x_adapter *adapter = netdev_priv(netdev);
+
+       return adapter->msg_enable;
+}
+
+static void lan743x_ethtool_set_msglevel(struct net_device *netdev,
+                                        u32 msglevel)
+{
+       struct lan743x_adapter *adapter = netdev_priv(netdev);
+
+       adapter->msg_enable = msglevel;
+}
+
+static int lan743x_ethtool_get_eeprom_len(struct net_device *netdev)
+{
+       return MAX_EEPROM_SIZE;
+}
+
+static int lan743x_ethtool_get_eeprom(struct net_device *netdev,
+                                     struct ethtool_eeprom *ee, u8 *data)
+{
+       struct lan743x_adapter *adapter = netdev_priv(netdev);
+
+       return lan743x_eeprom_read(adapter, ee->offset, ee->len, data);
+}
+
+static int lan743x_ethtool_set_eeprom(struct net_device *netdev,
+                                     struct ethtool_eeprom *ee, u8 *data)
+{
+       struct lan743x_adapter *adapter = netdev_priv(netdev);
+       int ret = -EINVAL;
+
+       if (ee->magic == LAN743X_EEPROM_MAGIC)
+               ret = lan743x_eeprom_write(adapter, ee->offset, ee->len,
+                                          data);
+       /* Beware: OTP is one-time programmable ONLY, so apply strict
+        * checks before committing a write.
+        */
+       else if ((ee->magic == LAN743X_OTP_MAGIC) &&
+                (ee->offset == 0) &&
+                (ee->len == MAX_EEPROM_SIZE) &&
+                (data[0] == OTP_INDICATOR_1))
+               ret = lan743x_otp_write(adapter, ee->offset, ee->len, data);
+
+       return ret;
+}
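+/* From user space the two paths above are selected by the magic value;
+ * for example (interface name assumed):
+ *
+ *     ethtool -E eth0 magic 0x74A5 offset 0 value 0xA5
+ *
+ * writes one EEPROM byte, while magic 0x74F3 with offset 0, the full
+ * 512-byte length and an OTP indicator as the first byte routes the
+ * write to the one-time-programmable area instead.
+ */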
+
+static const char lan743x_set0_hw_cnt_strings[][ETH_GSTRING_LEN] = {
+       "RX FCS Errors",
+       "RX Alignment Errors",
+       "RX Fragment Errors",
+       "RX Jabber Errors",
+       "RX Undersize Frame Errors",
+       "RX Oversize Frame Errors",
+       "RX Dropped Frames",
+       "RX Unicast Byte Count",
+       "RX Broadcast Byte Count",
+       "RX Multicast Byte Count",
+       "RX Unicast Frames",
+       "RX Broadcast Frames",
+       "RX Multicast Frames",
+       "RX Pause Frames",
+       "RX 64 Byte Frames",
+       "RX 65 - 127 Byte Frames",
+       "RX 128 - 255 Byte Frames",
+       "RX 256 - 511 Bytes Frames",
+       "RX 512 - 1023 Byte Frames",
+       "RX 1024 - 1518 Byte Frames",
+       "RX Greater 1518 Byte Frames",
+};
+
+static const char lan743x_set1_sw_cnt_strings[][ETH_GSTRING_LEN] = {
+       "RX Queue 0 Frames",
+       "RX Queue 1 Frames",
+       "RX Queue 2 Frames",
+       "RX Queue 3 Frames",
+};
+
+static const char lan743x_set2_hw_cnt_strings[][ETH_GSTRING_LEN] = {
+       "RX Total Frames",
+       "EEE RX LPI Transitions",
+       "EEE RX LPI Time",
+       "RX Counter Rollover Status",
+       "TX FCS Errors",
+       "TX Excess Deferral Errors",
+       "TX Carrier Errors",
+       "TX Bad Byte Count",
+       "TX Single Collisions",
+       "TX Multiple Collisions",
+       "TX Excessive Collision",
+       "TX Late Collisions",
+       "TX Unicast Byte Count",
+       "TX Broadcast Byte Count",
+       "TX Multicast Byte Count",
+       "TX Unicast Frames",
+       "TX Broadcast Frames",
+       "TX Multicast Frames",
+       "TX Pause Frames",
+       "TX 64 Byte Frames",
+       "TX 65 - 127 Byte Frames",
+       "TX 128 - 255 Byte Frames",
+       "TX 256 - 511 Bytes Frames",
+       "TX 512 - 1023 Byte Frames",
+       "TX 1024 - 1518 Byte Frames",
+       "TX Greater 1518 Byte Frames",
+       "TX Total Frames",
+       "EEE TX LPI Transitions",
+       "EEE TX LPI Time",
+       "TX Counter Rollover Status",
+};
+
+static const u32 lan743x_set0_hw_cnt_addr[] = {
+       STAT_RX_FCS_ERRORS,
+       STAT_RX_ALIGNMENT_ERRORS,
+       STAT_RX_FRAGMENT_ERRORS,
+       STAT_RX_JABBER_ERRORS,
+       STAT_RX_UNDERSIZE_FRAME_ERRORS,
+       STAT_RX_OVERSIZE_FRAME_ERRORS,
+       STAT_RX_DROPPED_FRAMES,
+       STAT_RX_UNICAST_BYTE_COUNT,
+       STAT_RX_BROADCAST_BYTE_COUNT,
+       STAT_RX_MULTICAST_BYTE_COUNT,
+       STAT_RX_UNICAST_FRAMES,
+       STAT_RX_BROADCAST_FRAMES,
+       STAT_RX_MULTICAST_FRAMES,
+       STAT_RX_PAUSE_FRAMES,
+       STAT_RX_64_BYTE_FRAMES,
+       STAT_RX_65_127_BYTE_FRAMES,
+       STAT_RX_128_255_BYTE_FRAMES,
+       STAT_RX_256_511_BYTES_FRAMES,
+       STAT_RX_512_1023_BYTE_FRAMES,
+       STAT_RX_1024_1518_BYTE_FRAMES,
+       STAT_RX_GREATER_1518_BYTE_FRAMES,
+};
+
+static const u32 lan743x_set2_hw_cnt_addr[] = {
+       STAT_RX_TOTAL_FRAMES,
+       STAT_EEE_RX_LPI_TRANSITIONS,
+       STAT_EEE_RX_LPI_TIME,
+       STAT_RX_COUNTER_ROLLOVER_STATUS,
+       STAT_TX_FCS_ERRORS,
+       STAT_TX_EXCESS_DEFERRAL_ERRORS,
+       STAT_TX_CARRIER_ERRORS,
+       STAT_TX_BAD_BYTE_COUNT,
+       STAT_TX_SINGLE_COLLISIONS,
+       STAT_TX_MULTIPLE_COLLISIONS,
+       STAT_TX_EXCESSIVE_COLLISION,
+       STAT_TX_LATE_COLLISIONS,
+       STAT_TX_UNICAST_BYTE_COUNT,
+       STAT_TX_BROADCAST_BYTE_COUNT,
+       STAT_TX_MULTICAST_BYTE_COUNT,
+       STAT_TX_UNICAST_FRAMES,
+       STAT_TX_BROADCAST_FRAMES,
+       STAT_TX_MULTICAST_FRAMES,
+       STAT_TX_PAUSE_FRAMES,
+       STAT_TX_64_BYTE_FRAMES,
+       STAT_TX_65_127_BYTE_FRAMES,
+       STAT_TX_128_255_BYTE_FRAMES,
+       STAT_TX_256_511_BYTES_FRAMES,
+       STAT_TX_512_1023_BYTE_FRAMES,
+       STAT_TX_1024_1518_BYTE_FRAMES,
+       STAT_TX_GREATER_1518_BYTE_FRAMES,
+       STAT_TX_TOTAL_FRAMES,
+       STAT_EEE_TX_LPI_TRANSITIONS,
+       STAT_EEE_TX_LPI_TIME,
+       STAT_TX_COUNTER_ROLLOVER_STATUS
+};
+
+static void lan743x_ethtool_get_strings(struct net_device *netdev,
+                                       u32 stringset, u8 *data)
+{
+       switch (stringset) {
+       case ETH_SS_STATS:
+               memcpy(data, lan743x_set0_hw_cnt_strings,
+                      sizeof(lan743x_set0_hw_cnt_strings));
+               memcpy(&data[sizeof(lan743x_set0_hw_cnt_strings)],
+                      lan743x_set1_sw_cnt_strings,
+                      sizeof(lan743x_set1_sw_cnt_strings));
+               memcpy(&data[sizeof(lan743x_set0_hw_cnt_strings) +
+                      sizeof(lan743x_set1_sw_cnt_strings)],
+                      lan743x_set2_hw_cnt_strings,
+                      sizeof(lan743x_set2_hw_cnt_strings));
+               break;
+       }
+}
+
+static void lan743x_ethtool_get_ethtool_stats(struct net_device *netdev,
+                                             struct ethtool_stats *stats,
+                                             u64 *data)
+{
+       struct lan743x_adapter *adapter = netdev_priv(netdev);
+       int data_index = 0;
+       u32 buf;
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(lan743x_set0_hw_cnt_addr); i++) {
+               buf = lan743x_csr_read(adapter, lan743x_set0_hw_cnt_addr[i]);
+               data[data_index++] = (u64)buf;
+       }
+       for (i = 0; i < ARRAY_SIZE(adapter->rx); i++)
+               data[data_index++] = (u64)(adapter->rx[i].frame_count);
+       for (i = 0; i < ARRAY_SIZE(lan743x_set2_hw_cnt_addr); i++) {
+               buf = lan743x_csr_read(adapter, lan743x_set2_hw_cnt_addr[i]);
+               data[data_index++] = (u64)buf;
+       }
+}
+
+static int lan743x_ethtool_get_sset_count(struct net_device *netdev, int sset)
+{
+       switch (sset) {
+       case ETH_SS_STATS:
+       {
+               int ret;
+
+               ret = ARRAY_SIZE(lan743x_set0_hw_cnt_strings);
+               ret += ARRAY_SIZE(lan743x_set1_sw_cnt_strings);
+               ret += ARRAY_SIZE(lan743x_set2_hw_cnt_strings);
+               return ret;
+       }
+       default:
+               return -EOPNOTSUPP;
+       }
+}
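+/* With the tables above this reports 21 + 4 + 30 = 55 counters, which
+ * must stay in step with the string layout copied out in
+ * lan743x_ethtool_get_strings() and the value order produced by
+ * lan743x_ethtool_get_ethtool_stats().
+ */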
+
+static int lan743x_ethtool_get_rxnfc(struct net_device *netdev,
+                                    struct ethtool_rxnfc *rxnfc,
+                                    u32 *rule_locs)
+{
+       switch (rxnfc->cmd) {
+       case ETHTOOL_GRXFH:
+               rxnfc->data = 0;
+               switch (rxnfc->flow_type) {
+               case TCP_V4_FLOW:
+               case UDP_V4_FLOW:
+               case TCP_V6_FLOW:
+               case UDP_V6_FLOW:
+                       rxnfc->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+                       /* fall through */
+               case IPV4_FLOW:
+               case IPV6_FLOW:
+                       rxnfc->data |= RXH_IP_SRC | RXH_IP_DST;
+                       return 0;
+               }
+               break;
+       case ETHTOOL_GRXRINGS:
+               rxnfc->data = LAN743X_USED_RX_CHANNELS;
+               return 0;
+       }
+       return -EOPNOTSUPP;
+}
+
+static u32 lan743x_ethtool_get_rxfh_key_size(struct net_device *netdev)
+{
+       return 40;
+}
+
+static u32 lan743x_ethtool_get_rxfh_indir_size(struct net_device *netdev)
+{
+       return 128;
+}
+
+static int lan743x_ethtool_get_rxfh(struct net_device *netdev,
+                                   u32 *indir, u8 *key, u8 *hfunc)
+{
+       struct lan743x_adapter *adapter = netdev_priv(netdev);
+
+       if (indir) {
+               int dw_index;
+               int byte_index = 0;
+
+               for (dw_index = 0; dw_index < 32; dw_index++) {
+                       u32 four_entries =
+                               lan743x_csr_read(adapter, RFE_INDX(dw_index));
+
+                       byte_index = dw_index << 2;
+                       indir[byte_index + 0] =
+                               ((four_entries >> 0) & 0x000000FF);
+                       indir[byte_index + 1] =
+                               ((four_entries >> 8) & 0x000000FF);
+                       indir[byte_index + 2] =
+                               ((four_entries >> 16) & 0x000000FF);
+                       indir[byte_index + 3] =
+                               ((four_entries >> 24) & 0x000000FF);
+               }
+       }
+       if (key) {
+               int dword_index;
+               int byte_index = 0;
+
+               for (dword_index = 0; dword_index < 10; dword_index++) {
+                       u32 four_entries =
+                               lan743x_csr_read(adapter,
+                                                RFE_HASH_KEY(dword_index));
+
+                       byte_index = dword_index << 2;
+                       key[byte_index + 0] =
+                               ((four_entries >> 0) & 0x000000FF);
+                       key[byte_index + 1] =
+                               ((four_entries >> 8) & 0x000000FF);
+                       key[byte_index + 2] =
+                               ((four_entries >> 16) & 0x000000FF);
+                       key[byte_index + 3] =
+                               ((four_entries >> 24) & 0x000000FF);
+               }
+       }
+       if (hfunc)
+               (*hfunc) = ETH_RSS_HASH_TOP;
+       return 0;
+}
+
+static int lan743x_ethtool_set_rxfh(struct net_device *netdev,
+                                   const u32 *indir, const u8 *key,
+                                   const u8 hfunc)
+{
+       struct lan743x_adapter *adapter = netdev_priv(netdev);
+
+       if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)
+               return -EOPNOTSUPP;
+
+       if (indir) {
+               u32 indir_value = 0;
+               int dword_index = 0;
+               int byte_index = 0;
+
+               for (dword_index = 0; dword_index < 32; dword_index++) {
+                       byte_index = dword_index << 2;
+                       indir_value =
+                               (((indir[byte_index + 0] & 0x000000FF) << 0) |
+                               ((indir[byte_index + 1] & 0x000000FF) << 8) |
+                               ((indir[byte_index + 2] & 0x000000FF) << 16) |
+                               ((indir[byte_index + 3] & 0x000000FF) << 24));
+                       lan743x_csr_write(adapter, RFE_INDX(dword_index),
+                                         indir_value);
+               }
+       }
+       if (key) {
+               int dword_index = 0;
+               int byte_index = 0;
+               u32 key_value = 0;
+
+               for (dword_index = 0; dword_index < 10; dword_index++) {
+                       byte_index = dword_index << 2;
+                       key_value =
+                               ((((u32)(key[byte_index + 0])) << 0) |
+                               (((u32)(key[byte_index + 1])) << 8) |
+                               (((u32)(key[byte_index + 2])) << 16) |
+                               (((u32)(key[byte_index + 3])) << 24));
+                       lan743x_csr_write(adapter, RFE_HASH_KEY(dword_index),
+                                         key_value);
+               }
+       }
+       return 0;
+}
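+/* Packing sketch for the loops above (values illustrative): RETA
+ * entries are stored four per 32-bit register, least-significant byte
+ * first, so indir[0..3] = { 1, 2, 3, 0 } lands in RFE_INDX(0) as
+ * 0x00030201.  User space can exercise this path with, e.g.:
+ *
+ *     ethtool -X eth0 equal 4
+ *     ethtool -x eth0
+ */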
+
+static int lan743x_ethtool_get_eee(struct net_device *netdev,
+                                  struct ethtool_eee *eee)
+{
+       struct lan743x_adapter *adapter = netdev_priv(netdev);
+       struct phy_device *phydev = netdev->phydev;
+       u32 buf;
+       int ret;
+
+       if (!phydev)
+               return -EIO;
+       if (!phydev->drv) {
+               netif_err(adapter, drv, adapter->netdev,
+                         "Missing PHY Driver\n");
+               return -EIO;
+       }
+
+       ret = phy_ethtool_get_eee(phydev, eee);
+       if (ret < 0)
+               return ret;
+
+       buf = lan743x_csr_read(adapter, MAC_CR);
+       if (buf & MAC_CR_EEE_EN_) {
+               eee->eee_enabled = true;
+               eee->eee_active = !!(eee->advertised & eee->lp_advertised);
+               eee->tx_lpi_enabled = true;
+               /* EEE_TX_LPI_REQ_DLY & tx_lpi_timer use the same uSec unit */
+               buf = lan743x_csr_read(adapter, MAC_EEE_TX_LPI_REQ_DLY_CNT);
+               eee->tx_lpi_timer = buf;
+       } else {
+               eee->eee_enabled = false;
+               eee->eee_active = false;
+               eee->tx_lpi_enabled = false;
+               eee->tx_lpi_timer = 0;
+       }
+
+       return 0;
+}
+
+static int lan743x_ethtool_set_eee(struct net_device *netdev,
+                                  struct ethtool_eee *eee)
+{
+       struct lan743x_adapter *adapter = netdev_priv(netdev);
+       struct phy_device *phydev = netdev->phydev;
+       u32 buf;
+       int ret;
+
+       if (!phydev)
+               return -EIO;
+       if (!phydev->drv) {
+               netif_err(adapter, drv, adapter->netdev,
+                         "Missing PHY Driver\n");
+               return -EIO;
+       }
+
+       if (eee->eee_enabled) {
+               ret = phy_init_eee(phydev, 0);
+               if (ret) {
+                       netif_err(adapter, drv, adapter->netdev,
+                                 "EEE initialization failed\n");
+                       return ret;
+               }
+
+               buf = (u32)eee->tx_lpi_timer;
+               lan743x_csr_write(adapter, MAC_EEE_TX_LPI_REQ_DLY_CNT, buf);
+
+               buf = lan743x_csr_read(adapter, MAC_CR);
+               buf |= MAC_CR_EEE_EN_;
+               lan743x_csr_write(adapter, MAC_CR, buf);
+       } else {
+               buf = lan743x_csr_read(adapter, MAC_CR);
+               buf &= ~MAC_CR_EEE_EN_;
+               lan743x_csr_write(adapter, MAC_CR, buf);
+       }
+
+       return phy_ethtool_set_eee(phydev, eee);
+}
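+/* Example invocation (interface name and timer value assumed):
+ *
+ *     ethtool --set-eee eth0 eee on tx-timer 600
+ *
+ * arrives here with eee_enabled set and tx_lpi_timer = 600, which is
+ * written to MAC_EEE_TX_LPI_REQ_DLY_CNT before EEE is enabled in MAC_CR.
+ */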
+
+#ifdef CONFIG_PM
+static void lan743x_ethtool_get_wol(struct net_device *netdev,
+                                   struct ethtool_wolinfo *wol)
+{
+       struct lan743x_adapter *adapter = netdev_priv(netdev);
+
+       wol->supported = 0;
+       wol->wolopts = 0;
+       phy_ethtool_get_wol(netdev->phydev, wol);
+
+       wol->supported |= WAKE_BCAST | WAKE_UCAST | WAKE_MCAST |
+               WAKE_MAGIC | WAKE_PHY | WAKE_ARP;
+
+       wol->wolopts |= adapter->wolopts;
+}
+
+static int lan743x_ethtool_set_wol(struct net_device *netdev,
+                                  struct ethtool_wolinfo *wol)
+{
+       struct lan743x_adapter *adapter = netdev_priv(netdev);
+
+       adapter->wolopts = 0;
+       if (wol->wolopts & WAKE_UCAST)
+               adapter->wolopts |= WAKE_UCAST;
+       if (wol->wolopts & WAKE_MCAST)
+               adapter->wolopts |= WAKE_MCAST;
+       if (wol->wolopts & WAKE_BCAST)
+               adapter->wolopts |= WAKE_BCAST;
+       if (wol->wolopts & WAKE_MAGIC)
+               adapter->wolopts |= WAKE_MAGIC;
+       if (wol->wolopts & WAKE_PHY)
+               adapter->wolopts |= WAKE_PHY;
+       if (wol->wolopts & WAKE_ARP)
+               adapter->wolopts |= WAKE_ARP;
+
+       device_set_wakeup_enable(&adapter->pdev->dev, (bool)wol->wolopts);
+
+       phy_ethtool_set_wol(netdev->phydev, wol);
+
+       return 0;
+}
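+/* Example invocation (interface name assumed): "ethtool -s eth0 wol g"
+ * requests magic-packet wake; the handler above records it in
+ * adapter->wolopts, and lan743x_pm_set_wol() programs it into
+ * WUCSR/PMT_CTL on the suspend path.
+ */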
+#endif /* CONFIG_PM */
+
+const struct ethtool_ops lan743x_ethtool_ops = {
+       .get_drvinfo = lan743x_ethtool_get_drvinfo,
+       .get_msglevel = lan743x_ethtool_get_msglevel,
+       .set_msglevel = lan743x_ethtool_set_msglevel,
+       .get_link = ethtool_op_get_link,
+
+       .get_eeprom_len = lan743x_ethtool_get_eeprom_len,
+       .get_eeprom = lan743x_ethtool_get_eeprom,
+       .set_eeprom = lan743x_ethtool_set_eeprom,
+       .get_strings = lan743x_ethtool_get_strings,
+       .get_ethtool_stats = lan743x_ethtool_get_ethtool_stats,
+       .get_sset_count = lan743x_ethtool_get_sset_count,
+       .get_rxnfc = lan743x_ethtool_get_rxnfc,
+       .get_rxfh_key_size = lan743x_ethtool_get_rxfh_key_size,
+       .get_rxfh_indir_size = lan743x_ethtool_get_rxfh_indir_size,
+       .get_rxfh = lan743x_ethtool_get_rxfh,
+       .set_rxfh = lan743x_ethtool_set_rxfh,
+       .get_eee = lan743x_ethtool_get_eee,
+       .set_eee = lan743x_ethtool_set_eee,
+       .get_link_ksettings = phy_ethtool_get_link_ksettings,
+       .set_link_ksettings = phy_ethtool_set_link_ksettings,
+#ifdef CONFIG_PM
+       .get_wol = lan743x_ethtool_get_wol,
+       .set_wol = lan743x_ethtool_set_wol,
+#endif
+};
diff --git a/drivers/net/ethernet/microchip/lan743x_ethtool.h b/drivers/net/ethernet/microchip/lan743x_ethtool.h
new file mode 100644 (file)
index 0000000..d0d11a7
--- /dev/null
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/* Copyright (C) 2018 Microchip Technology Inc. */
+
+#ifndef _LAN743X_ETHTOOL_H
+#define _LAN743X_ETHTOOL_H
+
+#include "linux/ethtool.h"
+
+extern const struct ethtool_ops lan743x_ethtool_ops;
+
+#endif /* _LAN743X_ETHTOOL_H */
index dd947e4dd3ce8c2f4cdca56d9f75d33aecd9f41c..bb323f2698392d2af220d4a6b29b9e37d1a38939 100644 (file)
@@ -11,7 +11,9 @@
 #include <linux/phy.h>
 #include <linux/rtnetlink.h>
 #include <linux/iopoll.h>
+#include <linux/crc16.h>
 #include "lan743x_main.h"
+#include "lan743x_ethtool.h"
 
 static void lan743x_pci_cleanup(struct lan743x_adapter *adapter)
 {
@@ -53,13 +55,13 @@ return_error:
        return ret;
 }
 
-static u32 lan743x_csr_read(struct lan743x_adapter *adapter, int offset)
+u32 lan743x_csr_read(struct lan743x_adapter *adapter, int offset)
 {
        return ioread32(&adapter->csr.csr_address[offset]);
 }
 
-static void lan743x_csr_write(struct lan743x_adapter *adapter, int offset,
-                             u32 data)
+void lan743x_csr_write(struct lan743x_adapter *adapter, int offset,
+                      u32 data)
 {
        iowrite32(data, &adapter->csr.csr_address[offset]);
 }
@@ -828,7 +830,7 @@ static int lan743x_mac_init(struct lan743x_adapter *adapter)
        }
 
        if (!mac_address_valid)
-               random_ether_addr(adapter->mac_address);
+               eth_random_addr(adapter->mac_address);
        lan743x_mac_set_address(adapter, adapter->mac_address);
        ether_addr_copy(netdev->dev_addr, adapter->mac_address);
        return 0;
@@ -1023,6 +1025,24 @@ return_error:
        return ret;
 }
 
+static void lan743x_rfe_open(struct lan743x_adapter *adapter)
+{
+       lan743x_csr_write(adapter, RFE_RSS_CFG,
+               RFE_RSS_CFG_UDP_IPV6_EX_ |
+               RFE_RSS_CFG_TCP_IPV6_EX_ |
+               RFE_RSS_CFG_IPV6_EX_ |
+               RFE_RSS_CFG_UDP_IPV6_ |
+               RFE_RSS_CFG_TCP_IPV6_ |
+               RFE_RSS_CFG_IPV6_ |
+               RFE_RSS_CFG_UDP_IPV4_ |
+               RFE_RSS_CFG_TCP_IPV4_ |
+               RFE_RSS_CFG_IPV4_ |
+               RFE_RSS_CFG_VALID_HASH_BITS_ |
+               RFE_RSS_CFG_RSS_QUEUE_ENABLE_ |
+               RFE_RSS_CFG_RSS_HASH_STORE_ |
+               RFE_RSS_CFG_RSS_ENABLE_);
+}
+
 static void lan743x_rfe_update_mac_address(struct lan743x_adapter *adapter)
 {
        u8 *mac_addr;
@@ -2417,6 +2437,8 @@ static int lan743x_netdev_open(struct net_device *netdev)
        if (ret)
                goto close_mac;
 
+       lan743x_rfe_open(adapter);
+
        for (index = 0; index < LAN743X_USED_RX_CHANNELS; index++) {
                ret = lan743x_rx_open(&adapter->rx[index]);
                if (ret)
@@ -2689,6 +2711,7 @@ static int lan743x_pcidev_probe(struct pci_dev *pdev,
                goto cleanup_hardware;
 
        adapter->netdev->netdev_ops = &lan743x_netdev_ops;
+       adapter->netdev->ethtool_ops = &lan743x_ethtool_ops;
        adapter->netdev->features = NETIF_F_SG | NETIF_F_TSO | NETIF_F_HW_CSUM;
        adapter->netdev->hw_features = adapter->netdev->features;
 
@@ -2747,10 +2770,182 @@ static void lan743x_pcidev_shutdown(struct pci_dev *pdev)
                lan743x_netdev_close(netdev);
        rtnl_unlock();
 
+#ifdef CONFIG_PM
+       pci_save_state(pdev);
+#endif
+
        /* clean up lan743x portion */
        lan743x_hardware_cleanup(adapter);
 }
 
+#ifdef CONFIG_PM
+static u16 lan743x_pm_wakeframe_crc16(const u8 *buf, int len)
+{
+       return bitrev16(crc16(0xFFFF, buf, len));
+}
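+/* The wakeup-frame filters match on a bit-reversed CRC-16 of the
+ * selected header bytes: crc16() computes the reflected CRC-16
+ * (polynomial 0x8005) seeded with 0xFFFF, and bitrev16() reorders the
+ * result into the bit ordering MAC_WUF_CFG expects.  This reading of
+ * the register layout is inferred from the usage below.
+ */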
+
+static void lan743x_pm_set_wol(struct lan743x_adapter *adapter)
+{
+       const u8 ipv4_multicast[3] = { 0x01, 0x00, 0x5E };
+       const u8 ipv6_multicast[2] = { 0x33, 0x33 };
+       const u8 arp_type[2] = { 0x08, 0x06 };
+       int mask_index;
+       u32 pmtctl;
+       u32 wucsr;
+       u32 macrx;
+       u16 crc;
+
+       for (mask_index = 0; mask_index < MAC_NUM_OF_WUF_CFG; mask_index++)
+               lan743x_csr_write(adapter, MAC_WUF_CFG(mask_index), 0);
+
+       /* clear wake settings */
+       pmtctl = lan743x_csr_read(adapter, PMT_CTL);
+       pmtctl |= PMT_CTL_WUPS_MASK_;
+       pmtctl &= ~(PMT_CTL_GPIO_WAKEUP_EN_ | PMT_CTL_EEE_WAKEUP_EN_ |
+               PMT_CTL_WOL_EN_ | PMT_CTL_MAC_D3_RX_CLK_OVR_ |
+               PMT_CTL_RX_FCT_RFE_D3_CLK_OVR_ | PMT_CTL_ETH_PHY_WAKE_EN_);
+
+       macrx = lan743x_csr_read(adapter, MAC_RX);
+
+       wucsr = 0;
+       mask_index = 0;
+
+       pmtctl |= PMT_CTL_ETH_PHY_D3_COLD_OVR_ | PMT_CTL_ETH_PHY_D3_OVR_;
+
+       if (adapter->wolopts & WAKE_PHY) {
+               pmtctl |= PMT_CTL_ETH_PHY_EDPD_PLL_CTL_;
+               pmtctl |= PMT_CTL_ETH_PHY_WAKE_EN_;
+       }
+       if (adapter->wolopts & WAKE_MAGIC) {
+               wucsr |= MAC_WUCSR_MPEN_;
+               macrx |= MAC_RX_RXEN_;
+               pmtctl |= PMT_CTL_WOL_EN_ | PMT_CTL_MAC_D3_RX_CLK_OVR_;
+       }
+       if (adapter->wolopts & WAKE_UCAST) {
+               wucsr |= MAC_WUCSR_RFE_WAKE_EN_ | MAC_WUCSR_PFDA_EN_;
+               macrx |= MAC_RX_RXEN_;
+               pmtctl |= PMT_CTL_WOL_EN_ | PMT_CTL_MAC_D3_RX_CLK_OVR_;
+               pmtctl |= PMT_CTL_RX_FCT_RFE_D3_CLK_OVR_;
+       }
+       if (adapter->wolopts & WAKE_BCAST) {
+               wucsr |= MAC_WUCSR_RFE_WAKE_EN_ | MAC_WUCSR_BCST_EN_;
+               macrx |= MAC_RX_RXEN_;
+               pmtctl |= PMT_CTL_WOL_EN_ | PMT_CTL_MAC_D3_RX_CLK_OVR_;
+               pmtctl |= PMT_CTL_RX_FCT_RFE_D3_CLK_OVR_;
+       }
+       if (adapter->wolopts & WAKE_MCAST) {
+               /* IPv4 multicast */
+               crc = lan743x_pm_wakeframe_crc16(ipv4_multicast, 3);
+               lan743x_csr_write(adapter, MAC_WUF_CFG(mask_index),
+                                 MAC_WUF_CFG_EN_ | MAC_WUF_CFG_TYPE_MCAST_ |
+                                 (0 << MAC_WUF_CFG_OFFSET_SHIFT_) |
+                                 (crc & MAC_WUF_CFG_CRC16_MASK_));
+               lan743x_csr_write(adapter, MAC_WUF_MASK0(mask_index), 7);
+               lan743x_csr_write(adapter, MAC_WUF_MASK1(mask_index), 0);
+               lan743x_csr_write(adapter, MAC_WUF_MASK2(mask_index), 0);
+               lan743x_csr_write(adapter, MAC_WUF_MASK3(mask_index), 0);
+               mask_index++;
+
+               /* IPv6 multicast */
+               crc = lan743x_pm_wakeframe_crc16(ipv6_multicast, 2);
+               lan743x_csr_write(adapter, MAC_WUF_CFG(mask_index),
+                                 MAC_WUF_CFG_EN_ | MAC_WUF_CFG_TYPE_MCAST_ |
+                                 (0 << MAC_WUF_CFG_OFFSET_SHIFT_) |
+                                 (crc & MAC_WUF_CFG_CRC16_MASK_));
+               lan743x_csr_write(adapter, MAC_WUF_MASK0(mask_index), 3);
+               lan743x_csr_write(adapter, MAC_WUF_MASK1(mask_index), 0);
+               lan743x_csr_write(adapter, MAC_WUF_MASK2(mask_index), 0);
+               lan743x_csr_write(adapter, MAC_WUF_MASK3(mask_index), 0);
+               mask_index++;
+
+               wucsr |= MAC_WUCSR_RFE_WAKE_EN_ | MAC_WUCSR_WAKE_EN_;
+               macrx |= MAC_RX_RXEN_;
+               pmtctl |= PMT_CTL_WOL_EN_ | PMT_CTL_MAC_D3_RX_CLK_OVR_;
+               pmtctl |= PMT_CTL_RX_FCT_RFE_D3_CLK_OVR_;
+       }
+       if (adapter->wolopts & WAKE_ARP) {
+               /* set MAC_WUF_CFG & WUF_MASK
+                * for packettype (offset 12,13) = ARP (0x0806)
+                */
+               crc = lan743x_pm_wakeframe_crc16(arp_type, 2);
+               lan743x_csr_write(adapter, MAC_WUF_CFG(mask_index),
+                                 MAC_WUF_CFG_EN_ | MAC_WUF_CFG_TYPE_ALL_ |
+                                 (0 << MAC_WUF_CFG_OFFSET_SHIFT_) |
+                                 (crc & MAC_WUF_CFG_CRC16_MASK_));
+               lan743x_csr_write(adapter, MAC_WUF_MASK0(mask_index), 0x3000);
+               lan743x_csr_write(adapter, MAC_WUF_MASK1(mask_index), 0);
+               lan743x_csr_write(adapter, MAC_WUF_MASK2(mask_index), 0);
+               lan743x_csr_write(adapter, MAC_WUF_MASK3(mask_index), 0);
+               mask_index++;
+
+               wucsr |= MAC_WUCSR_RFE_WAKE_EN_ | MAC_WUCSR_WAKE_EN_;
+               macrx |= MAC_RX_RXEN_;
+               pmtctl |= PMT_CTL_WOL_EN_ | PMT_CTL_MAC_D3_RX_CLK_OVR_;
+               pmtctl |= PMT_CTL_RX_FCT_RFE_D3_CLK_OVR_;
+       }
+
+       lan743x_csr_write(adapter, MAC_WUCSR, wucsr);
+       lan743x_csr_write(adapter, PMT_CTL, pmtctl);
+       lan743x_csr_write(adapter, MAC_RX, macrx);
+}
+
+static int lan743x_pm_suspend(struct device *dev)
+{
+       struct pci_dev *pdev = to_pci_dev(dev);
+       struct net_device *netdev = pci_get_drvdata(pdev);
+       struct lan743x_adapter *adapter = netdev_priv(netdev);
+
+       lan743x_pcidev_shutdown(pdev);
+
+       /* clear all wakes */
+       lan743x_csr_write(adapter, MAC_WUCSR, 0);
+       lan743x_csr_write(adapter, MAC_WUCSR2, 0);
+       lan743x_csr_write(adapter, MAC_WK_SRC, 0xFFFFFFFF);
+
+       if (adapter->wolopts)
+               lan743x_pm_set_wol(adapter);
+
+       /* Host sets PME_En, puts the device into D3hot */
+       return pci_prepare_to_sleep(pdev);
+}
+
+static int lan743x_pm_resume(struct device *dev)
+{
+       struct pci_dev *pdev = to_pci_dev(dev);
+       struct net_device *netdev = pci_get_drvdata(pdev);
+       struct lan743x_adapter *adapter = netdev_priv(netdev);
+       int ret;
+
+       pci_set_power_state(pdev, PCI_D0);
+       pci_restore_state(pdev);
+       pci_save_state(pdev);
+
+       ret = lan743x_hardware_init(adapter, pdev);
+       if (ret) {
+               netif_err(adapter, probe, adapter->netdev,
+                         "lan743x_hardware_init returned %d\n", ret);
+       }
+
+       /* Reopen the netdev if it was in the running state when the system
+        * suspended.  For instance, this is true when the system wakes up
+        * after pm-suspend, but false when it wakes up after a suspend
+        * issued from the GUI menu.
+        */
+       if (netif_running(netdev))
+               lan743x_netdev_open(netdev);
+
+       netif_device_attach(netdev);
+
+       return 0;
+}
+
+static const struct dev_pm_ops lan743x_pm_ops = {
+       SET_SYSTEM_SLEEP_PM_OPS(lan743x_pm_suspend, lan743x_pm_resume)
+};
+#endif /* CONFIG_PM */
+
 static const struct pci_device_id lan743x_pcidev_tbl[] = {
        { PCI_DEVICE(PCI_VENDOR_ID_SMSC, PCI_DEVICE_ID_SMSC_LAN7430) },
        { 0, }
@@ -2761,6 +2956,9 @@ static struct pci_driver lan743x_pcidev_driver = {
        .id_table = lan743x_pcidev_tbl,
        .probe    = lan743x_pcidev_probe,
        .remove   = lan743x_pcidev_remove,
+#ifdef CONFIG_PM
+       .driver.pm = &lan743x_pm_ops,
+#endif
        .shutdown = lan743x_pcidev_shutdown,
 };
 
index 73b463a9df61f31197b777f3b1db6c53e0f7509d..4fa7a5e027f44be6e60a60d46b198e7fcd9d61fe 100644 (file)
 #define HW_CFG_LRST_                           BIT(1)
 
 #define PMT_CTL                                        (0x014)
+#define PMT_CTL_ETH_PHY_D3_COLD_OVR_           BIT(27)
+#define PMT_CTL_MAC_D3_RX_CLK_OVR_             BIT(25)
+#define PMT_CTL_ETH_PHY_EDPD_PLL_CTL_          BIT(24)
+#define PMT_CTL_ETH_PHY_D3_OVR_                        BIT(23)
+#define PMT_CTL_RX_FCT_RFE_D3_CLK_OVR_         BIT(18)
+#define PMT_CTL_GPIO_WAKEUP_EN_                        BIT(15)
+#define PMT_CTL_EEE_WAKEUP_EN_                 BIT(13)
 #define PMT_CTL_READY_                         BIT(7)
 #define PMT_CTL_ETH_PHY_RST_                   BIT(4)
+#define PMT_CTL_WOL_EN_                                BIT(3)
+#define PMT_CTL_ETH_PHY_WAKE_EN_               BIT(2)
+#define PMT_CTL_WUPS_MASK_                     (0x00000003)
 
 #define DP_SEL                         (0x024)
 #define DP_SEL_DPRDY_                  BIT(31)
 
 #define DP_DATA_0                      (0x030)
 
+#define E2P_CMD                                (0x040)
+#define E2P_CMD_EPC_BUSY_              BIT(31)
+#define E2P_CMD_EPC_CMD_WRITE_         (0x30000000)
+#define E2P_CMD_EPC_CMD_EWEN_          (0x20000000)
+#define E2P_CMD_EPC_CMD_READ_          (0x00000000)
+#define E2P_CMD_EPC_TIMEOUT_           BIT(10)
+#define E2P_CMD_EPC_ADDR_MASK_         (0x000001FF)
+
+#define E2P_DATA                       (0x044)
+
 #define FCT_RX_CTL                     (0xAC)
 #define FCT_RX_CTL_EN_(channel)                BIT(28 + (channel))
 #define FCT_RX_CTL_DIS_(channel)       BIT(24 + (channel))
@@ -62,6 +82,7 @@
        ((value << 0) & FCT_FLOW_CTL_ON_THRESHOLD_)
 
 #define MAC_CR                         (0x100)
+#define MAC_CR_EEE_EN_                 BIT(17)
 #define MAC_CR_ADD_                    BIT(12)
 #define MAC_CR_ASD_                    BIT(11)
 #define MAC_CR_CNTR_RST_               BIT(5)
 
 #define MAC_MII_DATA                   (0x124)
 
+#define MAC_EEE_TX_LPI_REQ_DLY_CNT             (0x130)
+
+#define MAC_WUCSR                              (0x140)
+#define MAC_WUCSR_RFE_WAKE_EN_                 BIT(14)
+#define MAC_WUCSR_PFDA_EN_                     BIT(3)
+#define MAC_WUCSR_WAKE_EN_                     BIT(2)
+#define MAC_WUCSR_MPEN_                                BIT(1)
+#define MAC_WUCSR_BCST_EN_                     BIT(0)
+
+#define MAC_WK_SRC                             (0x144)
+
+#define MAC_WUF_CFG0                   (0x150)
+#define MAC_NUM_OF_WUF_CFG             (32)
+#define MAC_WUF_CFG_BEGIN              (MAC_WUF_CFG0)
+#define MAC_WUF_CFG(index)             (MAC_WUF_CFG_BEGIN + (4 * (index)))
+#define MAC_WUF_CFG_EN_                        BIT(31)
+#define MAC_WUF_CFG_TYPE_MCAST_                (0x02000000)
+#define MAC_WUF_CFG_TYPE_ALL_          (0x01000000)
+#define MAC_WUF_CFG_OFFSET_SHIFT_      (16)
+#define MAC_WUF_CFG_CRC16_MASK_                (0x0000FFFF)
+
+#define MAC_WUF_MASK0_0                        (0x200)
+#define MAC_WUF_MASK0_1                        (0x204)
+#define MAC_WUF_MASK0_2                        (0x208)
+#define MAC_WUF_MASK0_3                        (0x20C)
+#define MAC_WUF_MASK0_BEGIN            (MAC_WUF_MASK0_0)
+#define MAC_WUF_MASK1_BEGIN            (MAC_WUF_MASK0_1)
+#define MAC_WUF_MASK2_BEGIN            (MAC_WUF_MASK0_2)
+#define MAC_WUF_MASK3_BEGIN            (MAC_WUF_MASK0_3)
+#define MAC_WUF_MASK0(index)           (MAC_WUF_MASK0_BEGIN + (0x10 * (index)))
+#define MAC_WUF_MASK1(index)           (MAC_WUF_MASK1_BEGIN + (0x10 * (index)))
+#define MAC_WUF_MASK2(index)           (MAC_WUF_MASK2_BEGIN + (0x10 * (index)))
+#define MAC_WUF_MASK3(index)           (MAC_WUF_MASK3_BEGIN + (0x10 * (index)))
+
 /* offset 0x400 - 0x500, x may range from 0 to 32, for a total of 33 entries */
 #define RFE_ADDR_FILT_HI(x)            (0x400 + (8 * (x)))
 #define RFE_ADDR_FILT_HI_VALID_                BIT(31)
 #define RFE_CTL_MCAST_HASH_            BIT(3)
 #define RFE_CTL_DA_PERFECT_            BIT(1)
 
+#define RFE_RSS_CFG                    (0x554)
+#define RFE_RSS_CFG_UDP_IPV6_EX_       BIT(16)
+#define RFE_RSS_CFG_TCP_IPV6_EX_       BIT(15)
+#define RFE_RSS_CFG_IPV6_EX_           BIT(14)
+#define RFE_RSS_CFG_UDP_IPV6_          BIT(13)
+#define RFE_RSS_CFG_TCP_IPV6_          BIT(12)
+#define RFE_RSS_CFG_IPV6_              BIT(11)
+#define RFE_RSS_CFG_UDP_IPV4_          BIT(10)
+#define RFE_RSS_CFG_TCP_IPV4_          BIT(9)
+#define RFE_RSS_CFG_IPV4_              BIT(8)
+#define RFE_RSS_CFG_VALID_HASH_BITS_   (0x000000E0)
+#define RFE_RSS_CFG_RSS_QUEUE_ENABLE_  BIT(2)
+#define RFE_RSS_CFG_RSS_HASH_STORE_    BIT(1)
+#define RFE_RSS_CFG_RSS_ENABLE_                BIT(0)
+
+#define RFE_HASH_KEY(index)            (0x558 + ((index) << 2))
+
+#define RFE_INDX(index)                        (0x580 + ((index) << 2))
+
+#define MAC_WUCSR2                     (0x600)
+
 #define INT_STS                                (0x780)
 #define INT_BIT_DMA_RX_(channel)       BIT(24 + (channel))
 #define INT_BIT_ALL_RX_                        (0x0F000000)
 #define TX_CFG_C_TX_DMA_INT_STS_AUTO_CLR_      BIT(3)
 #define TX_CFG_C_TX_INT_STS_R2C_MODE_MASK_     (0x00000007)
 
+#define OTP_PWR_DN                             (0x1000)
+#define OTP_PWR_DN_PWRDN_N_                    BIT(0)
+
+#define OTP_ADDR1                              (0x1004)
+#define OTP_ADDR1_15_11_MASK_                  (0x1F)
+
+#define OTP_ADDR2                              (0x1008)
+#define OTP_ADDR2_10_3_MASK_                   (0xFF)
+
+#define OTP_PRGM_DATA                          (0x1010)
+
+#define OTP_PRGM_MODE                          (0x1014)
+#define OTP_PRGM_MODE_BYTE_                    BIT(0)
+
+#define OTP_TST_CMD                            (0x1024)
+#define OTP_TST_CMD_PRGVRFY_                   BIT(3)
+
+#define OTP_CMD_GO                             (0x1028)
+#define OTP_CMD_GO_GO_                         BIT(0)
+
+#define OTP_STATUS                             (0x1030)
+#define OTP_STATUS_BUSY_                       BIT(0)
+
 /* MAC statistics registers */
 #define STAT_RX_FCS_ERRORS                     (0x1200)
 #define STAT_RX_ALIGNMENT_ERRORS               (0x1204)
+#define STAT_RX_FRAGMENT_ERRORS                        (0x1208)
 #define STAT_RX_JABBER_ERRORS                  (0x120C)
 #define STAT_RX_UNDERSIZE_FRAME_ERRORS         (0x1210)
 #define STAT_RX_OVERSIZE_FRAME_ERRORS          (0x1214)
 #define STAT_RX_UNICAST_BYTE_COUNT             (0x121C)
 #define STAT_RX_BROADCAST_BYTE_COUNT           (0x1220)
 #define STAT_RX_MULTICAST_BYTE_COUNT           (0x1224)
+#define STAT_RX_UNICAST_FRAMES                 (0x1228)
+#define STAT_RX_BROADCAST_FRAMES               (0x122C)
 #define STAT_RX_MULTICAST_FRAMES               (0x1230)
+#define STAT_RX_PAUSE_FRAMES                   (0x1234)
+#define STAT_RX_64_BYTE_FRAMES                 (0x1238)
+#define STAT_RX_65_127_BYTE_FRAMES             (0x123C)
+#define STAT_RX_128_255_BYTE_FRAMES            (0x1240)
+#define STAT_RX_256_511_BYTES_FRAMES           (0x1244)
+#define STAT_RX_512_1023_BYTE_FRAMES           (0x1248)
+#define STAT_RX_1024_1518_BYTE_FRAMES          (0x124C)
+#define STAT_RX_GREATER_1518_BYTE_FRAMES       (0x1250)
 #define STAT_RX_TOTAL_FRAMES                   (0x1254)
+#define STAT_EEE_RX_LPI_TRANSITIONS            (0x1258)
+#define STAT_EEE_RX_LPI_TIME                   (0x125C)
+#define STAT_RX_COUNTER_ROLLOVER_STATUS                (0x127C)
 
 #define STAT_TX_FCS_ERRORS                     (0x1280)
 #define STAT_TX_EXCESS_DEFERRAL_ERRORS         (0x1284)
 #define STAT_TX_CARRIER_ERRORS                 (0x1288)
+#define STAT_TX_BAD_BYTE_COUNT                 (0x128C)
 #define STAT_TX_SINGLE_COLLISIONS              (0x1290)
 #define STAT_TX_MULTIPLE_COLLISIONS            (0x1294)
 #define STAT_TX_EXCESSIVE_COLLISION            (0x1298)
 #define STAT_TX_UNICAST_BYTE_COUNT             (0x12A0)
 #define STAT_TX_BROADCAST_BYTE_COUNT           (0x12A4)
 #define STAT_TX_MULTICAST_BYTE_COUNT           (0x12A8)
+#define STAT_TX_UNICAST_FRAMES                 (0x12AC)
+#define STAT_TX_BROADCAST_FRAMES               (0x12B0)
 #define STAT_TX_MULTICAST_FRAMES               (0x12B4)
+#define STAT_TX_PAUSE_FRAMES                   (0x12B8)
+#define STAT_TX_64_BYTE_FRAMES                 (0x12BC)
+#define STAT_TX_65_127_BYTE_FRAMES             (0x12C0)
+#define STAT_TX_128_255_BYTE_FRAMES            (0x12C4)
+#define STAT_TX_256_511_BYTES_FRAMES           (0x12C8)
+#define STAT_TX_512_1023_BYTE_FRAMES           (0x12CC)
+#define STAT_TX_1024_1518_BYTE_FRAMES          (0x12D0)
+#define STAT_TX_GREATER_1518_BYTE_FRAMES       (0x12D4)
 #define STAT_TX_TOTAL_FRAMES                   (0x12D8)
+#define STAT_EEE_TX_LPI_TRANSITIONS            (0x12DC)
+#define STAT_EEE_TX_LPI_TIME                   (0x12E0)
+#define STAT_TX_COUNTER_ROLLOVER_STATUS                (0x12FC)
 
 /* End of Register definitions */
 
@@ -473,6 +600,9 @@ struct lan743x_adapter {
        struct net_device       *netdev;
        struct mii_bus          *mdiobus;
        int                     msg_enable;
+#ifdef CONFIG_PM
+       u32                     wolopts;
+#endif
        struct pci_dev          *pdev;
        struct lan743x_csr      csr;
        struct lan743x_intr     intr;
@@ -594,4 +724,7 @@ struct lan743x_rx_buffer_info {
 #define RX_PROCESS_RESULT_PACKET_RECEIVED   (1)
 #define RX_PROCESS_RESULT_PACKET_DROPPED    (2)
 
+u32 lan743x_csr_read(struct lan743x_adapter *adapter, int offset);
+void lan743x_csr_write(struct lan743x_adapter *adapter, int offset, u32 data);
+
 #endif /* _LAN743X_H */
index 776a8a9be8e3551311f5a99ba0285c4c698cf10a..1a4f2bb48ead712634ce5968e23144117d89b8d7 100644 (file)
@@ -148,12 +148,191 @@ static inline int ocelot_vlant_wait_for_completion(struct ocelot *ocelot)
        return 0;
 }
 
+static int ocelot_vlant_set_mask(struct ocelot *ocelot, u16 vid, u32 mask)
+{
+       /* Select the VID to configure */
+       ocelot_write(ocelot, ANA_TABLES_VLANTIDX_V_INDEX(vid),
+                    ANA_TABLES_VLANTIDX);
+       /* Set the vlan port members mask and issue a write command */
+       ocelot_write(ocelot, ANA_TABLES_VLANACCESS_VLAN_PORT_MASK(mask) |
+                            ANA_TABLES_VLANACCESS_CMD_WRITE,
+                    ANA_TABLES_VLANACCESS);
+
+       return ocelot_vlant_wait_for_completion(ocelot);
+}
+
+static void ocelot_vlan_mode(struct ocelot_port *port,
+                            netdev_features_t features)
+{
+       struct ocelot *ocelot = port->ocelot;
+       u8 p = port->chip_port;
+       u32 val;
+
+       /* Filtering */
+       val = ocelot_read(ocelot, ANA_VLANMASK);
+       if (features & NETIF_F_HW_VLAN_CTAG_FILTER)
+               val |= BIT(p);
+       else
+               val &= ~BIT(p);
+       ocelot_write(ocelot, val, ANA_VLANMASK);
+}
+
+static void ocelot_vlan_port_apply(struct ocelot *ocelot,
+                                  struct ocelot_port *port)
+{
+       u32 val;
+
+       /* Ingress classification (ANA_PORT_VLAN_CFG) */
+       /* Default vlan to classify for untagged frames (may be zero) */
+       val = ANA_PORT_VLAN_CFG_VLAN_VID(port->pvid);
+       if (port->vlan_aware)
+               val |= ANA_PORT_VLAN_CFG_VLAN_AWARE_ENA |
+                      ANA_PORT_VLAN_CFG_VLAN_POP_CNT(1);
+
+       ocelot_rmw_gix(ocelot, val,
+                      ANA_PORT_VLAN_CFG_VLAN_VID_M |
+                      ANA_PORT_VLAN_CFG_VLAN_AWARE_ENA |
+                      ANA_PORT_VLAN_CFG_VLAN_POP_CNT_M,
+                      ANA_PORT_VLAN_CFG, port->chip_port);
+
+       /* Drop frames with multicast source address */
+       val = ANA_PORT_DROP_CFG_DROP_MC_SMAC_ENA;
+       if (port->vlan_aware && !port->vid)
+               /* If port is vlan-aware and tagged, drop untagged and priority
+                * tagged frames.
+                */
+               val |= ANA_PORT_DROP_CFG_DROP_UNTAGGED_ENA |
+                      ANA_PORT_DROP_CFG_DROP_PRIO_S_TAGGED_ENA |
+                      ANA_PORT_DROP_CFG_DROP_PRIO_C_TAGGED_ENA;
+       ocelot_write_gix(ocelot, val, ANA_PORT_DROP_CFG, port->chip_port);
+
+       /* Egress configuration (REW_TAG_CFG): VLAN tag type to 8021Q. */
+       val = REW_TAG_CFG_TAG_TPID_CFG(0);
+
+       if (port->vlan_aware) {
+               if (port->vid)
+                       /* Tag all frames except when VID == DEFAULT_VLAN */
+                       val |= REW_TAG_CFG_TAG_CFG(1);
+               else
+                       /* Tag all frames */
+                       val |= REW_TAG_CFG_TAG_CFG(3);
+       }
+       ocelot_rmw_gix(ocelot, val,
+                      REW_TAG_CFG_TAG_TPID_CFG_M |
+                      REW_TAG_CFG_TAG_CFG_M,
+                      REW_TAG_CFG, port->chip_port);
+
+       /* Set default VLAN and tag type to 8021Q. */
+       val = REW_PORT_VLAN_CFG_PORT_TPID(ETH_P_8021Q) |
+             REW_PORT_VLAN_CFG_PORT_VID(port->vid);
+       ocelot_rmw_gix(ocelot, val,
+                      REW_PORT_VLAN_CFG_PORT_TPID_M |
+                      REW_PORT_VLAN_CFG_PORT_VID_M,
+                      REW_PORT_VLAN_CFG, port->chip_port);
+}
+
+static int ocelot_vlan_vid_add(struct net_device *dev, u16 vid, bool pvid,
+                              bool untagged)
+{
+       struct ocelot_port *port = netdev_priv(dev);
+       struct ocelot *ocelot = port->ocelot;
+       int ret;
+
+       /* Add the port MAC address with the right VLAN information */
+       ocelot_mact_learn(ocelot, PGID_CPU, dev->dev_addr, vid,
+                         ENTRYTYPE_LOCKED);
+
+       /* Make the port a member of the VLAN */
+       ocelot->vlan_mask[vid] |= BIT(port->chip_port);
+       ret = ocelot_vlant_set_mask(ocelot, vid, ocelot->vlan_mask[vid]);
+       if (ret)
+               return ret;
+
+       /* Default ingress vlan classification */
+       if (pvid)
+               port->pvid = vid;
+
+       /* Untagged egress vlan classification */
+       if (untagged)
+               port->vid = vid;
+
+       ocelot_vlan_port_apply(ocelot, port);
+
+       return 0;
+}
+
+static int ocelot_vlan_vid_del(struct net_device *dev, u16 vid)
+{
+       struct ocelot_port *port = netdev_priv(dev);
+       struct ocelot *ocelot = port->ocelot;
+       int ret;
+
+       /* 8021q removes VID 0 on module unload for all interfaces
+        * with the VLAN filtering feature. We need to keep it to receive
+        * untagged traffic.
+        */
+       if (vid == 0)
+               return 0;
+
+       /* Delete the port MAC address with the right VLAN information */
+       ocelot_mact_forget(ocelot, dev->dev_addr, vid);
+
+       /* Stop the port from being a member of the vlan */
+       ocelot->vlan_mask[vid] &= ~BIT(port->chip_port);
+       ret = ocelot_vlant_set_mask(ocelot, vid, ocelot->vlan_mask[vid]);
+       if (ret)
+               return ret;
+
+       /* Ingress */
+       if (port->pvid == vid)
+               port->pvid = 0;
+
+       /* Egress */
+       if (port->vid == vid)
+               port->vid = 0;
+
+       ocelot_vlan_port_apply(ocelot, port);
+
+       return 0;
+}
+
 static void ocelot_vlan_init(struct ocelot *ocelot)
 {
+       u16 port, vid;
+
        /* Clear VLAN table, by default all ports are members of all VLANs */
        ocelot_write(ocelot, ANA_TABLES_VLANACCESS_CMD_INIT,
                     ANA_TABLES_VLANACCESS);
        ocelot_vlant_wait_for_completion(ocelot);
+
+       /* Configure the port VLAN memberships */
+       for (vid = 1; vid < VLAN_N_VID; vid++) {
+               ocelot->vlan_mask[vid] = 0;
+               ocelot_vlant_set_mask(ocelot, vid, ocelot->vlan_mask[vid]);
+       }
+
+       /* Because VLAN filtering is enabled, we need VID 0 to get untagged
+        * traffic.  It is added automatically if the 8021q module is loaded,
+        * but we can't rely on that since the module may not be loaded.
+        */
+       ocelot->vlan_mask[0] = GENMASK(ocelot->num_phys_ports - 1, 0);
+       ocelot_vlant_set_mask(ocelot, 0, ocelot->vlan_mask[0]);
+
+       /* Configure the CPU port to be VLAN aware */
+       ocelot_write_gix(ocelot, ANA_PORT_VLAN_CFG_VLAN_VID(0) |
+                                ANA_PORT_VLAN_CFG_VLAN_AWARE_ENA |
+                                ANA_PORT_VLAN_CFG_VLAN_POP_CNT(1),
+                        ANA_PORT_VLAN_CFG, ocelot->num_phys_ports);
+
+       /* Set vlan ingress filter mask to all ports but the CPU port by
+        * default.
+        */
+       ocelot_write(ocelot, GENMASK(9, 0), ANA_VLANMASK);
+
+       for (port = 0; port < ocelot->num_phys_ports; port++) {
+               ocelot_write_gix(ocelot, 0, REW_PORT_VLAN_CFG, port);
+               ocelot_write_gix(ocelot, 0, REW_TAG_CFG, port);
+       }
 }
 
 /* Watermark encode
@@ -539,6 +718,20 @@ static int ocelot_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
        struct ocelot_port *port = netdev_priv(dev);
        struct ocelot *ocelot = port->ocelot;
 
+       if (!vid) {
+               if (!port->vlan_aware)
+                       /* If the bridge is not VLAN aware and no VID was
+                        * provided, set it to pvid to ensure the MAC entry
+                        * matches incoming untagged packets
+                        */
+                       vid = port->pvid;
+               else
+                       /* If the bridge is VLAN aware a VID must be provided as
+                        * otherwise the learnt entry wouldn't match any frame.
+                        */
+                       return -EINVAL;
+       }
+
        return ocelot_mact_learn(ocelot, port->chip_port, addr, vid,
                                 ENTRYTYPE_NORMAL);
 }
@@ -690,6 +883,30 @@ end:
        return ret;
 }
 
+static int ocelot_vlan_rx_add_vid(struct net_device *dev, __be16 proto,
+                                 u16 vid)
+{
+       return ocelot_vlan_vid_add(dev, vid, false, true);
+}
+
+static int ocelot_vlan_rx_kill_vid(struct net_device *dev, __be16 proto,
+                                  u16 vid)
+{
+       return ocelot_vlan_vid_del(dev, vid);
+}
+
+static int ocelot_set_features(struct net_device *dev,
+                              netdev_features_t features)
+{
+       struct ocelot_port *port = netdev_priv(dev);
+       netdev_features_t changed = dev->features ^ features;
+
+       if (changed & NETIF_F_HW_VLAN_CTAG_FILTER)
+               ocelot_vlan_mode(port, features);
+
+       return 0;
+}
+
 static const struct net_device_ops ocelot_port_netdev_ops = {
        .ndo_open                       = ocelot_port_open,
        .ndo_stop                       = ocelot_port_stop,
@@ -701,6 +918,9 @@ static const struct net_device_ops ocelot_port_netdev_ops = {
        .ndo_fdb_add                    = ocelot_fdb_add,
        .ndo_fdb_del                    = ocelot_fdb_del,
        .ndo_fdb_dump                   = ocelot_fdb_dump,
+       .ndo_vlan_rx_add_vid            = ocelot_vlan_rx_add_vid,
+       .ndo_vlan_rx_kill_vid           = ocelot_vlan_rx_kill_vid,
+       .ndo_set_features               = ocelot_set_features,
 };
 
 static void ocelot_get_strings(struct net_device *netdev, u32 sset, u8 *data)
@@ -780,6 +1000,8 @@ static const struct ethtool_ops ocelot_ethtool_ops = {
        .get_strings            = ocelot_get_strings,
        .get_ethtool_stats      = ocelot_get_ethtool_stats,
        .get_sset_count         = ocelot_get_sset_count,
+       .get_link_ksettings     = phy_ethtool_get_link_ksettings,
+       .set_link_ksettings     = phy_ethtool_set_link_ksettings,
 };
 
 static int ocelot_port_attr_get(struct net_device *dev,
@@ -914,6 +1136,10 @@ static int ocelot_port_attr_set(struct net_device *dev,
        case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME:
                ocelot_port_attr_ageing_set(ocelot_port, attr->u.ageing_time);
                break;
+       case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING:
+               ocelot_port->vlan_aware = attr->u.vlan_filtering;
+               ocelot_vlan_port_apply(ocelot_port->ocelot, ocelot_port);
+               break;
        case SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED:
                ocelot_port_attr_mc_set(ocelot_port, !attr->u.mc_disabled);
                break;
@@ -925,6 +1151,40 @@ static int ocelot_port_attr_set(struct net_device *dev,
        return err;
 }
 
+static int ocelot_port_obj_add_vlan(struct net_device *dev,
+                                   const struct switchdev_obj_port_vlan *vlan,
+                                   struct switchdev_trans *trans)
+{
+       int ret;
+       u16 vid;
+
+       for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) {
+               ret = ocelot_vlan_vid_add(dev, vid,
+                                         vlan->flags & BRIDGE_VLAN_INFO_PVID,
+                                         vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED);
+               if (ret)
+                       return ret;
+       }
+
+       return 0;
+}
+
+static int ocelot_port_vlan_del_vlan(struct net_device *dev,
+                                    const struct switchdev_obj_port_vlan *vlan)
+{
+       int ret;
+       u16 vid;
+
+       for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) {
+               ret = ocelot_vlan_vid_del(dev, vid);
+
+               if (ret)
+                       return ret;
+       }
+
+       return 0;
+}
+
 static struct ocelot_multicast *ocelot_multicast_get(struct ocelot *ocelot,
                                                     const unsigned char *addr,
                                                     u16 vid)
@@ -951,7 +1211,7 @@ static int ocelot_port_obj_add_mdb(struct net_device *dev,
        bool new = false;
 
        if (!vid)
-               vid = 1;
+               vid = port->pvid;
 
        mc = ocelot_multicast_get(ocelot, mdb->addr, vid);
        if (!mc) {
@@ -992,7 +1252,7 @@ static int ocelot_port_obj_del_mdb(struct net_device *dev,
        u16 vid = mdb->vid;
 
        if (!vid)
-               vid = 1;
+               vid = port->pvid;
 
        mc = ocelot_multicast_get(ocelot, mdb->addr, vid);
        if (!mc)
@@ -1024,6 +1284,11 @@ static int ocelot_port_obj_add(struct net_device *dev,
        int ret = 0;
 
        switch (obj->id) {
+       case SWITCHDEV_OBJ_ID_PORT_VLAN:
+               ret = ocelot_port_obj_add_vlan(dev,
+                                              SWITCHDEV_OBJ_PORT_VLAN(obj),
+                                              trans);
+               break;
        case SWITCHDEV_OBJ_ID_PORT_MDB:
                ret = ocelot_port_obj_add_mdb(dev, SWITCHDEV_OBJ_PORT_MDB(obj),
                                              trans);
@@ -1041,6 +1306,10 @@ static int ocelot_port_obj_del(struct net_device *dev,
        int ret = 0;
 
        switch (obj->id) {
+       case SWITCHDEV_OBJ_ID_PORT_VLAN:
+               ret = ocelot_port_vlan_del_vlan(dev,
+                                               SWITCHDEV_OBJ_PORT_VLAN(obj));
+               break;
        case SWITCHDEV_OBJ_ID_PORT_MDB:
                ret = ocelot_port_obj_del_mdb(dev, SWITCHDEV_OBJ_PORT_MDB(obj));
                break;
@@ -1086,6 +1355,142 @@ static void ocelot_port_bridge_leave(struct ocelot_port *ocelot_port,
 
        if (!ocelot->bridge_mask)
                ocelot->hw_bridge_dev = NULL;
+
+       /* Clear bridge vlan settings before calling ocelot_vlan_port_apply */
+       ocelot_port->vlan_aware = 0;
+       ocelot_port->pvid = 0;
+       ocelot_port->vid = 0;
+}
+
+static void ocelot_set_aggr_pgids(struct ocelot *ocelot)
+{
+       int i, port, lag;
+
+       /* Reset destination and aggregation PGIDS */
+       for (port = 0; port < ocelot->num_phys_ports; port++)
+               ocelot_write_rix(ocelot, BIT(port), ANA_PGID_PGID, port);
+
+       for (i = PGID_AGGR; i < PGID_SRC; i++)
+               ocelot_write_rix(ocelot, GENMASK(ocelot->num_phys_ports - 1, 0),
+                                ANA_PGID_PGID, i);
+
+       /* Now, set PGIDs for each LAG */
+       for (lag = 0; lag < ocelot->num_phys_ports; lag++) {
+               unsigned long bond_mask;
+               int aggr_count = 0;
+               u8 aggr_idx[16];
+
+               bond_mask = ocelot->lags[lag];
+               if (!bond_mask)
+                       continue;
+
+               for_each_set_bit(port, &bond_mask, ocelot->num_phys_ports) {
+                       /* Destination mask */
+                       ocelot_write_rix(ocelot, bond_mask,
+                                        ANA_PGID_PGID, port);
+                       aggr_idx[aggr_count] = port;
+                       aggr_count++;
+               }
+
+               for (i = PGID_AGGR; i < PGID_SRC; i++) {
+                       u32 ac;
+
+                       ac = ocelot_read_rix(ocelot, ANA_PGID_PGID, i);
+                       ac &= ~bond_mask;
+                       ac |= BIT(aggr_idx[i % aggr_count]);
+                       ocelot_write_rix(ocelot, ac, ANA_PGID_PGID, i);
+               }
+       }
+}
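
The aggr_idx[i % aggr_count] walk above round-robins the sixteen
aggregation codes across the LAG members, so flows hashing to different
codes egress on different ports. A minimal userspace sketch of that
distribution, assuming a hypothetical two-port LAG on ports 1 and 3
(plain C, not driver code):

    #include <stdio.h>

    int main(void)
    {
            /* hypothetical LAG members: front-panel ports 1 and 3 */
            int aggr_idx[] = { 1, 3 };
            int aggr_count = 2;

            /* the 16 aggregation codes alternate between the members */
            for (int i = 0; i < 16; i++)
                    printf("aggregation code %2d -> port %d\n",
                           i, aggr_idx[i % aggr_count]);
            return 0;
    }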
+
+static void ocelot_setup_lag(struct ocelot *ocelot, int lag)
+{
+       unsigned long bond_mask = ocelot->lags[lag];
+       unsigned int p;
+
+       for_each_set_bit(p, &bond_mask, ocelot->num_phys_ports) {
+               u32 port_cfg = ocelot_read_gix(ocelot, ANA_PORT_PORT_CFG, p);
+
+               port_cfg &= ~ANA_PORT_PORT_CFG_PORTID_VAL_M;
+
+               /* Use lag port as logical port for port i */
+               ocelot_write_gix(ocelot, port_cfg |
+                                ANA_PORT_PORT_CFG_PORTID_VAL(lag),
+                                ANA_PORT_PORT_CFG, p);
+       }
+}
+
+static int ocelot_port_lag_join(struct ocelot_port *ocelot_port,
+                               struct net_device *bond)
+{
+       struct ocelot *ocelot = ocelot_port->ocelot;
+       int p = ocelot_port->chip_port;
+       int lag, lp;
+       struct net_device *ndev;
+       u32 bond_mask = 0;
+
+       rcu_read_lock();
+       for_each_netdev_in_bond_rcu(bond, ndev) {
+               struct ocelot_port *port = netdev_priv(ndev);
+
+               bond_mask |= BIT(port->chip_port);
+       }
+       rcu_read_unlock();
+
+       lp = __ffs(bond_mask);
+
+       /* If the new port is the lowest one, use it as the logical port from
+        * now on
+        */
+       if (p == lp) {
+               lag = p;
+               ocelot->lags[p] = bond_mask;
+               bond_mask &= ~BIT(p);
+               if (bond_mask) {
+                       lp = __ffs(bond_mask);
+                       ocelot->lags[lp] = 0;
+               }
+       } else {
+               lag = lp;
+               ocelot->lags[lp] |= BIT(p);
+       }
+
+       ocelot_setup_lag(ocelot, lag);
+       ocelot_set_aggr_pgids(ocelot);
+
+       return 0;
+}
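
A hedged sketch of the ocelot->lags[] bookkeeping above, with made-up
port numbers: the lowest-numbered member owns the full membership mask,
and a later joiner is simply OR-ed into it (userspace C, __builtin_ctz
standing in for __ffs):

    #include <assert.h>
    #include <stdint.h>

    static uint32_t lags[8];        /* stand-in for ocelot->lags */

    /* p is the joining port, bond_mask the full membership incl. p */
    static void lag_join(int p, uint32_t bond_mask)
    {
            int lp = __builtin_ctz(bond_mask);   /* lowest member */

            if (p == lp) {
                    lags[p] = bond_mask;
                    bond_mask &= ~(1u << p);
                    if (bond_mask)
                            lags[__builtin_ctz(bond_mask)] = 0;
            } else {
                    lags[lp] |= 1u << p;
            }
    }

    int main(void)
    {
            lag_join(1, 0x2);  /* port 1 starts the bond */
            lag_join(3, 0xa);  /* port 3 joins; port 1 stays logical */
            assert(lags[1] == 0xa && lags[3] == 0);
            return 0;
    }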
+
+static void ocelot_port_lag_leave(struct ocelot_port *ocelot_port,
+                                 struct net_device *bond)
+{
+       struct ocelot *ocelot = ocelot_port->ocelot;
+       int p = ocelot_port->chip_port;
+       u32 port_cfg;
+       int i;
+
+       /* Remove port from any lag */
+       for (i = 0; i < ocelot->num_phys_ports; i++)
+               ocelot->lags[i] &= ~BIT(ocelot_port->chip_port);
+
+       /* if it was the logical port of the lag, move the lag config to the
+        * next port
+        */
+       if (ocelot->lags[p]) {
+               int n = __ffs(ocelot->lags[p]);
+
+               ocelot->lags[n] = ocelot->lags[p];
+               ocelot->lags[p] = 0;
+
+               ocelot_setup_lag(ocelot, n);
+       }
+
+       port_cfg = ocelot_read_gix(ocelot, ANA_PORT_PORT_CFG, p);
+       port_cfg &= ~ANA_PORT_PORT_CFG_PORTID_VAL_M;
+       ocelot_write_gix(ocelot, port_cfg | ANA_PORT_PORT_CFG_PORTID_VAL(p),
+                        ANA_PORT_PORT_CFG, p);
+
+       ocelot_set_aggr_pgids(ocelot);
 }
 
 /* Checks if the net_device instance given to us originates from our driver. */
@@ -1113,6 +1518,17 @@ static int ocelot_netdevice_port_event(struct net_device *dev,
                        else
                                ocelot_port_bridge_leave(ocelot_port,
                                                         info->upper_dev);
+
+                       ocelot_vlan_port_apply(ocelot_port->ocelot,
+                                              ocelot_port);
+               }
+               if (netif_is_lag_master(info->upper_dev)) {
+                       if (info->linking)
+                               err = ocelot_port_lag_join(ocelot_port,
+                                                          info->upper_dev);
+                       else
+                               ocelot_port_lag_leave(ocelot_port,
+                                                     info->upper_dev);
                }
                break;
        default:
@@ -1129,6 +1545,20 @@ static int ocelot_netdevice_event(struct notifier_block *unused,
        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
        int ret = 0;
 
+       if (event == NETDEV_PRECHANGEUPPER &&
+           netif_is_lag_master(info->upper_dev)) {
+               struct netdev_lag_upper_info *lag_upper_info = info->upper_info;
+               struct netlink_ext_ack *extack;
+
+               if (lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH) {
+                       extack = netdev_notifier_info_to_extack(&info->info);
+                       NL_SET_ERR_MSG_MOD(extack, "LAG device using unsupported Tx type");
+
+                       ret = -EINVAL;
+                       goto notify;
+               }
+       }
+
        if (netif_is_lag_master(dev)) {
                struct net_device *slave;
                struct list_head *iter;
@@ -1176,6 +1606,9 @@ int ocelot_probe_port(struct ocelot *ocelot, u8 port,
        dev->ethtool_ops = &ocelot_ethtool_ops;
        dev->switchdev_ops = &ocelot_port_switchdev_ops;
 
+       dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+       dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+
        memcpy(dev->dev_addr, ocelot->base_mac, ETH_ALEN);
        dev->dev_addr[ETH_ALEN - 1] += port;
        ocelot_mact_learn(ocelot, PGID_CPU, dev->dev_addr, ocelot_port->pvid,
@@ -1187,6 +1620,9 @@ int ocelot_probe_port(struct ocelot *ocelot, u8 port,
                goto err_register_netdev;
        }
 
+       /* Basic L2 initialization */
+       ocelot_vlan_port_apply(ocelot, ocelot_port);
+
        return 0;
 
 err_register_netdev:
@@ -1201,6 +1637,11 @@ int ocelot_init(struct ocelot *ocelot)
        int i, cpu = ocelot->num_phys_ports;
        char queue_name[32];
 
+       ocelot->lags = devm_kcalloc(ocelot->dev, ocelot->num_phys_ports,
+                                   sizeof(u32), GFP_KERNEL);
+       if (!ocelot->lags)
+               return -ENOMEM;
+
        ocelot->stats = devm_kcalloc(ocelot->dev,
                                     ocelot->num_phys_ports * ocelot->num_stats,
                                     sizeof(u64), GFP_KERNEL);
index 097bd12a10d4958c36353c7b1ea38f63f307d511..616bec30dfa3fe4b31a1295ef2239a57ac2cc36d 100644 (file)
@@ -493,7 +493,7 @@ struct ocelot {
        u8 num_cpu_ports;
        struct ocelot_port **ports;
 
-       u16 lags[16];
+       u32 *lags;
 
        /* Keep track of the vlan port masks */
        u32 vlan_mask[VLAN_N_VID];
index 18df7d934e810a56b967120ac1f7e9d0507d13af..26bb3b18f3be0f9f20149e5e689c561074c60a26 100644 (file)
@@ -29,7 +29,7 @@ static int ocelot_parse_ifh(u32 *ifh, struct frame_info *info)
        info->port = (ifh[2] & GENMASK(14, 11)) >> 11;
 
        info->cpuq = (ifh[3] & GENMASK(27, 20)) >> 20;
-       info->tag_type = (ifh[3] & GENMASK(16, 16)) >> 16;
+       info->tag_type = (ifh[3] & BIT(16)) >> 16;
        info->vid = ifh[3] & GENMASK(11, 0);
 
        return 0;
index 71899009c468af4258ae3e5ede1f22dd9734aca0..c26e0f70c494f530da0bd02f5ccfbc769591d3e6 100644 (file)
@@ -2,8 +2,8 @@
 # Exar device configuration
 #
 
-config NET_VENDOR_EXAR
-       bool "Exar devices"
+config NET_VENDOR_NETERION
+       bool "Neterion (Exar) devices"
        default y
        depends on PCI
        ---help---
@@ -11,16 +11,19 @@ config NET_VENDOR_EXAR
 
          Note that the answer to this question doesn't directly affect the
          kernel: saying N will just cause the configurator to skip all
-         the questions about Exar cards. If you say Y, you will be asked for
-         your specific card in the following questions.
+         the questions about Neterion/Exar cards. If you say Y, you will be
+         asked for your specific card in the following questions.
 
-if NET_VENDOR_EXAR
+if NET_VENDOR_NETERION
 
 config S2IO
-       tristate "Exar Xframe 10Gb Ethernet Adapter"
+       tristate "Neterion (Exar) Xframe 10Gb Ethernet Adapter"
        depends on PCI
        ---help---
          This driver supports Exar Corp's Xframe Series 10Gb Ethernet Adapters.
+         These were originally released by S2IO, which later renamed itself
+         Neterion, so the adapters might be labeled as either one, depending
+         on their age.
 
          More specific information on configuring the driver is in
          <file:Documentation/networking/s2io.txt>.
@@ -29,11 +32,13 @@ config S2IO
          will be called s2io.
 
 config VXGE
-       tristate "Exar X3100 Series 10GbE PCIe Server Adapter"
+       tristate "Neterion (Exar) X3100 Series 10GbE PCIe Server Adapter"
        depends on PCI
        ---help---
          This driver supports Exar Corp's X3100 Series 10 GbE PCIe
-         I/O Virtualized Server Adapter.
+         I/O Virtualized Server Adapter.  These were originally released by
+         Neterion, which was later acquired by Exar, so the adapters might be
+         labeled as either one, depending on their age.
 
          More specific information on configuring the driver is in
          <file:Documentation/networking/vxge.txt>.
@@ -50,4 +55,4 @@ config VXGE_DEBUG_TRACE_ALL
          the vxge driver. By default only few debug trace statements are
          enabled.
 
-endif # NET_VENDOR_EXAR
+endif # NET_VENDOR_NETERION
index 358ed6118881567c960a56fc876aebf8f5259ba9..a2c0a93ca8b6f0096546594a163487ccac52cbd4 100644 (file)
@@ -14,7 +14,6 @@
 #include <linux/vmalloc.h>
 #include <linux/etherdevice.h>
 #include <linux/pci.h>
-#include <linux/pci_hotplug.h>
 #include <linux/slab.h>
 
 #include "vxge-traffic.h"
index 8a92088df0d7bf4966342631fb412b94f79a3ff4..1d9e3683540439da2ee7d6e57fec4080542ed7ba 100644 (file)
 #define pr_fmt(fmt)    "NFP net bpf: " fmt
 
 #include <linux/bug.h>
-#include <linux/kernel.h>
 #include <linux/bpf.h>
 #include <linux/filter.h>
+#include <linux/kernel.h>
 #include <linux/pkt_cls.h>
+#include <linux/reciprocal_div.h>
 #include <linux/unistd.h>
 
 #include "main.h"
@@ -415,6 +416,60 @@ emit_alu(struct nfp_prog *nfp_prog, swreg dst,
                   reg.dst_lmextn, reg.src_lmextn);
 }
 
+static void
+__emit_mul(struct nfp_prog *nfp_prog, enum alu_dst_ab dst_ab, u16 areg,
+          enum mul_type type, enum mul_step step, u16 breg, bool swap,
+          bool wr_both, bool dst_lmextn, bool src_lmextn)
+{
+       u64 insn;
+
+       insn = OP_MUL_BASE |
+               FIELD_PREP(OP_MUL_A_SRC, areg) |
+               FIELD_PREP(OP_MUL_B_SRC, breg) |
+               FIELD_PREP(OP_MUL_STEP, step) |
+               FIELD_PREP(OP_MUL_DST_AB, dst_ab) |
+               FIELD_PREP(OP_MUL_SW, swap) |
+               FIELD_PREP(OP_MUL_TYPE, type) |
+               FIELD_PREP(OP_MUL_WR_AB, wr_both) |
+               FIELD_PREP(OP_MUL_SRC_LMEXTN, src_lmextn) |
+               FIELD_PREP(OP_MUL_DST_LMEXTN, dst_lmextn);
+
+       nfp_prog_push(nfp_prog, insn);
+}
+
+static void
+emit_mul(struct nfp_prog *nfp_prog, swreg lreg, enum mul_type type,
+        enum mul_step step, swreg rreg)
+{
+       struct nfp_insn_ur_regs reg;
+       u16 areg;
+       int err;
+
+       if (type == MUL_TYPE_START && step != MUL_STEP_NONE) {
+               nfp_prog->error = -EINVAL;
+               return;
+       }
+
+       if (step == MUL_LAST || step == MUL_LAST_2) {
+               /* When the step is MUL_LAST or MUL_LAST_2, the left source is
+                * used as the destination.
+                */
+               err = swreg_to_unrestricted(lreg, reg_none(), rreg, &reg);
+               areg = reg.dst;
+       } else {
+               err = swreg_to_unrestricted(reg_none(), lreg, rreg, &reg);
+               areg = reg.areg;
+       }
+
+       if (err) {
+               nfp_prog->error = err;
+               return;
+       }
+
+       __emit_mul(nfp_prog, reg.dst_ab, areg, type, step, reg.breg, reg.swap,
+                  reg.wr_both, reg.dst_lmextn, reg.src_lmextn);
+}
+
 static void
 __emit_ld_field(struct nfp_prog *nfp_prog, enum shf_sc sc,
                u8 areg, u8 bmask, u8 breg, u8 shift, bool imm8,
@@ -670,7 +725,7 @@ static int nfp_cpp_memcpy(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
        xfer_num = round_up(len, 4) / 4;
 
        if (src_40bit_addr)
-               addr40_offset(nfp_prog, meta->insn.src_reg, off, &src_base,
+               addr40_offset(nfp_prog, meta->insn.src_reg * 2, off, &src_base,
                              &off);
 
        /* Setup PREV_ALU fields to override memory read length. */
@@ -1380,6 +1435,133 @@ static void wrp_end32(struct nfp_prog *nfp_prog, swreg reg_in, u8 gpr_out)
                      SHF_SC_R_ROT, 16);
 }
 
+static void
+wrp_mul_u32(struct nfp_prog *nfp_prog, swreg dst_hi, swreg dst_lo, swreg lreg,
+           swreg rreg, bool gen_high_half)
+{
+       emit_mul(nfp_prog, lreg, MUL_TYPE_START, MUL_STEP_NONE, rreg);
+       emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_1, rreg);
+       emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_2, rreg);
+       emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_3, rreg);
+       emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_4, rreg);
+       emit_mul(nfp_prog, dst_lo, MUL_TYPE_STEP_32x32, MUL_LAST, reg_none());
+       if (gen_high_half)
+               emit_mul(nfp_prog, dst_hi, MUL_TYPE_STEP_32x32, MUL_LAST_2,
+                        reg_none());
+       else
+               wrp_immed(nfp_prog, dst_hi, 0);
+}
+
+static void
+wrp_mul_u16(struct nfp_prog *nfp_prog, swreg dst_hi, swreg dst_lo, swreg lreg,
+           swreg rreg)
+{
+       emit_mul(nfp_prog, lreg, MUL_TYPE_START, MUL_STEP_NONE, rreg);
+       emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_16x16, MUL_STEP_1, rreg);
+       emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_16x16, MUL_STEP_2, rreg);
+       emit_mul(nfp_prog, dst_lo, MUL_TYPE_STEP_16x16, MUL_LAST, reg_none());
+}
+
+static int
+wrp_mul(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+       bool gen_high_half, bool ropnd_from_reg)
+{
+       swreg multiplier, multiplicand, dst_hi, dst_lo;
+       const struct bpf_insn *insn = &meta->insn;
+       u32 lopnd_max, ropnd_max;
+       u8 dst_reg;
+
+       dst_reg = insn->dst_reg;
+       multiplicand = reg_a(dst_reg * 2);
+       dst_hi = reg_both(dst_reg * 2 + 1);
+       dst_lo = reg_both(dst_reg * 2);
+       lopnd_max = meta->umax_dst;
+       if (ropnd_from_reg) {
+               multiplier = reg_b(insn->src_reg * 2);
+               ropnd_max = meta->umax_src;
+       } else {
+               u32 imm = insn->imm;
+
+               multiplier = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog));
+               ropnd_max = imm;
+       }
+       if (lopnd_max > U16_MAX || ropnd_max > U16_MAX)
+               wrp_mul_u32(nfp_prog, dst_hi, dst_lo, multiplicand, multiplier,
+                           gen_high_half);
+       else
+               wrp_mul_u16(nfp_prog, dst_hi, dst_lo, multiplicand, multiplier);
+
+       return 0;
+}
+
+static int wrp_div_imm(struct nfp_prog *nfp_prog, u8 dst, u64 imm)
+{
+       swreg dst_both = reg_both(dst), dst_a = reg_a(dst), dst_b = reg_a(dst);
+       struct reciprocal_value_adv rvalue;
+       u8 pre_shift, exp;
+       swreg magic;
+
+       if (imm > U32_MAX) {
+               wrp_immed(nfp_prog, dst_both, 0);
+               return 0;
+       }
+
+       /* NOTE: "reciprocal_value_adv" doesn't support a divisor bigger than
+        * (1u << 31), so we JIT a separate NFP sequence to handle that case.
+        * The quotient then equals the result of the unsigned comparison
+        * "dst >= imm", which can be calculated using the following NFP
+        * sequence:
+        *
+        *  alu[--, dst, -, imm]
+        *  immed[imm, 0]
+        *  alu[dst, imm, +carry, 0]
+        *
+        */
+       if (imm > 1U << 31) {
+               swreg tmp_b = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog));
+
+               emit_alu(nfp_prog, reg_none(), dst_a, ALU_OP_SUB, tmp_b);
+               wrp_immed(nfp_prog, imm_a(nfp_prog), 0);
+               emit_alu(nfp_prog, dst_both, imm_a(nfp_prog), ALU_OP_ADD_C,
+                        reg_imm(0));
+               return 0;
+       }
+
+       rvalue = reciprocal_value_adv(imm, 32);
+       exp = rvalue.exp;
+       if (rvalue.is_wide_m && !(imm & 1)) {
+               pre_shift = fls(imm & -imm) - 1;
+               rvalue = reciprocal_value_adv(imm >> pre_shift, 32 - pre_shift);
+       } else {
+               pre_shift = 0;
+       }
+       magic = ur_load_imm_any(nfp_prog, rvalue.m, imm_b(nfp_prog));
+       if (imm == 1U << exp) {
+               emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
+                        SHF_SC_R_SHF, exp);
+       } else if (rvalue.is_wide_m) {
+               wrp_mul_u32(nfp_prog, imm_both(nfp_prog), reg_none(), dst_a,
+                           magic, true);
+               emit_alu(nfp_prog, dst_both, dst_a, ALU_OP_SUB,
+                        imm_b(nfp_prog));
+               emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
+                        SHF_SC_R_SHF, 1);
+               emit_alu(nfp_prog, dst_both, dst_a, ALU_OP_ADD,
+                        imm_b(nfp_prog));
+               emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
+                        SHF_SC_R_SHF, rvalue.sh - 1);
+       } else {
+               if (pre_shift)
+                       emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE,
+                                dst_b, SHF_SC_R_SHF, pre_shift);
+               wrp_mul_u32(nfp_prog, dst_both, reg_none(), dst_a, magic, true);
+               emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE,
+                        dst_b, SHF_SC_R_SHF, rvalue.sh);
+       }
+
+       return 0;
+}
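
To see why the carry sequence in the comment above suffices: with a
divisor larger than 2^31, a 32-bit quotient can only be 0 or 1, so
x / imm degenerates to the unsigned comparison x >= imm. A small
host-side spot check (plain C, not NFP code; the sample divisor is
arbitrary):

    #include <assert.h>
    #include <stdint.h>

    /* for d > 2^31 the quotient is the carry out of (x - d) */
    static uint32_t div_by_big_const(uint32_t x, uint32_t d)
    {
            return x >= d;
    }

    int main(void)
    {
            uint32_t d = (1u << 31) + 12345;

            for (uint64_t x = 0; x <= UINT32_MAX; x += 65537)
                    assert(div_by_big_const((uint32_t)x, d) ==
                           (uint32_t)x / d);
            return 0;
    }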
+
 static int adjust_head(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
        swreg tmp = imm_a(nfp_prog), tmp_len = imm_b(nfp_prog);
@@ -1684,6 +1866,31 @@ static int sub_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
        return 0;
 }
 
+static int mul_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+       return wrp_mul(nfp_prog, meta, true, true);
+}
+
+static int mul_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+       return wrp_mul(nfp_prog, meta, true, false);
+}
+
+static int div_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+       const struct bpf_insn *insn = &meta->insn;
+
+       return wrp_div_imm(nfp_prog, insn->dst_reg * 2, insn->imm);
+}
+
+static int div_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+       /* NOTE: the verifier hook has already rejected cases for which the
+        * verifier doesn't know whether the source operand is constant.
+        */
+       return wrp_div_imm(nfp_prog, meta->insn.dst_reg * 2, meta->umin_src);
+}
+
 static int neg_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
        const struct bpf_insn *insn = &meta->insn;
@@ -1772,8 +1979,8 @@ static int shl_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
        u8 dst, src;
 
        dst = insn->dst_reg * 2;
-       umin = meta->umin;
-       umax = meta->umax;
+       umin = meta->umin_src;
+       umax = meta->umax_src;
        if (umin == umax)
                return __shl_imm64(nfp_prog, dst, umin);
 
@@ -1881,8 +2088,8 @@ static int shr_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
        u8 dst, src;
 
        dst = insn->dst_reg * 2;
-       umin = meta->umin;
-       umax = meta->umax;
+       umin = meta->umin_src;
+       umax = meta->umax_src;
        if (umin == umax)
                return __shr_imm64(nfp_prog, dst, umin);
 
@@ -1995,8 +2202,8 @@ static int ashr_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
        u8 dst, src;
 
        dst = insn->dst_reg * 2;
-       umin = meta->umin;
-       umax = meta->umax;
+       umin = meta->umin_src;
+       umax = meta->umax_src;
        if (umin == umax)
                return __ashr_imm64(nfp_prog, dst, umin);
 
@@ -2097,6 +2304,26 @@ static int sub_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
        return wrp_alu32_imm(nfp_prog, meta, ALU_OP_SUB, !meta->insn.imm);
 }
 
+static int mul_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+       return wrp_mul(nfp_prog, meta, false, true);
+}
+
+static int mul_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+       return wrp_mul(nfp_prog, meta, false, false);
+}
+
+static int div_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+       return div_reg64(nfp_prog, meta);
+}
+
+static int div_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+       return div_imm64(nfp_prog, meta);
+}
+
 static int neg_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
        u8 dst = meta->insn.dst_reg * 2;
@@ -2848,6 +3075,10 @@ static const instr_cb_t instr_cb[256] = {
        [BPF_ALU64 | BPF_ADD | BPF_K] = add_imm64,
        [BPF_ALU64 | BPF_SUB | BPF_X] = sub_reg64,
        [BPF_ALU64 | BPF_SUB | BPF_K] = sub_imm64,
+       [BPF_ALU64 | BPF_MUL | BPF_X] = mul_reg64,
+       [BPF_ALU64 | BPF_MUL | BPF_K] = mul_imm64,
+       [BPF_ALU64 | BPF_DIV | BPF_X] = div_reg64,
+       [BPF_ALU64 | BPF_DIV | BPF_K] = div_imm64,
        [BPF_ALU64 | BPF_NEG] =         neg_reg64,
        [BPF_ALU64 | BPF_LSH | BPF_X] = shl_reg64,
        [BPF_ALU64 | BPF_LSH | BPF_K] = shl_imm64,
@@ -2867,6 +3098,10 @@ static const instr_cb_t instr_cb[256] = {
        [BPF_ALU | BPF_ADD | BPF_K] =   add_imm,
        [BPF_ALU | BPF_SUB | BPF_X] =   sub_reg,
        [BPF_ALU | BPF_SUB | BPF_K] =   sub_imm,
+       [BPF_ALU | BPF_MUL | BPF_X] =   mul_reg,
+       [BPF_ALU | BPF_MUL | BPF_K] =   mul_imm,
+       [BPF_ALU | BPF_DIV | BPF_X] =   div_reg,
+       [BPF_ALU | BPF_DIV | BPF_K] =   div_imm,
        [BPF_ALU | BPF_NEG] =           neg_reg,
        [BPF_ALU | BPF_LSH | BPF_K] =   shl_imm,
        [BPF_ALU | BPF_END | BPF_X] =   end_reg32,
@@ -3299,7 +3534,8 @@ curr_pair_is_memcpy(struct nfp_insn_meta *ld_meta,
        if (!is_mbpf_load(ld_meta) || !is_mbpf_store(st_meta))
                return false;
 
-       if (ld_meta->ptr.type != PTR_TO_PACKET)
+       if (ld_meta->ptr.type != PTR_TO_PACKET &&
+           ld_meta->ptr.type != PTR_TO_MAP_VALUE)
                return false;
 
        if (st_meta->ptr.type != PTR_TO_PACKET)
index 40216d56dddcb73d997ed4e4c48e63868610da89..994d2b756fe16881380f78f67628d546b14967ee 100644 (file)
@@ -66,26 +66,19 @@ nfp_bpf_xdp_offload(struct nfp_app *app, struct nfp_net *nn,
                    struct bpf_prog *prog, struct netlink_ext_ack *extack)
 {
        bool running, xdp_running;
-       int ret;
 
        if (!nfp_net_ebpf_capable(nn))
                return -EINVAL;
 
        running = nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF;
-       xdp_running = running && nn->dp.bpf_offload_xdp;
+       xdp_running = running && nn->xdp_hw.prog;
 
        if (!prog && !xdp_running)
                return 0;
        if (prog && running && !xdp_running)
                return -EBUSY;
 
-       ret = nfp_net_bpf_offload(nn, prog, running, extack);
-       /* Stop offload if replace not possible */
-       if (ret)
-               return ret;
-
-       nn->dp.bpf_offload_xdp = !!prog;
-       return ret;
+       return nfp_net_bpf_offload(nn, prog, running, extack);
 }
 
 static const char *nfp_bpf_extra_cap(struct nfp_app *app, struct nfp_net *nn)
@@ -202,14 +195,11 @@ static int nfp_bpf_setup_tc_block(struct net_device *netdev,
        if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
                return -EOPNOTSUPP;
 
-       if (tcf_block_shared(f->block))
-               return -EOPNOTSUPP;
-
        switch (f->command) {
        case TC_BLOCK_BIND:
                return tcf_block_cb_register(f->block,
                                             nfp_bpf_setup_tc_block_cb,
-                                            nn, nn);
+                                            nn, nn, f->extack);
        case TC_BLOCK_UNBIND:
                tcf_block_cb_unregister(f->block,
                                        nfp_bpf_setup_tc_block_cb,
@@ -411,6 +401,20 @@ err_release_free:
        return -EINVAL;
 }
 
+static int nfp_bpf_ndo_init(struct nfp_app *app, struct net_device *netdev)
+{
+       struct nfp_app_bpf *bpf = app->priv;
+
+       return bpf_offload_dev_netdev_register(bpf->bpf_dev, netdev);
+}
+
+static void nfp_bpf_ndo_uninit(struct nfp_app *app, struct net_device *netdev)
+{
+       struct nfp_app_bpf *bpf = app->priv;
+
+       bpf_offload_dev_netdev_unregister(bpf->bpf_dev, netdev);
+}
+
 static int nfp_bpf_init(struct nfp_app *app)
 {
        struct nfp_app_bpf *bpf;
@@ -434,6 +438,11 @@ static int nfp_bpf_init(struct nfp_app *app)
        if (err)
                goto err_free_neutral_maps;
 
+       bpf->bpf_dev = bpf_offload_dev_create();
+       err = PTR_ERR_OR_ZERO(bpf->bpf_dev);
+       if (err)
+               goto err_free_neutral_maps;
+
        return 0;
 
 err_free_neutral_maps:
@@ -452,6 +461,7 @@ static void nfp_bpf_clean(struct nfp_app *app)
 {
        struct nfp_app_bpf *bpf = app->priv;
 
+       bpf_offload_dev_destroy(bpf->bpf_dev);
        WARN_ON(!skb_queue_empty(&bpf->cmsg_replies));
        WARN_ON(!list_empty(&bpf->map_list));
        WARN_ON(bpf->maps_in_use || bpf->map_elems_in_use);
@@ -473,6 +483,9 @@ const struct nfp_app_type app_bpf = {
 
        .extra_cap      = nfp_bpf_extra_cap,
 
+       .ndo_init       = nfp_bpf_ndo_init,
+       .ndo_uninit     = nfp_bpf_ndo_uninit,
+
        .vnic_alloc     = nfp_bpf_vnic_alloc,
        .vnic_free      = nfp_bpf_vnic_free,
 
index 654fe7823e5ed4ab82a1b1e0c35fa43c1486bb28..bec935468f90ab1f13da2832ed463d73e4b86884 100644 (file)
@@ -110,6 +110,8 @@ enum pkt_vec {
  * struct nfp_app_bpf - bpf app priv structure
  * @app:               backpointer to the app
  *
+ * @bpf_dev:           BPF offload device handle
+ *
  * @tag_allocator:     bitmap of control message tags in use
  * @tag_alloc_next:    next tag bit to allocate
  * @tag_alloc_last:    next tag bit to be freed
@@ -150,6 +152,8 @@ enum pkt_vec {
 struct nfp_app_bpf {
        struct nfp_app *app;
 
+       struct bpf_offload_dev *bpf_dev;
+
        DECLARE_BITMAP(tag_allocator, U16_MAX + 1);
        u16 tag_alloc_next;
        u16 tag_alloc_last;
@@ -263,8 +267,10 @@ struct nfp_bpf_reg_state {
  * @func_id: function id for call instructions
  * @arg1: arg1 for call instructions
  * @arg2: arg2 for call instructions
- * @umin: copy of core verifier umin_value.
- * @umax: copy of core verifier umax_value.
+ * @umin_src: copy of core verifier umin_value for src operand.
+ * @umax_src: copy of core verifier umax_value for src operand.
+ * @umin_dst: copy of core verifier umin_value for dst operand.
+ * @umax_dst: copy of core verifier umax_value for dst operand.
  * @off: index of first generated machine instruction (in nfp_prog.prog)
  * @n: eBPF instruction number
  * @flags: eBPF instruction extra optimization flags
@@ -300,12 +306,15 @@ struct nfp_insn_meta {
                        struct bpf_reg_state arg1;
                        struct nfp_bpf_reg_state arg2;
                };
-               /* We are interested in range info for some operands,
-                * for example, the shift amount.
+               /* We are interested in range info for operands of ALU
+                * operations, for example the shift amount or the
+                * multiplicand and multiplier.
                 */
                struct {
-                       u64 umin;
-                       u64 umax;
+                       u64 umin_src;
+                       u64 umax_src;
+                       u64 umin_dst;
+                       u64 umax_dst;
                };
        };
        unsigned int off;
@@ -339,6 +348,11 @@ static inline u8 mbpf_mode(const struct nfp_insn_meta *meta)
        return BPF_MODE(meta->insn.code);
 }
 
+static inline bool is_mbpf_alu(const struct nfp_insn_meta *meta)
+{
+       return mbpf_class(meta) == BPF_ALU64 || mbpf_class(meta) == BPF_ALU;
+}
+
 static inline bool is_mbpf_load(const struct nfp_insn_meta *meta)
 {
        return (meta->insn.code & ~BPF_SIZE_MASK) == (BPF_LDX | BPF_MEM);
@@ -384,23 +398,14 @@ static inline bool is_mbpf_xadd(const struct nfp_insn_meta *meta)
        return (meta->insn.code & ~BPF_SIZE_MASK) == (BPF_STX | BPF_XADD);
 }
 
-static inline bool is_mbpf_indir_shift(const struct nfp_insn_meta *meta)
+static inline bool is_mbpf_mul(const struct nfp_insn_meta *meta)
 {
-       u8 code = meta->insn.code;
-       bool is_alu, is_shift;
-       u8 opclass, opcode;
-
-       opclass = BPF_CLASS(code);
-       is_alu = opclass == BPF_ALU64 || opclass == BPF_ALU;
-       if (!is_alu)
-               return false;
-
-       opcode = BPF_OP(code);
-       is_shift = opcode == BPF_LSH || opcode == BPF_RSH || opcode == BPF_ARSH;
-       if (!is_shift)
-               return false;
+       return is_mbpf_alu(meta) && mbpf_op(meta) == BPF_MUL;
+}
 
-       return BPF_SRC(code) == BPF_X;
+static inline bool is_mbpf_div(const struct nfp_insn_meta *meta)
+{
+       return is_mbpf_alu(meta) && mbpf_op(meta) == BPF_DIV;
 }
 
 /**
index 7eae4c0266f811247a1b5e4ad6aadf70da44a2c5..49b03f7dbf4625de7cd1d03eb0d5bc448da9de14 100644 (file)
@@ -190,8 +190,10 @@ nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog,
 
                meta->insn = prog[i];
                meta->n = i;
-               if (is_mbpf_indir_shift(meta))
-                       meta->umin = U64_MAX;
+               if (is_mbpf_alu(meta)) {
+                       meta->umin_src = U64_MAX;
+                       meta->umin_dst = U64_MAX;
+               }
 
                list_add_tail(&meta->l, &nfp_prog->insns);
        }
@@ -564,14 +566,8 @@ int nfp_net_bpf_offload(struct nfp_net *nn, struct bpf_prog *prog,
 {
        int err;
 
-       if (prog) {
-               struct bpf_prog_offload *offload = prog->aux->offload;
-
-               if (!offload)
-                       return -EINVAL;
-               if (offload->netdev != nn->dp.netdev)
-                       return -EINVAL;
-       }
+       if (prog && !bpf_offload_dev_match(prog, nn->dp.netdev))
+               return -EINVAL;
 
        if (prog && old_prog) {
                u8 cap;
index 4bfeba7b21b226d03b573617079490a4aa69f0a5..49ba0d645d36df879d6885309603426c383a17e6 100644 (file)
@@ -516,6 +516,82 @@ nfp_bpf_check_xadd(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
        return nfp_bpf_check_ptr(nfp_prog, meta, env, meta->insn.dst_reg);
 }
 
+static int
+nfp_bpf_check_alu(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+                 struct bpf_verifier_env *env)
+{
+       const struct bpf_reg_state *sreg =
+               cur_regs(env) + meta->insn.src_reg;
+       const struct bpf_reg_state *dreg =
+               cur_regs(env) + meta->insn.dst_reg;
+
+       meta->umin_src = min(meta->umin_src, sreg->umin_value);
+       meta->umax_src = max(meta->umax_src, sreg->umax_value);
+       meta->umin_dst = min(meta->umin_dst, dreg->umin_value);
+       meta->umax_dst = max(meta->umax_dst, dreg->umax_value);
+
+       /* NFP supports u16 and u32 multiplication.
+        *
+        * For ALU64, if either operand is beyond u32's value range, we reject
+        * it. Note that if the source operand is BPF_K, we need to check the
+        * "imm" field directly, and we reject it if it is negative: for
+        * ALU64, "imm" (of s32 type) is expected to be sign extended to s64,
+        * which NFP mul doesn't support.
+        *
+        * For ALU32, it is fine for "imm" to be negative, because the result
+        * is 32 bits and the low half is the same for signed and unsigned
+        * mul, so we still get the correct result.
+        */
+       if (is_mbpf_mul(meta)) {
+               if (meta->umax_dst > U32_MAX) {
+                       pr_vlog(env, "multiplier is not within u32 value range\n");
+                       return -EINVAL;
+               }
+               if (mbpf_src(meta) == BPF_X && meta->umax_src > U32_MAX) {
+                       pr_vlog(env, "multiplicand is not within u32 value range\n");
+                       return -EINVAL;
+               }
+               if (mbpf_class(meta) == BPF_ALU64 &&
+                   mbpf_src(meta) == BPF_K && meta->insn.imm < 0) {
+                       pr_vlog(env, "sign extended multiplicand won't be within u32 value range\n");
+                       return -EINVAL;
+               }
+       }
+
+       /* NFP doesn't have divide instructions; we support divide by constant
+        * through reciprocal multiplication. Given that NFP multiplication
+        * supports operands no wider than u32, we require the divisor and
+        * dividend to be no bigger than that as well.
+        *
+        * Also, eBPF doesn't support signed divide and enforces this at the C
+        * language level by failing compilation. However, the LLVM assembler
+        * doesn't enforce it, so a negative constant can leak in as a BPF_K
+        * operand through assembly code; we reject such cases as well.
+        */
+       if (is_mbpf_div(meta)) {
+               if (meta->umax_dst > U32_MAX) {
+                       pr_vlog(env, "dividend is not within u32 value range\n");
+                       return -EINVAL;
+               }
+               if (mbpf_src(meta) == BPF_X) {
+                       if (meta->umin_src != meta->umax_src) {
+                               pr_vlog(env, "divisor is not constant\n");
+                               return -EINVAL;
+                       }
+                       if (meta->umax_src > U32_MAX) {
+                               pr_vlog(env, "divisor is not within u32 value range\n");
+                               return -EINVAL;
+                       }
+               }
+               if (mbpf_src(meta) == BPF_K && meta->insn.imm < 0) {
+                       pr_vlog(env, "divide by negative constant is not supported\n");
+                       return -EINVAL;
+               }
+       }
+
+       return 0;
+}
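
The ALU64-versus-ALU32 asymmetry described in the multiplication
comment can be demonstrated on the host; a sketch with a made-up
immediate:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            int32_t imm = -2;    /* hypothetical BPF_K immediate */

            /* ALU64 sign-extends imm to s64: far outside u32 */
            uint64_t alu64_opnd = (uint64_t)(int64_t)imm;

            /* ALU32 truncates to 32 bits; the low half of a product
             * is the same for signed and unsigned mul, so 7 * -2
             * still comes out right modulo 2^32
             */
            uint32_t alu32_res = 7u * (uint32_t)imm;

            printf("alu64 operand = 0x%016llx\n",
                   (unsigned long long)alu64_opnd); /* ...fffffffe */
            printf("alu32 7 * -2  = %d\n", (int32_t)alu32_res); /* -14 */
            return 0;
    }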
+
 static int
 nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx)
 {
@@ -551,13 +627,8 @@ nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx)
        if (is_mbpf_xadd(meta))
                return nfp_bpf_check_xadd(nfp_prog, meta, env);
 
-       if (is_mbpf_indir_shift(meta)) {
-               const struct bpf_reg_state *sreg =
-                       cur_regs(env) + meta->insn.src_reg;
-
-               meta->umin = min(meta->umin, sreg->umin_value);
-               meta->umax = max(meta->umax, sreg->umax_value);
-       }
+       if (is_mbpf_alu(meta))
+               return nfp_bpf_check_alu(nfp_prog, meta, env);
 
        return 0;
 }
index 4a6d2db750719a7d144e337f0aea9e8daf5a7104..e56b815a8dc6c81729083eb7abe717402a6a347b 100644 (file)
@@ -34,6 +34,7 @@
 #include <linux/bitfield.h>
 #include <net/pkt_cls.h>
 #include <net/switchdev.h>
+#include <net/tc_act/tc_csum.h>
 #include <net/tc_act/tc_gact.h>
 #include <net/tc_act/tc_mirred.h>
 #include <net/tc_act/tc_pedit.h>
@@ -44,6 +45,8 @@
 #include "main.h"
 #include "../nfp_net_repr.h"
 
+#define NFP_FL_SUPPORTED_IPV4_UDP_TUN_FLAGS    (TUNNEL_CSUM | TUNNEL_KEY)
+
 static void nfp_fl_pop_vlan(struct nfp_fl_pop_vlan *pop_vlan)
 {
        size_t act_size = sizeof(struct nfp_fl_pop_vlan);
@@ -235,9 +238,12 @@ nfp_fl_set_ipv4_udp_tun(struct nfp_fl_set_ipv4_udp_tun *set_tun,
        size_t act_size = sizeof(struct nfp_fl_set_ipv4_udp_tun);
        struct ip_tunnel_info *ip_tun = tcf_tunnel_info(action);
        u32 tmp_set_ip_tun_type_index = 0;
+       struct flowi4 flow = {};
        /* Currently support one pre-tunnel so index is always 0. */
        int pretun_idx = 0;
+       struct rtable *rt;
        struct net *net;
+       int err;
 
        if (ip_tun->options_len)
                return -EOPNOTSUPP;
@@ -254,7 +260,28 @@ nfp_fl_set_ipv4_udp_tun(struct nfp_fl_set_ipv4_udp_tun *set_tun,
 
        set_tun->tun_type_index = cpu_to_be32(tmp_set_ip_tun_type_index);
        set_tun->tun_id = ip_tun->key.tun_id;
-       set_tun->ttl = net->ipv4.sysctl_ip_default_ttl;
+
+       /* Do a route lookup to determine ttl; if it fails, use the default.
+        * Note that CONFIG_INET is a requirement of CONFIG_NET_SWITCHDEV, so
+        * it must be defined here.
+        */
+       flow.daddr = ip_tun->key.u.ipv4.dst;
+       flow.flowi4_proto = IPPROTO_UDP;
+       rt = ip_route_output_key(net, &flow);
+       err = PTR_ERR_OR_ZERO(rt);
+       if (!err) {
+               set_tun->ttl = ip4_dst_hoplimit(&rt->dst);
+               ip_rt_put(rt);
+       } else {
+               set_tun->ttl = net->ipv4.sysctl_ip_default_ttl;
+       }
+
+       set_tun->tos = ip_tun->key.tos;
+
+       if (!(ip_tun->key.tun_flags & TUNNEL_KEY) ||
+           ip_tun->key.tun_flags & ~NFP_FL_SUPPORTED_IPV4_UDP_TUN_FLAGS)
+               return -EOPNOTSUPP;
+       set_tun->tun_flags = ip_tun->key.tun_flags;
 
        /* Complete pre_tunnel action. */
        pre_tun->ipv4_dst = ip_tun->key.u.ipv4.dst;
@@ -398,8 +425,27 @@ nfp_fl_set_tport(const struct tc_action *action, int idx, u32 off,
        return 0;
 }
 
+static u32 nfp_fl_csum_l4_to_flag(u8 ip_proto)
+{
+       switch (ip_proto) {
+       case 0:
+               /* The filter doesn't force a protocol match; both TCP
+                * and UDP checksums will be updated if encountered.
+                */
+               return TCA_CSUM_UPDATE_FLAG_TCP | TCA_CSUM_UPDATE_FLAG_UDP;
+       case IPPROTO_TCP:
+               return TCA_CSUM_UPDATE_FLAG_TCP;
+       case IPPROTO_UDP:
+               return TCA_CSUM_UPDATE_FLAG_UDP;
+       default:
+               /* All other protocols will be ignored by FW */
+               return 0;
+       }
+}
+
 static int
-nfp_fl_pedit(const struct tc_action *action, char *nfp_action, int *a_len)
+nfp_fl_pedit(const struct tc_action *action, struct tc_cls_flower_offload *flow,
+            char *nfp_action, int *a_len, u32 *csum_updated)
 {
        struct nfp_fl_set_ipv6_addr set_ip6_dst, set_ip6_src;
        struct nfp_fl_set_ip4_addrs set_ip_addr;
@@ -409,6 +455,7 @@ nfp_fl_pedit(const struct tc_action *action, char *nfp_action, int *a_len)
        int idx, nkeys, err;
        size_t act_size;
        u32 offset, cmd;
+       u8 ip_proto = 0;
 
        memset(&set_ip6_dst, 0, sizeof(set_ip6_dst));
        memset(&set_ip6_src, 0, sizeof(set_ip6_src));
@@ -451,6 +498,15 @@ nfp_fl_pedit(const struct tc_action *action, char *nfp_action, int *a_len)
                        return err;
        }
 
+       if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
+               struct flow_dissector_key_basic *basic;
+
+               basic = skb_flow_dissector_target(flow->dissector,
+                                                 FLOW_DISSECTOR_KEY_BASIC,
+                                                 flow->key);
+               ip_proto = basic->ip_proto;
+       }
+
        if (set_eth.head.len_lw) {
                act_size = sizeof(set_eth);
                memcpy(nfp_action, &set_eth, act_size);
@@ -459,6 +515,10 @@ nfp_fl_pedit(const struct tc_action *action, char *nfp_action, int *a_len)
                act_size = sizeof(set_ip_addr);
                memcpy(nfp_action, &set_ip_addr, act_size);
                *a_len += act_size;
+
+               /* Hardware will automatically fix IPv4 and TCP/UDP checksum. */
+               *csum_updated |= TCA_CSUM_UPDATE_FLAG_IPV4HDR |
+                               nfp_fl_csum_l4_to_flag(ip_proto);
        } else if (set_ip6_dst.head.len_lw && set_ip6_src.head.len_lw) {
                /* TC compiles set src and dst IPv6 address as a single action,
                 * but the hardware requires this to be 2 separate actions.
@@ -471,18 +531,30 @@ nfp_fl_pedit(const struct tc_action *action, char *nfp_action, int *a_len)
                memcpy(&nfp_action[sizeof(set_ip6_src)], &set_ip6_dst,
                       act_size);
                *a_len += act_size;
+
+               /* Hardware will automatically fix TCP/UDP checksum. */
+               *csum_updated |= nfp_fl_csum_l4_to_flag(ip_proto);
        } else if (set_ip6_dst.head.len_lw) {
                act_size = sizeof(set_ip6_dst);
                memcpy(nfp_action, &set_ip6_dst, act_size);
                *a_len += act_size;
+
+               /* Hardware will automatically fix TCP/UDP checksum. */
+               *csum_updated |= nfp_fl_csum_l4_to_flag(ip_proto);
        } else if (set_ip6_src.head.len_lw) {
                act_size = sizeof(set_ip6_src);
                memcpy(nfp_action, &set_ip6_src, act_size);
                *a_len += act_size;
+
+               /* Hardware will automatically fix TCP/UDP checksum. */
+               *csum_updated |= nfp_fl_csum_l4_to_flag(ip_proto);
        } else if (set_tport.head.len_lw) {
                act_size = sizeof(set_tport);
                memcpy(nfp_action, &set_tport, act_size);
                *a_len += act_size;
+
+               /* Hardware will automatically fix TCP/UDP checksum. */
+               *csum_updated |= nfp_fl_csum_l4_to_flag(ip_proto);
        }
 
        return 0;
@@ -493,12 +565,18 @@ nfp_flower_output_action(struct nfp_app *app, const struct tc_action *a,
                         struct nfp_fl_payload *nfp_fl, int *a_len,
                         struct net_device *netdev, bool last,
                         enum nfp_flower_tun_type *tun_type, int *tun_out_cnt,
-                        int *out_cnt)
+                        int *out_cnt, u32 *csum_updated)
 {
        struct nfp_flower_priv *priv = app->priv;
        struct nfp_fl_output *output;
        int err, prelag_size;
 
+       /* If csum_updated has not been cleared by now, the HW would
+        * incorrectly update checksums that were never requested.
+        */
+       if (*csum_updated)
+               return -EOPNOTSUPP;
+
        if (*a_len + sizeof(struct nfp_fl_output) > NFP_FL_MAX_A_SIZ)
                return -EOPNOTSUPP;
 
@@ -529,10 +607,11 @@ nfp_flower_output_action(struct nfp_app *app, const struct tc_action *a,
 
 static int
 nfp_flower_loop_action(struct nfp_app *app, const struct tc_action *a,
+                      struct tc_cls_flower_offload *flow,
                       struct nfp_fl_payload *nfp_fl, int *a_len,
                       struct net_device *netdev,
                       enum nfp_flower_tun_type *tun_type, int *tun_out_cnt,
-                      int *out_cnt)
+                      int *out_cnt, u32 *csum_updated)
 {
        struct nfp_fl_set_ipv4_udp_tun *set_tun;
        struct nfp_fl_pre_tunnel *pre_tun;
@@ -545,14 +624,14 @@ nfp_flower_loop_action(struct nfp_app *app, const struct tc_action *a,
        } else if (is_tcf_mirred_egress_redirect(a)) {
                err = nfp_flower_output_action(app, a, nfp_fl, a_len, netdev,
                                               true, tun_type, tun_out_cnt,
-                                              out_cnt);
+                                              out_cnt, csum_updated);
                if (err)
                        return err;
 
        } else if (is_tcf_mirred_egress_mirror(a)) {
                err = nfp_flower_output_action(app, a, nfp_fl, a_len, netdev,
                                               false, tun_type, tun_out_cnt,
-                                              out_cnt);
+                                              out_cnt, csum_updated);
                if (err)
                        return err;
 
@@ -602,8 +681,17 @@ nfp_flower_loop_action(struct nfp_app *app, const struct tc_action *a,
                /* Tunnel decap is handled by default so accept action. */
                return 0;
        } else if (is_tcf_pedit(a)) {
-               if (nfp_fl_pedit(a, &nfp_fl->action_data[*a_len], a_len))
+               if (nfp_fl_pedit(a, flow, &nfp_fl->action_data[*a_len],
+                                a_len, csum_updated))
+                       return -EOPNOTSUPP;
+       } else if (is_tcf_csum(a)) {
+               /* csum action requests recalc of something we have not fixed */
+               if (tcf_csum_update_flags(a) & ~*csum_updated)
                        return -EOPNOTSUPP;
+               /* Any csum we fix correctly can be removed from the csum
+                * update list, which is later used to check support.
+                */
+               *csum_updated &= ~tcf_csum_update_flags(a);
        } else {
                /* Currently we do not handle any other actions. */
                return -EOPNOTSUPP;
@@ -620,6 +708,7 @@ int nfp_flower_compile_action(struct nfp_app *app,
        int act_len, act_cnt, err, tun_out_cnt, out_cnt;
        enum nfp_flower_tun_type tun_type;
        const struct tc_action *a;
+       u32 csum_updated = 0;
        LIST_HEAD(actions);
 
        memset(nfp_flow->action_data, 0, NFP_FL_MAX_A_SIZ);
@@ -632,8 +721,9 @@ int nfp_flower_compile_action(struct nfp_app *app,
 
        tcf_exts_to_list(flow->exts, &actions);
        list_for_each_entry(a, &actions, list) {
-               err = nfp_flower_loop_action(app, a, nfp_flow, &act_len, netdev,
-                                            &tun_type, &tun_out_cnt, &out_cnt);
+               err = nfp_flower_loop_action(app, a, flow, nfp_flow, &act_len,
+                                            netdev, &tun_type, &tun_out_cnt,
+                                            &out_cnt, &csum_updated);
                if (err)
                        return err;
                act_cnt++;
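
Taken together, the csum_updated plumbing works as a small contract:
pedit records which checksums the hardware will rewrite as a side
effect, an explicit csum action may only request (and thereby consume)
a subset of those, and any leftover bits at output time make the flow
non-offloadable. A self-contained sketch of that contract, using
stand-in flag values rather than the real TCA_CSUM_UPDATE_FLAG_*
definitions:

    #include <assert.h>
    #include <stdint.h>

    #define FLAG_IPV4HDR 0x1   /* stand-ins for TCA_CSUM_UPDATE_FLAG_* */
    #define FLAG_TCP     0x2

    /* returns 0 if the action list can be offloaded, -1 otherwise */
    static int check_csums(uint32_t hw_will_fix, uint32_t requested)
    {
            if (requested & ~hw_will_fix)
                    return -1;  /* recalc HW will not perform */
            hw_will_fix &= ~requested;  /* consumed by csum action */
            if (hw_will_fix)
                    return -1;  /* HW would rewrite an unrequested csum */
            return 0;
    }

    int main(void)
    {
            /* pedit of an IPv4 TCP header fixes both checksums; the
             * filter must request exactly that */
            assert(check_csums(FLAG_IPV4HDR | FLAG_TCP,
                               FLAG_IPV4HDR | FLAG_TCP) == 0);
            /* missing csum action -> leftover flags -> no offload */
            assert(check_csums(FLAG_IPV4HDR | FLAG_TCP, FLAG_TCP) == -1);
            return 0;
    }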
index 4a7f3510a2968154e9c4f78d8b2e14b673789a4e..15f1eacd76b6d381389e27a4ec860d1c2040bacb 100644 (file)
@@ -203,9 +203,9 @@ struct nfp_fl_set_ipv4_udp_tun {
        __be16 reserved;
        __be64 tun_id __packed;
        __be32 tun_type_index;
-       __be16 reserved2;
+       __be16 tun_flags;
        u8 ttl;
-       u8 reserved3;
+       u8 tos;
        __be32 extra[2];
 };
 
index 0c4c957717ea4b780d184b5577745cca8a5bf2f0..bf10598f66ae056a488074592ec4551fada4c319 100644 (file)
@@ -564,8 +564,9 @@ nfp_fl_lag_changeupper_event(struct nfp_fl_lag *lag,
        if (lag_upper_info &&
            lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
            (lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH ||
-           (lag_upper_info->hash_type != NETDEV_LAG_HASH_L34 &&
-           lag_upper_info->hash_type != NETDEV_LAG_HASH_E34))) {
+            (lag_upper_info->hash_type != NETDEV_LAG_HASH_L34 &&
+             lag_upper_info->hash_type != NETDEV_LAG_HASH_E34 &&
+             lag_upper_info->hash_type != NETDEV_LAG_HASH_UNKNOWN))) {
                can_offload = false;
                nfp_flower_cmsg_warn(priv->app,
                                     "Unable to offload tx_type %u hash %u\n",
index bbe5764d26cb777f4292b82f4bbc78464706d9b7..ef2114d133872696cdb9ebbdeb44a97269079e57 100644 (file)
@@ -73,7 +73,7 @@ struct nfp_app;
 
 struct nfp_fl_mask_id {
        struct circ_buf mask_id_free_list;
-       struct timespec64 *last_used;
+       ktime_t *last_used;
        u8 init_unallocated;
 };
 
index 93fb809f50d1a7b0b6577c6a76ac0610e6797735..c098730544b76dae8ea6cab5ef5712cc40855c3e 100644 (file)
@@ -158,7 +158,6 @@ static int nfp_release_mask_id(struct nfp_app *app, u8 mask_id)
 {
        struct nfp_flower_priv *priv = app->priv;
        struct circ_buf *ring;
-       struct timespec64 now;
 
        ring = &priv->mask_ids.mask_id_free_list;
        /* Checking if buffer is full. */
@@ -169,8 +168,7 @@ static int nfp_release_mask_id(struct nfp_app *app, u8 mask_id)
        ring->head = (ring->head + NFP_FLOWER_MASK_ELEMENT_RS) %
                     (NFP_FLOWER_MASK_ENTRY_RS * NFP_FLOWER_MASK_ELEMENT_RS);
 
-       getnstimeofday64(&now);
-       priv->mask_ids.last_used[mask_id] = now;
+       priv->mask_ids.last_used[mask_id] = ktime_get();
 
        return 0;
 }
@@ -178,7 +176,7 @@ static int nfp_release_mask_id(struct nfp_app *app, u8 mask_id)
 static int nfp_mask_alloc(struct nfp_app *app, u8 *mask_id)
 {
        struct nfp_flower_priv *priv = app->priv;
-       struct timespec64 delta, now;
+       ktime_t reuse_timeout;
        struct circ_buf *ring;
        u8 temp_id, freed_id;
 
@@ -198,10 +196,10 @@ static int nfp_mask_alloc(struct nfp_app *app, u8 *mask_id)
        memcpy(&temp_id, &ring->buf[ring->tail], NFP_FLOWER_MASK_ELEMENT_RS);
        *mask_id = temp_id;
 
-       getnstimeofday64(&now);
-       delta = timespec64_sub(now, priv->mask_ids.last_used[*mask_id]);
+       reuse_timeout = ktime_add_ns(priv->mask_ids.last_used[*mask_id],
+                                    NFP_FL_MASK_REUSE_TIME_NS);
 
-       if (timespec64_to_ns(&delta) < NFP_FL_MASK_REUSE_TIME_NS)
+       if (ktime_before(ktime_get(), reuse_timeout))
                goto err_not_found;
 
        memcpy(&ring->buf[ring->tail], &freed_id, NFP_FLOWER_MASK_ELEMENT_RS);
index 525057bee0ed8978f360d6eeb8293d8a990a0f22..6bc8a97f7e03a60edb3fd4d5c68906b074e7d090 100644 (file)
@@ -584,9 +584,9 @@ nfp_flower_repr_offload(struct nfp_app *app, struct net_device *netdev,
                return nfp_flower_del_offload(app, netdev, flower, egress);
        case TC_CLSFLOWER_STATS:
                return nfp_flower_get_stats(app, netdev, flower, egress);
+       default:
+               return -EOPNOTSUPP;
        }
-
-       return -EOPNOTSUPP;
 }
 
 int nfp_flower_setup_tc_egress_cb(enum tc_setup_type type, void *type_data,
@@ -631,14 +631,11 @@ static int nfp_flower_setup_tc_block(struct net_device *netdev,
        if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
                return -EOPNOTSUPP;
 
-       if (tcf_block_shared(f->block))
-               return -EOPNOTSUPP;
-
        switch (f->command) {
        case TC_BLOCK_BIND:
                return tcf_block_cb_register(f->block,
                                             nfp_flower_setup_tc_block_cb,
-                                            repr, repr);
+                                            repr, repr, f->extack);
        case TC_BLOCK_UNBIND:
                tcf_block_cb_unregister(f->block,
                                        nfp_flower_setup_tc_block_cb,
index f28b244f4ee7ebcd7235b163c7c5f42a7ab120d9..69d4ae7a61f3a25339a77b00688e9c130d2d17d0 100644 (file)
@@ -86,6 +86,23 @@ const char *nfp_app_mip_name(struct nfp_app *app)
        return nfp_mip_name(app->pf->mip);
 }
 
+int nfp_app_ndo_init(struct net_device *netdev)
+{
+       struct nfp_app *app = nfp_app_from_netdev(netdev);
+
+       if (!app || !app->type->ndo_init)
+               return 0;
+       return app->type->ndo_init(app, netdev);
+}
+
+void nfp_app_ndo_uninit(struct net_device *netdev)
+{
+       struct nfp_app *app = nfp_app_from_netdev(netdev);
+
+       if (app && app->type->ndo_uninit)
+               app->type->ndo_uninit(app, netdev);
+}
+
 u64 *nfp_app_port_get_stats(struct nfp_port *port, u64 *data)
 {
        if (!port || !port->app || !port->app->type->port_get_stats)
index ee74caacb01530d9d3d40532648ff3e3b23ffb11..afbc19aa66a8a52acd8d4282390f4808d55e3316 100644 (file)
@@ -78,6 +78,8 @@ extern const struct nfp_app_type app_abm;
  * @init:      perform basic app checks and init
  * @clean:     clean app state
  * @extra_cap: extra capabilities string
+ * @ndo_init:  vNIC and repr netdev .ndo_init
+ * @ndo_uninit:        vNIC and repr netdev .ndo_uninit
  * @vnic_alloc:        allocate vNICs (assign port types, etc.)
  * @vnic_free: free up app's vNIC state
  * @vnic_init: vNIC netdev was registered
@@ -117,6 +119,9 @@ struct nfp_app_type {
 
        const char *(*extra_cap)(struct nfp_app *app, struct nfp_net *nn);
 
+       int (*ndo_init)(struct nfp_app *app, struct net_device *netdev);
+       void (*ndo_uninit)(struct nfp_app *app, struct net_device *netdev);
+
        int (*vnic_alloc)(struct nfp_app *app, struct nfp_net *nn,
                          unsigned int id);
        void (*vnic_free)(struct nfp_app *app, struct nfp_net *nn);
@@ -200,6 +205,9 @@ static inline void nfp_app_clean(struct nfp_app *app)
                app->type->clean(app);
 }
 
+int nfp_app_ndo_init(struct net_device *netdev);
+void nfp_app_ndo_uninit(struct net_device *netdev);
+
 static inline int nfp_app_vnic_alloc(struct nfp_app *app, struct nfp_net *nn,
                                     unsigned int id)
 {
index f6677bc9875adea9dbfe842b15a6e6d53fc0915f..cdc4e065f6f50d8dff19dbac983547b5b211a804 100644 (file)
@@ -426,4 +426,32 @@ static inline u32 nfp_get_ind_csr_ctx_ptr_offs(u32 read_offset)
        return (read_offset & ~NFP_IND_ME_CTX_PTR_BASE_MASK) | NFP_CSR_CTX_PTR;
 }
 
+enum mul_type {
+       MUL_TYPE_START          = 0x00,
+       MUL_TYPE_STEP_24x8      = 0x01,
+       MUL_TYPE_STEP_16x16     = 0x02,
+       MUL_TYPE_STEP_32x32     = 0x03,
+};
+
+enum mul_step {
+       MUL_STEP_1              = 0x00,
+       MUL_STEP_NONE           = MUL_STEP_1,
+       MUL_STEP_2              = 0x01,
+       MUL_STEP_3              = 0x02,
+       MUL_STEP_4              = 0x03,
+       MUL_LAST                = 0x04,
+       MUL_LAST_2              = 0x05,
+};
+
+#define OP_MUL_BASE            0x0f800000000ULL
+#define OP_MUL_A_SRC           0x000000003ffULL
+#define OP_MUL_B_SRC           0x000000ffc00ULL
+#define OP_MUL_STEP            0x00000700000ULL
+#define OP_MUL_DST_AB          0x00000800000ULL
+#define OP_MUL_SW              0x00040000000ULL
+#define OP_MUL_TYPE            0x00180000000ULL
+#define OP_MUL_WR_AB           0x20000000000ULL
+#define OP_MUL_SRC_LMEXTN      0x40000000000ULL
+#define OP_MUL_DST_LMEXTN      0x80000000000ULL
+
 #endif
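
The OP_MUL_* masks above are consumed through FIELD_PREP() in
__emit_mul(); a userspace sketch of the same bit packing, with
field_prep() as a stand-in for the kernel macro and made-up register
numbers:

    #include <stdint.h>
    #include <stdio.h>

    /* stand-in for FIELD_PREP(): shift the value up to the mask's
     * lowest set bit and clamp it to the mask */
    static uint64_t field_prep(uint64_t mask, uint64_t val)
    {
            return (val << __builtin_ctzll(mask)) & mask;
    }

    int main(void)
    {
            uint64_t insn = 0x0f800000000ULL              /* OP_MUL_BASE */
                    | field_prep(0x000000003ffULL, 5)     /* A source reg 5 */
                    | field_prep(0x000000ffc00ULL, 9)     /* B source reg 9 */
                    | field_prep(0x00000700000ULL, 0x01)  /* MUL_STEP_2 */
                    | field_prep(0x00180000000ULL, 0x03); /* 32x32 step */

            printf("mul insn word = 0x%011llx\n",
                   (unsigned long long)insn);
            return 0;
    }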
index 152283d7e59c8f4a7a69b45f520a7c9625e9ce16..4a540c5e27febdc5827955fb863a6c0f5834325d 100644 (file)
@@ -236,16 +236,20 @@ static int nfp_pcie_sriov_read_nfd_limit(struct nfp_pf *pf)
        int err;
 
        pf->limit_vfs = nfp_rtsym_read_le(pf->rtbl, "nfd_vf_cfg_max_vfs", &err);
-       if (!err)
-               return pci_sriov_set_totalvfs(pf->pdev, pf->limit_vfs);
+       if (err) {
+               /* For backwards compatibility, allow all if symbol not found */
+               pf->limit_vfs = ~0;
+               if (err == -ENOENT)
+                       return 0;
 
-       pf->limit_vfs = ~0;
-       /* Allow any setting for backwards compatibility if symbol not found */
-       if (err == -ENOENT)
-               return 0;
+               nfp_warn(pf->cpp, "Warning: VF limit read failed: %d\n", err);
+               return err;
+       }
 
-       nfp_warn(pf->cpp, "Warning: VF limit read failed: %d\n", err);
-       return err;
+       err = pci_sriov_set_totalvfs(pf->pdev, pf->limit_vfs);
+       if (err)
+               nfp_warn(pf->cpp, "Failed to set VF count in sysfs: %d\n", err);
+       return 0;
 }
 
 static int nfp_pcie_sriov_enable(struct pci_dev *pdev, int num_vfs)
index 2a71a9ffd095a87e19c4546fc0838dafd7cbb28e..439e6ffe2f0538bbc7c2aa99daafd5377fb95f86 100644 (file)
@@ -250,7 +250,7 @@ struct nfp_net_tx_ring {
        struct nfp_net_tx_desc *txds;
 
        dma_addr_t dma;
-       unsigned int size;
+       size_t size;
        bool is_xdp;
 } ____cacheline_aligned;
 
@@ -350,9 +350,9 @@ struct nfp_net_rx_buf {
  * @qcp_fl:     Pointer to base of the QCP freelist queue
  * @rxbufs:     Array of transmitted FL/RX buffers
  * @rxds:       Virtual address of FL/RX ring in host memory
+ * @xdp_rxq:    RX-ring info avail for XDP
  * @dma:        DMA address of the FL/RX ring
  * @size:       Size, in bytes, of the FL/RX ring (needed to free)
- * @xdp_rxq:    RX-ring info avail for XDP
  */
 struct nfp_net_rx_ring {
        struct nfp_net_r_vector *r_vec;
@@ -364,14 +364,15 @@ struct nfp_net_rx_ring {
        u32 idx;
 
        int fl_qcidx;
-       unsigned int size;
        u8 __iomem *qcp_fl;
 
        struct nfp_net_rx_buf *rxbufs;
        struct nfp_net_rx_desc *rxds;
 
-       dma_addr_t dma;
        struct xdp_rxq_info xdp_rxq;
+
+       dma_addr_t dma;
+       size_t size;
 } ____cacheline_aligned;
 
 /**
@@ -485,7 +486,6 @@ struct nfp_stat_pair {
  * @dev:               Backpointer to struct device
  * @netdev:            Backpointer to net_device structure
  * @is_vf:             Is the driver attached to a VF?
- * @bpf_offload_xdp:   Offloaded BPF program is XDP
 * @chained_metadata_format:  Firmware will use new metadata format
  * @rx_dma_dir:                Mapping direction for RX buffers
  * @rx_dma_off:                Offset at which DMA packets (for XDP headroom)
@@ -510,7 +510,6 @@ struct nfp_net_dp {
        struct net_device *netdev;
 
        u8 is_vf:1;
-       u8 bpf_offload_xdp:1;
        u8 chained_metadata_format:1;
 
        u8 rx_dma_dir;
@@ -553,8 +552,8 @@ struct nfp_net_dp {
  * @rss_cfg:            RSS configuration
  * @rss_key:            RSS secret key
  * @rss_itbl:           RSS indirection table
- * @xdp_flags:         Flags with which XDP prog was loaded
- * @xdp_prog:          XDP prog (for ctrl path, both DRV and HW modes)
+ * @xdp:               Information about the driver XDP program
+ * @xdp_hw:            Information about the HW XDP program
  * @max_r_vecs:                Number of allocated interrupt vectors for RX/TX
  * @max_tx_rings:       Maximum number of TX rings supported by the Firmware
  * @max_rx_rings:       Maximum number of RX rings supported by the Firmware
@@ -610,8 +609,8 @@ struct nfp_net {
        u8 rss_key[NFP_NET_CFG_RSS_KEY_SZ];
        u8 rss_itbl[NFP_NET_CFG_RSS_ITBL_SZ];
 
-       u32 xdp_flags;
-       struct bpf_prog *xdp_prog;
+       struct xdp_attachment_info xdp;
+       struct xdp_attachment_info xdp_hw;
 
        unsigned int max_tx_rings;
        unsigned int max_rx_rings;
index d4c27f849f9bbfae5d2d9e795fe28a000839bc07..7c1a921d178d452eb0f4a88e619af89af8032b01 100644 (file)
@@ -53,6 +53,8 @@
 #include <linux/interrupt.h>
 #include <linux/ip.h>
 #include <linux/ipv6.h>
+#include <linux/mm.h>
+#include <linux/overflow.h>
 #include <linux/page_ref.h>
 #include <linux/pci.h>
 #include <linux/pci_regs.h>
@@ -945,11 +947,12 @@ err_free:
 
 /**
 * nfp_net_tx_complete() - Handle completed TX packets
- * @tx_ring:   TX ring structure
+ * @tx_ring:   TX ring structure
+ * @budget:    NAPI budget (only used as bool to determine if in NAPI context)
  *
  * Return: Number of completed TX descriptors
  */
-static void nfp_net_tx_complete(struct nfp_net_tx_ring *tx_ring)
+static void nfp_net_tx_complete(struct nfp_net_tx_ring *tx_ring, int budget)
 {
        struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
        struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
@@ -999,7 +1002,7 @@ static void nfp_net_tx_complete(struct nfp_net_tx_ring *tx_ring)
 
                /* check for last gather fragment */
                if (fidx == nr_frags - 1)
-                       dev_consume_skb_any(skb);
+                       napi_consume_skb(skb, budget);
 
                tx_ring->txbufs[idx].dma_addr = 0;
                tx_ring->txbufs[idx].skb = NULL;
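
The switch from dev_consume_skb_any() to napi_consume_skb() above lets the stack batch skb frees when TX completion runs from NAPI context; a budget of 0 (as in the control-path caller later in this patch) makes the helper fall back to dev_consume_skb_any(). A minimal sketch of the pattern, in which every name except napi_consume_skb() is illustrative:

#include <linux/skbuff.h>

struct example_tx_buf {
	struct sk_buff *skb;
};

struct example_tx_ring {
	struct example_tx_buf *bufs;
	unsigned int cnt;		/* ring size, power of two */
	unsigned int rd_p, wr_p;	/* read/write pointers */
};

static void example_tx_complete(struct example_tx_ring *ring, int budget)
{
	while (ring->rd_p != ring->wr_p) {
		unsigned int idx = ring->rd_p & (ring->cnt - 1);

		if (ring->bufs[idx].skb)
			/* batched free in NAPI context; budget == 0 falls
			 * back to a free that is safe in any context */
			napi_consume_skb(ring->bufs[idx].skb, budget);
		ring->bufs[idx].skb = NULL;
		ring->rd_p++;
	}
}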
@@ -1077,7 +1080,7 @@ static bool nfp_net_xdp_complete(struct nfp_net_tx_ring *tx_ring)
  * @dp:                NFP Net data path struct
  * @tx_ring:   TX ring structure
  *
- * Assumes that the device is stopped
+ * Assumes that the device is stopped, must be idempotent.
  */
 static void
 nfp_net_tx_ring_reset(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring)
@@ -1119,7 +1122,7 @@ nfp_net_tx_ring_reset(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring)
                tx_ring->rd_p++;
        }
 
-       memset(tx_ring->txds, 0, sizeof(*tx_ring->txds) * tx_ring->cnt);
+       memset(tx_ring->txds, 0, tx_ring->size);
        tx_ring->wr_p = 0;
        tx_ring->rd_p = 0;
        tx_ring->qcp_rd_p = 0;
@@ -1279,13 +1282,18 @@ static void nfp_net_rx_give_one(const struct nfp_net_dp *dp,
 * nfp_net_rx_ring_reset() - Reflect in SW the state of the freelist after disable
  * @rx_ring:   RX ring structure
  *
- * Warning: Do *not* call if ring buffers were never put on the FW freelist
- *         (i.e. device was not enabled)!
+ * Assumes that the device is stopped, must be idempotent.
  */
 static void nfp_net_rx_ring_reset(struct nfp_net_rx_ring *rx_ring)
 {
        unsigned int wr_idx, last_idx;
 
+       /* wr_p == rd_p means ring was never fed FL bufs.  RX rings are always
+        * kept at cnt - 1 FL bufs.
+        */
+       if (rx_ring->wr_p == 0 && rx_ring->rd_p == 0)
+               return;
+
        /* Move the empty entry to the end of the list */
        wr_idx = D_IDX(rx_ring, rx_ring->wr_p);
        last_idx = rx_ring->cnt - 1;
@@ -1294,7 +1302,7 @@ static void nfp_net_rx_ring_reset(struct nfp_net_rx_ring *rx_ring)
        rx_ring->rxbufs[last_idx].dma_addr = 0;
        rx_ring->rxbufs[last_idx].frag = NULL;
 
-       memset(rx_ring->rxds, 0, sizeof(*rx_ring->rxds) * rx_ring->cnt);
+       memset(rx_ring->rxds, 0, rx_ring->size);
        rx_ring->wr_p = 0;
        rx_ring->rd_p = 0;
 }
@@ -1709,8 +1717,7 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
                        }
                }
 
-               if (xdp_prog && !(rxd->rxd.flags & PCIE_DESC_RX_BPF &&
-                                 dp->bpf_offload_xdp) && !meta.portid) {
+               if (xdp_prog && !meta.portid) {
                        void *orig_data = rxbuf->frag + pkt_off;
                        unsigned int dma_off;
                        int act;
@@ -1828,7 +1835,7 @@ static int nfp_net_poll(struct napi_struct *napi, int budget)
        unsigned int pkts_polled = 0;
 
        if (r_vec->tx_ring)
-               nfp_net_tx_complete(r_vec->tx_ring);
+               nfp_net_tx_complete(r_vec->tx_ring, budget);
        if (r_vec->rx_ring)
                pkts_polled = nfp_net_rx(r_vec->rx_ring, budget);
 
@@ -2062,7 +2069,7 @@ static void nfp_ctrl_poll(unsigned long arg)
        struct nfp_net_r_vector *r_vec = (void *)arg;
 
        spin_lock_bh(&r_vec->lock);
-       nfp_net_tx_complete(r_vec->tx_ring);
+       nfp_net_tx_complete(r_vec->tx_ring, 0);
        __nfp_ctrl_tx_queued(r_vec);
        spin_unlock_bh(&r_vec->lock);
 
@@ -2121,7 +2128,7 @@ static void nfp_net_tx_ring_free(struct nfp_net_tx_ring *tx_ring)
        struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
        struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
 
-       kfree(tx_ring->txbufs);
+       kvfree(tx_ring->txbufs);
 
        if (tx_ring->txds)
                dma_free_coherent(dp->dev, tx_ring->size,
@@ -2145,18 +2152,17 @@ static int
 nfp_net_tx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring)
 {
        struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
-       int sz;
 
        tx_ring->cnt = dp->txd_cnt;
 
-       tx_ring->size = sizeof(*tx_ring->txds) * tx_ring->cnt;
+       tx_ring->size = array_size(tx_ring->cnt, sizeof(*tx_ring->txds));
        tx_ring->txds = dma_zalloc_coherent(dp->dev, tx_ring->size,
                                            &tx_ring->dma, GFP_KERNEL);
        if (!tx_ring->txds)
                goto err_alloc;
 
-       sz = sizeof(*tx_ring->txbufs) * tx_ring->cnt;
-       tx_ring->txbufs = kzalloc(sz, GFP_KERNEL);
+       tx_ring->txbufs = kvcalloc(tx_ring->cnt, sizeof(*tx_ring->txbufs),
+                                  GFP_KERNEL);
        if (!tx_ring->txbufs)
                goto err_alloc;
 
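
The ring allocations above move to the overflow-aware helpers that come with the new <linux/overflow.h> include: array_size() saturates to SIZE_MAX when the multiplication overflows, so the subsequent allocation fails cleanly instead of being silently truncated, and kvcalloc() both checks the multiplication and may fall back to vmalloc for large rings. A hedged sketch of the same pattern, where everything except the helpers is illustrative:

#include <linux/dma-mapping.h>
#include <linux/mm.h>
#include <linux/overflow.h>
#include <linux/skbuff.h>

struct example_desc { __le64 data; };
struct example_buf { struct sk_buff *skb; };

struct example_ring {
	struct example_desc *descs;
	struct example_buf *bufs;
	dma_addr_t dma;
	size_t size;
	unsigned int cnt;
};

static int example_ring_alloc(struct device *dev, struct example_ring *ring)
{
	/* saturates to SIZE_MAX on overflow, so the allocation below fails */
	ring->size = array_size(ring->cnt, sizeof(*ring->descs));
	ring->descs = dma_zalloc_coherent(dev, ring->size, &ring->dma,
					  GFP_KERNEL);
	if (!ring->descs)
		return -ENOMEM;

	/* overflow-checked; may fall back to vmalloc for large counts */
	ring->bufs = kvcalloc(ring->cnt, sizeof(*ring->bufs), GFP_KERNEL);
	if (!ring->bufs)
		return -ENOMEM;

	return 0;
}

Buffers obtained with kvcalloc() must be released with kvfree(), which is why the free paths in this patch change kfree() to kvfree().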
@@ -2270,7 +2276,7 @@ static void nfp_net_rx_ring_free(struct nfp_net_rx_ring *rx_ring)
 
        if (dp->netdev)
                xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
-       kfree(rx_ring->rxbufs);
+       kvfree(rx_ring->rxbufs);
 
        if (rx_ring->rxds)
                dma_free_coherent(dp->dev, rx_ring->size,
@@ -2293,7 +2299,7 @@ static void nfp_net_rx_ring_free(struct nfp_net_rx_ring *rx_ring)
 static int
 nfp_net_rx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring)
 {
-       int sz, err;
+       int err;
 
        if (dp->netdev) {
                err = xdp_rxq_info_reg(&rx_ring->xdp_rxq, dp->netdev,
@@ -2303,14 +2309,14 @@ nfp_net_rx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring)
        }
 
        rx_ring->cnt = dp->rxd_cnt;
-       rx_ring->size = sizeof(*rx_ring->rxds) * rx_ring->cnt;
+       rx_ring->size = array_size(rx_ring->cnt, sizeof(*rx_ring->rxds));
        rx_ring->rxds = dma_zalloc_coherent(dp->dev, rx_ring->size,
                                            &rx_ring->dma, GFP_KERNEL);
        if (!rx_ring->rxds)
                goto err_alloc;
 
-       sz = sizeof(*rx_ring->rxbufs) * rx_ring->cnt;
-       rx_ring->rxbufs = kzalloc(sz, GFP_KERNEL);
+       rx_ring->rxbufs = kvcalloc(rx_ring->cnt, sizeof(*rx_ring->rxbufs),
+                                  GFP_KERNEL);
        if (!rx_ring->rxbufs)
                goto err_alloc;
 
@@ -2508,6 +2514,8 @@ static void nfp_net_vec_clear_ring_data(struct nfp_net *nn, unsigned int idx)
 /**
  * nfp_net_clear_config_and_disable() - Clear control BAR and disable NFP
  * @nn:      NFP Net device to reconfigure
+ *
+ * Warning: must be fully idempotent.
  */
 static void nfp_net_clear_config_and_disable(struct nfp_net *nn)
 {
@@ -3115,6 +3123,21 @@ nfp_net_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid)
        return nfp_net_reconfig_mbox(nn, NFP_NET_CFG_MBOX_CMD_CTAG_FILTER_KILL);
 }
 
+#ifdef CONFIG_NET_POLL_CONTROLLER
+static void nfp_net_netpoll(struct net_device *netdev)
+{
+       struct nfp_net *nn = netdev_priv(netdev);
+       int i;
+
+       /* nfp_net's NAPIs are statically allocated, so even if there is a race
+        * with the reconfig path this will simply try to schedule some disabled
+        * NAPI instances.
+        */
+       for (i = 0; i < nn->dp.num_stack_tx_rings; i++)
+               napi_schedule_irqoff(&nn->r_vecs[i].napi);
+}
+#endif
+
 static void nfp_net_stat64(struct net_device *netdev,
                           struct rtnl_link_stats64 *stats)
 {
@@ -3377,14 +3400,18 @@ static void nfp_net_del_vxlan_port(struct net_device *netdev,
                nfp_net_set_vxlan_port(nn, idx, 0);
 }
 
-static int
-nfp_net_xdp_setup_drv(struct nfp_net *nn, struct bpf_prog *prog,
-                     struct netlink_ext_ack *extack)
+static int nfp_net_xdp_setup_drv(struct nfp_net *nn, struct netdev_bpf *bpf)
 {
+       struct bpf_prog *prog = bpf->prog;
        struct nfp_net_dp *dp;
+       int err;
+
+       if (!xdp_attachment_flags_ok(&nn->xdp, bpf))
+               return -EBUSY;
 
        if (!prog == !nn->dp.xdp_prog) {
                WRITE_ONCE(nn->dp.xdp_prog, prog);
+               xdp_attachment_setup(&nn->xdp, bpf);
                return 0;
        }
 
@@ -3398,38 +3425,26 @@ nfp_net_xdp_setup_drv(struct nfp_net *nn, struct bpf_prog *prog,
        dp->rx_dma_off = prog ? XDP_PACKET_HEADROOM - nn->dp.rx_offset : 0;
 
        /* We need RX reconfig to remap the buffers (BIDIR vs FROM_DEV) */
-       return nfp_net_ring_reconfig(nn, dp, extack);
+       err = nfp_net_ring_reconfig(nn, dp, bpf->extack);
+       if (err)
+               return err;
+
+       xdp_attachment_setup(&nn->xdp, bpf);
+       return 0;
 }
 
-static int
-nfp_net_xdp_setup(struct nfp_net *nn, struct bpf_prog *prog, u32 flags,
-                 struct netlink_ext_ack *extack)
+static int nfp_net_xdp_setup_hw(struct nfp_net *nn, struct netdev_bpf *bpf)
 {
-       struct bpf_prog *drv_prog, *offload_prog;
        int err;
 
-       if (nn->xdp_prog && (flags ^ nn->xdp_flags) & XDP_FLAGS_MODES)
+       if (!xdp_attachment_flags_ok(&nn->xdp_hw, bpf))
                return -EBUSY;
 
-       /* Load both when no flags set to allow easy activation of driver path
-        * when program is replaced by one which can't be offloaded.
-        */
-       drv_prog     = flags & XDP_FLAGS_HW_MODE  ? NULL : prog;
-       offload_prog = flags & XDP_FLAGS_DRV_MODE ? NULL : prog;
-
-       err = nfp_net_xdp_setup_drv(nn, drv_prog, extack);
+       err = nfp_app_xdp_offload(nn->app, nn, bpf->prog, bpf->extack);
        if (err)
                return err;
 
-       err = nfp_app_xdp_offload(nn->app, nn, offload_prog, extack);
-       if (err && flags & XDP_FLAGS_HW_MODE)
-               return err;
-
-       if (nn->xdp_prog)
-               bpf_prog_put(nn->xdp_prog);
-       nn->xdp_prog = prog;
-       nn->xdp_flags = flags;
-
+       xdp_attachment_setup(&nn->xdp_hw, bpf);
        return 0;
 }
 
@@ -3439,16 +3454,13 @@ static int nfp_net_xdp(struct net_device *netdev, struct netdev_bpf *xdp)
 
        switch (xdp->command) {
        case XDP_SETUP_PROG:
+               return nfp_net_xdp_setup_drv(nn, xdp);
        case XDP_SETUP_PROG_HW:
-               return nfp_net_xdp_setup(nn, xdp->prog, xdp->flags,
-                                        xdp->extack);
+               return nfp_net_xdp_setup_hw(nn, xdp);
        case XDP_QUERY_PROG:
-               xdp->prog_attached = !!nn->xdp_prog;
-               if (nn->dp.bpf_offload_xdp)
-                       xdp->prog_attached = XDP_ATTACHED_HW;
-               xdp->prog_id = nn->xdp_prog ? nn->xdp_prog->aux->id : 0;
-               xdp->prog_flags = nn->xdp_prog ? nn->xdp_flags : 0;
-               return 0;
+               return xdp_attachment_query(&nn->xdp, xdp);
+       case XDP_QUERY_PROG_HW:
+               return xdp_attachment_query(&nn->xdp_hw, xdp);
        default:
                return nfp_app_bpf(nn->app, nn, xdp);
        }
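
This is the core of the conversion away from the driver's private xdp_prog/xdp_flags bookkeeping: struct xdp_attachment_info plus the xdp_attachment_*() helpers (introduced alongside this patch in net/core/xdp.c) remember the attached program and its flags, reject incompatible flag changes, and answer XDP_QUERY_PROG generically. A minimal sketch of the resulting ndo_bpf shape, where everything except the helpers and the netdev_bpf commands is illustrative:

#include <linux/netdevice.h>
#include <net/xdp.h>

struct example_priv {
	struct xdp_attachment_info xdp;
};

static int example_ndo_bpf(struct net_device *netdev, struct netdev_bpf *bpf)
{
	struct example_priv *priv = netdev_priv(netdev);

	switch (bpf->command) {
	case XDP_SETUP_PROG:
		/* refuse to change attach flags while a program is loaded */
		if (!xdp_attachment_flags_ok(&priv->xdp, bpf))
			return -EBUSY;
		/* ... install bpf->prog and reconfigure rings here ... */
		xdp_attachment_setup(&priv->xdp, bpf);	/* record prog + flags */
		return 0;
	case XDP_QUERY_PROG:
		/* reports id and flags of the recorded attachment */
		return xdp_attachment_query(&priv->xdp, bpf);
	default:
		return -EINVAL;
	}
}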
@@ -3476,12 +3488,17 @@ static int nfp_net_set_mac_address(struct net_device *netdev, void *addr)
 }
 
 const struct net_device_ops nfp_net_netdev_ops = {
+       .ndo_init               = nfp_app_ndo_init,
+       .ndo_uninit             = nfp_app_ndo_uninit,
        .ndo_open               = nfp_net_netdev_open,
        .ndo_stop               = nfp_net_netdev_close,
        .ndo_start_xmit         = nfp_net_tx,
        .ndo_get_stats64        = nfp_net_stat64,
        .ndo_vlan_rx_add_vid    = nfp_net_vlan_rx_add_vid,
        .ndo_vlan_rx_kill_vid   = nfp_net_vlan_rx_kill_vid,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+       .ndo_poll_controller    = nfp_net_netpoll,
+#endif
        .ndo_set_vf_mac         = nfp_app_set_vf_mac,
        .ndo_set_vf_vlan        = nfp_app_set_vf_vlan,
        .ndo_set_vf_spoofchk    = nfp_app_set_vf_spoofchk,
index 26d1cc4e2906132c5772d965db885b237c9d112c..6a79c8e4a7a404a9ae48ca8bf3eae492cc3783ad 100644 (file)
@@ -233,12 +233,10 @@ nfp_net_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo)
 static void
 nfp_app_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo)
 {
-       struct nfp_app *app;
-
-       app = nfp_app_from_netdev(netdev);
-       if (!app)
-               return;
+       struct nfp_app *app = nfp_app_from_netdev(netdev);
 
+       strlcpy(drvinfo->bus_info, pci_name(app->pdev),
+               sizeof(drvinfo->bus_info));
        nfp_get_drvinfo(app, app->pdev, "*", drvinfo);
 }
 
@@ -452,7 +450,7 @@ static unsigned int nfp_vnic_get_sw_stats_count(struct net_device *netdev)
 {
        struct nfp_net *nn = netdev_priv(netdev);
 
-       return NN_RVEC_GATHER_STATS + nn->dp.num_r_vecs * NN_RVEC_PER_Q_STATS;
+       return NN_RVEC_GATHER_STATS + nn->max_r_vecs * NN_RVEC_PER_Q_STATS;
 }
 
 static u8 *nfp_vnic_get_sw_stats_strings(struct net_device *netdev, u8 *data)
@@ -460,7 +458,7 @@ static u8 *nfp_vnic_get_sw_stats_strings(struct net_device *netdev, u8 *data)
        struct nfp_net *nn = netdev_priv(netdev);
        int i;
 
-       for (i = 0; i < nn->dp.num_r_vecs; i++) {
+       for (i = 0; i < nn->max_r_vecs; i++) {
                data = nfp_pr_et(data, "rvec_%u_rx_pkts", i);
                data = nfp_pr_et(data, "rvec_%u_tx_pkts", i);
                data = nfp_pr_et(data, "rvec_%u_tx_busy", i);
@@ -486,7 +484,7 @@ static u64 *nfp_vnic_get_sw_stats(struct net_device *netdev, u64 *data)
        u64 tmp[NN_RVEC_GATHER_STATS];
        unsigned int i, j;
 
-       for (i = 0; i < nn->dp.num_r_vecs; i++) {
+       for (i = 0; i < nn->max_r_vecs; i++) {
                unsigned int start;
 
                do {
@@ -521,15 +519,13 @@ static u64 *nfp_vnic_get_sw_stats(struct net_device *netdev, u64 *data)
        return data;
 }
 
-static unsigned int
-nfp_vnic_get_hw_stats_count(unsigned int rx_rings, unsigned int tx_rings)
+static unsigned int nfp_vnic_get_hw_stats_count(unsigned int num_vecs)
 {
-       return NN_ET_GLOBAL_STATS_LEN + (rx_rings + tx_rings) * 2;
+       return NN_ET_GLOBAL_STATS_LEN + num_vecs * 4;
 }
 
 static u8 *
-nfp_vnic_get_hw_stats_strings(u8 *data, unsigned int rx_rings,
-                             unsigned int tx_rings, bool repr)
+nfp_vnic_get_hw_stats_strings(u8 *data, unsigned int num_vecs, bool repr)
 {
        int swap_off, i;
 
@@ -549,36 +545,29 @@ nfp_vnic_get_hw_stats_strings(u8 *data, unsigned int rx_rings,
        for (i = NN_ET_SWITCH_STATS_LEN * 2; i < NN_ET_GLOBAL_STATS_LEN; i++)
                data = nfp_pr_et(data, nfp_net_et_stats[i].name);
 
-       for (i = 0; i < tx_rings; i++) {
-               data = nfp_pr_et(data, "txq_%u_pkts", i);
-               data = nfp_pr_et(data, "txq_%u_bytes", i);
-       }
-
-       for (i = 0; i < rx_rings; i++) {
+       for (i = 0; i < num_vecs; i++) {
                data = nfp_pr_et(data, "rxq_%u_pkts", i);
                data = nfp_pr_et(data, "rxq_%u_bytes", i);
+               data = nfp_pr_et(data, "txq_%u_pkts", i);
+               data = nfp_pr_et(data, "txq_%u_bytes", i);
        }
 
        return data;
 }
 
 static u64 *
-nfp_vnic_get_hw_stats(u64 *data, u8 __iomem *mem,
-                     unsigned int rx_rings, unsigned int tx_rings)
+nfp_vnic_get_hw_stats(u64 *data, u8 __iomem *mem, unsigned int num_vecs)
 {
        unsigned int i;
 
        for (i = 0; i < NN_ET_GLOBAL_STATS_LEN; i++)
                *data++ = readq(mem + nfp_net_et_stats[i].off);
 
-       for (i = 0; i < tx_rings; i++) {
-               *data++ = readq(mem + NFP_NET_CFG_TXR_STATS(i));
-               *data++ = readq(mem + NFP_NET_CFG_TXR_STATS(i) + 8);
-       }
-
-       for (i = 0; i < rx_rings; i++) {
+       for (i = 0; i < num_vecs; i++) {
                *data++ = readq(mem + NFP_NET_CFG_RXR_STATS(i));
                *data++ = readq(mem + NFP_NET_CFG_RXR_STATS(i) + 8);
+               *data++ = readq(mem + NFP_NET_CFG_TXR_STATS(i));
+               *data++ = readq(mem + NFP_NET_CFG_TXR_STATS(i) + 8);
        }
 
        return data;
@@ -633,8 +622,7 @@ static void nfp_net_get_strings(struct net_device *netdev,
        switch (stringset) {
        case ETH_SS_STATS:
                data = nfp_vnic_get_sw_stats_strings(netdev, data);
-               data = nfp_vnic_get_hw_stats_strings(data, nn->dp.num_rx_rings,
-                                                    nn->dp.num_tx_rings,
+               data = nfp_vnic_get_hw_stats_strings(data, nn->max_r_vecs,
                                                     false);
                data = nfp_mac_get_stats_strings(netdev, data);
                data = nfp_app_port_get_stats_strings(nn->port, data);
@@ -649,8 +637,7 @@ nfp_net_get_stats(struct net_device *netdev, struct ethtool_stats *stats,
        struct nfp_net *nn = netdev_priv(netdev);
 
        data = nfp_vnic_get_sw_stats(netdev, data);
-       data = nfp_vnic_get_hw_stats(data, nn->dp.ctrl_bar,
-                                    nn->dp.num_rx_rings, nn->dp.num_tx_rings);
+       data = nfp_vnic_get_hw_stats(data, nn->dp.ctrl_bar, nn->max_r_vecs);
        data = nfp_mac_get_stats(netdev, data);
        data = nfp_app_port_get_stats(nn->port, data);
 }
@@ -662,8 +649,7 @@ static int nfp_net_get_sset_count(struct net_device *netdev, int sset)
        switch (sset) {
        case ETH_SS_STATS:
                return nfp_vnic_get_sw_stats_count(netdev) +
-                      nfp_vnic_get_hw_stats_count(nn->dp.num_rx_rings,
-                                                  nn->dp.num_tx_rings) +
+                      nfp_vnic_get_hw_stats_count(nn->max_r_vecs) +
                       nfp_mac_get_stats_count(netdev) +
                       nfp_app_port_get_stats_count(nn->port);
        default:
@@ -679,7 +665,7 @@ static void nfp_port_get_strings(struct net_device *netdev,
        switch (stringset) {
        case ETH_SS_STATS:
                if (nfp_port_is_vnic(port))
-                       data = nfp_vnic_get_hw_stats_strings(data, 0, 0, true);
+                       data = nfp_vnic_get_hw_stats_strings(data, 0, true);
                else
                        data = nfp_mac_get_stats_strings(netdev, data);
                data = nfp_app_port_get_stats_strings(port, data);
@@ -694,7 +680,7 @@ nfp_port_get_stats(struct net_device *netdev, struct ethtool_stats *stats,
        struct nfp_port *port = nfp_port_from_netdev(netdev);
 
        if (nfp_port_is_vnic(port))
-               data = nfp_vnic_get_hw_stats(data, port->vnic, 0, 0);
+               data = nfp_vnic_get_hw_stats(data, port->vnic, 0);
        else
                data = nfp_mac_get_stats(netdev, data);
        data = nfp_app_port_get_stats(port, data);
@@ -708,7 +694,7 @@ static int nfp_port_get_sset_count(struct net_device *netdev, int sset)
        switch (sset) {
        case ETH_SS_STATS:
                if (nfp_port_is_vnic(port))
-                       count = nfp_vnic_get_hw_stats_count(0, 0);
+                       count = nfp_vnic_get_hw_stats_count(0);
                else
                        count = nfp_mac_get_stats_count(netdev);
                count += nfp_app_port_get_stats_count(port);
index d7b712f6362fae44474e37d791841805734c01d3..18a09cdcd9c6ff0247a625d71f46395f6a55aa6b 100644 (file)
@@ -262,6 +262,8 @@ err_port_disable:
 }
 
 const struct net_device_ops nfp_repr_netdev_ops = {
+       .ndo_init               = nfp_app_ndo_init,
+       .ndo_uninit             = nfp_app_ndo_uninit,
        .ndo_open               = nfp_repr_open,
        .ndo_stop               = nfp_repr_stop,
        .ndo_start_xmit         = nfp_repr_xmit,
index 749655c329b240021a34e99612412626c26e8855..c8d0b1016a6463e8df585e3359c137379f5faea2 100644 (file)
@@ -1248,7 +1248,7 @@ static void nfp6000_free(struct nfp_cpp *cpp)
        kfree(nfp);
 }
 
-static void nfp6000_read_serial(struct device *dev, u8 *serial)
+static int nfp6000_read_serial(struct device *dev, u8 *serial)
 {
        struct pci_dev *pdev = to_pci_dev(dev);
        int pos;
@@ -1256,25 +1256,29 @@ static void nfp6000_read_serial(struct device *dev, u8 *serial)
 
        pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DSN);
        if (!pos) {
-               memset(serial, 0, NFP_SERIAL_LEN);
-               return;
+               dev_err(dev, "can't find PCIe Serial Number Capability\n");
+               return -EINVAL;
        }
 
        pci_read_config_dword(pdev, pos + 4, &reg);
        put_unaligned_be16(reg >> 16, serial + 4);
        pci_read_config_dword(pdev, pos + 8, &reg);
        put_unaligned_be32(reg, serial);
+
+       return 0;
 }
 
-static u16 nfp6000_get_interface(struct device *dev)
+static int nfp6000_get_interface(struct device *dev)
 {
        struct pci_dev *pdev = to_pci_dev(dev);
        int pos;
        u32 reg;
 
        pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DSN);
-       if (!pos)
-               return NFP_CPP_INTERFACE(NFP_CPP_INTERFACE_TYPE_PCI, 0, 0xff);
+       if (!pos) {
+               dev_err(dev, "can't find PCIe Serial Number Capability\n");
+               return -EINVAL;
+       }
 
        pci_read_config_dword(pdev, pos + 4, &reg);
 
index b0da3d4368505eef63a992ce4a652eaabba9e5a6..c338d539fa96738076146cacd104e2e6030b6cf6 100644 (file)
@@ -364,8 +364,8 @@ struct nfp_cpp_operations {
        int (*init)(struct nfp_cpp *cpp);
        void (*free)(struct nfp_cpp *cpp);
 
-       void (*read_serial)(struct device *dev, u8 *serial);
-       u16 (*get_interface)(struct device *dev);
+       int (*read_serial)(struct device *dev, u8 *serial);
+       int (*get_interface)(struct device *dev);
 
        int (*area_init)(struct nfp_cpp_area *area,
                         u32 dest, unsigned long long address,
index ef30597aa31963715676d9c4c8fb4a4ebe03ec10..73de57a09800d7d0c482e64d22995a3aa6c06613 100644 (file)
@@ -1163,10 +1163,10 @@ nfp_cpp_from_operations(const struct nfp_cpp_operations *ops,
 {
        const u32 arm = NFP_CPP_ID(NFP_CPP_TARGET_ARM, NFP_CPP_ACTION_RW, 0);
        struct nfp_cpp *cpp;
+       int ifc, err;
        u32 mask[2];
        u32 xpbaddr;
        size_t tgt;
-       int err;
 
        cpp = kzalloc(sizeof(*cpp), GFP_KERNEL);
        if (!cpp) {
@@ -1176,9 +1176,19 @@ nfp_cpp_from_operations(const struct nfp_cpp_operations *ops,
 
        cpp->op = ops;
        cpp->priv = priv;
-       cpp->interface = ops->get_interface(parent);
-       if (ops->read_serial)
-               ops->read_serial(parent, cpp->serial);
+
+       ifc = ops->get_interface(parent);
+       if (ifc < 0) {
+               err = ifc;
+               goto err_free_cpp;
+       }
+       cpp->interface = ifc;
+       if (ops->read_serial) {
+               err = ops->read_serial(parent, cpp->serial);
+               if (err)
+                       goto err_free_cpp;
+       }
+
        rwlock_init(&cpp->resource_lock);
        init_waitqueue_head(&cpp->waitq);
        lockdep_set_class(&cpp->resource_lock, &nfp_cpp_resource_lock_key);
@@ -1191,7 +1201,7 @@ nfp_cpp_from_operations(const struct nfp_cpp_operations *ops,
        err = device_register(&cpp->dev);
        if (err < 0) {
                put_device(&cpp->dev);
-               goto err_dev;
+               goto err_free_cpp;
        }
 
        dev_set_drvdata(&cpp->dev, cpp);
@@ -1238,7 +1248,7 @@ nfp_cpp_from_operations(const struct nfp_cpp_operations *ops,
 
 err_out:
        device_unregister(&cpp->dev);
-err_dev:
+err_free_cpp:
        kfree(cpp);
 err_malloc:
        return ERR_PTR(err);
index 7cbd0174459ceabb43fd1cabe96aece2dce92d27..1d9b0d44ddb693fbc4816b055581ffc0aa7e88cf 100644 (file)
@@ -5777,7 +5777,7 @@ static int nv_probe(struct pci_dev *pci_dev, const struct pci_device_id *id)
                                                      (np->rx_ring_size +
                                                      np->tx_ring_size),
                                                      &np->ring_addr,
-                                                     GFP_ATOMIC);
+                                                     GFP_KERNEL);
                if (!np->rx_ring.orig)
                        goto out_unmap;
                np->tx_ring.orig = &np->rx_ring.orig[np->rx_ring_size];
@@ -5786,7 +5786,7 @@ static int nv_probe(struct pci_dev *pci_dev, const struct pci_device_id *id)
                                                    sizeof(struct ring_desc_ex) *
                                                    (np->rx_ring_size +
                                                    np->tx_ring_size),
-                                                   &np->ring_addr, GFP_ATOMIC);
+                                                   &np->ring_addr, GFP_KERNEL);
                if (!np->rx_ring.ex)
                        goto out_unmap;
                np->tx_ring.ex = &np->rx_ring.ex[np->rx_ring_size];
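
The GFP_ATOMIC to GFP_KERNEL change is safe because nv_probe() runs in process context, where sleeping is allowed; GFP_ATOMIC should be reserved for paths that cannot sleep (IRQ handlers, sections under spinlocks), since it dips into emergency reserves and fails more readily. A hedged one-function illustration of the convention (names are illustrative):

#include <linux/dma-mapping.h>
#include <linux/pci.h>

static void *example_alloc_ring(struct pci_dev *pdev, size_t bytes,
				dma_addr_t *dma)
{
	/* probe paths may sleep: GFP_KERNEL lets the allocator reclaim
	 * memory instead of draining the atomic reserves */
	return dma_zalloc_coherent(&pdev->dev, bytes, dma, GFP_KERNEL);
}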
index 31288d4ad248fc5f4db4738ed9c2ca75b2a34d43..862de0f3bc41c4fe64c820fdc275c25f60afb028 100644 (file)
@@ -1,4 +1,4 @@
 obj-$(CONFIG_PCH_GBE) += pch_gbe.o
 
 pch_gbe-y := pch_gbe_phy.o pch_gbe_ethtool.o pch_gbe_param.o
-pch_gbe-y += pch_gbe_api.o pch_gbe_main.o
+pch_gbe-y += pch_gbe_main.o
index 697e29dd4bd3adf616dc43fb65c8f3378231868d..44c2f291e76633942d5353a8aeadc66d0d642d82 100644 (file)
@@ -326,32 +326,6 @@ struct pch_gbe_regs {
 #define PCH_GBE_FC_FULL                        3
 #define PCH_GBE_FC_DEFAULT             PCH_GBE_FC_FULL
 
-
-struct pch_gbe_hw;
-/**
- * struct  pch_gbe_functions - HAL APi function pointer
- * @get_bus_info:      for pch_gbe_hal_get_bus_info
- * @init_hw:           for pch_gbe_hal_init_hw
- * @read_phy_reg:      for pch_gbe_hal_read_phy_reg
- * @write_phy_reg:     for pch_gbe_hal_write_phy_reg
- * @reset_phy:         for pch_gbe_hal_phy_hw_reset
- * @sw_reset_phy:      for pch_gbe_hal_phy_sw_reset
- * @power_up_phy:      for pch_gbe_hal_power_up_phy
- * @power_down_phy:    for pch_gbe_hal_power_down_phy
- * @read_mac_addr:     for pch_gbe_hal_read_mac_addr
- */
-struct pch_gbe_functions {
-       void (*get_bus_info) (struct pch_gbe_hw *);
-       s32 (*init_hw) (struct pch_gbe_hw *);
-       s32 (*read_phy_reg) (struct pch_gbe_hw *, u32, u16 *);
-       s32 (*write_phy_reg) (struct pch_gbe_hw *, u32, u16);
-       void (*reset_phy) (struct pch_gbe_hw *);
-       void (*sw_reset_phy) (struct pch_gbe_hw *);
-       void (*power_up_phy) (struct pch_gbe_hw *hw);
-       void (*power_down_phy) (struct pch_gbe_hw *hw);
-       s32 (*read_mac_addr) (struct pch_gbe_hw *);
-};
-
 /**
  * struct pch_gbe_mac_info - MAC information
  * @addr[6]:           Store the MAC address
@@ -392,17 +366,6 @@ struct pch_gbe_phy_info {
        u16 autoneg_advertised;
 };
 
-/*!
- * @ingroup Gigabit Ether driver Layer
- * @struct  pch_gbe_bus_info
- * @brief   Bus information
- */
-struct pch_gbe_bus_info {
-       u8 type;
-       u8 speed;
-       u8 width;
-};
-
 /*!
  * @ingroup Gigabit Ether driver Layer
  * @struct  pch_gbe_hw
@@ -414,10 +377,8 @@ struct pch_gbe_hw {
        struct pch_gbe_regs  __iomem *reg;
        spinlock_t miim_lock;
 
-       const struct pch_gbe_functions *func;
        struct pch_gbe_mac_info mac;
        struct pch_gbe_phy_info phy;
-       struct pch_gbe_bus_info bus;
 };
 
 /**
@@ -680,7 +641,6 @@ void pch_gbe_set_ethtool_ops(struct net_device *netdev);
 
 /* pch_gbe_mac.c */
 s32 pch_gbe_mac_force_mac_fc(struct pch_gbe_hw *hw);
-s32 pch_gbe_mac_read_mac_addr(struct pch_gbe_hw *hw);
 u16 pch_gbe_mac_ctrl_miim(struct pch_gbe_hw *hw, u32 addr, u32 dir, u32 reg,
                          u16 data);
 #endif /* _PCH_GBE_H_ */
diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_api.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_api.c
deleted file mode 100644 (file)
index 5125036..0000000
+++ /dev/null
@@ -1,262 +0,0 @@
-/*
- * Copyright (C) 1999 - 2010 Intel Corporation.
- * Copyright (C) 2010 OKI SEMICONDUCTOR Co., LTD.
- *
- * This code was derived from the Intel e1000e Linux driver.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- */
-#include "pch_gbe.h"
-#include "pch_gbe_phy.h"
-#include "pch_gbe_api.h"
-
-/* bus type values */
-#define pch_gbe_bus_type_unknown       0
-#define pch_gbe_bus_type_pci           1
-#define pch_gbe_bus_type_pcix          2
-#define pch_gbe_bus_type_pci_express   3
-#define pch_gbe_bus_type_reserved      4
-
-/* bus speed values */
-#define pch_gbe_bus_speed_unknown      0
-#define pch_gbe_bus_speed_33           1
-#define pch_gbe_bus_speed_66           2
-#define pch_gbe_bus_speed_100          3
-#define pch_gbe_bus_speed_120          4
-#define pch_gbe_bus_speed_133          5
-#define pch_gbe_bus_speed_2500         6
-#define pch_gbe_bus_speed_reserved     7
-
-/* bus width values */
-#define pch_gbe_bus_width_unknown      0
-#define pch_gbe_bus_width_pcie_x1      1
-#define pch_gbe_bus_width_pcie_x2      2
-#define pch_gbe_bus_width_pcie_x4      4
-#define pch_gbe_bus_width_32           5
-#define pch_gbe_bus_width_64           6
-#define pch_gbe_bus_width_reserved     7
-
-/**
- * pch_gbe_plat_get_bus_info - Obtain bus information for adapter
- * @hw:        Pointer to the HW structure
- */
-static void pch_gbe_plat_get_bus_info(struct pch_gbe_hw *hw)
-{
-       hw->bus.type  = pch_gbe_bus_type_pci_express;
-       hw->bus.speed = pch_gbe_bus_speed_2500;
-       hw->bus.width = pch_gbe_bus_width_pcie_x1;
-}
-
-/**
- * pch_gbe_plat_init_hw - Initialize hardware
- * @hw:        Pointer to the HW structure
- * Returns:
- *     0:              Successfully
- *     Negative value: Failed-EBUSY
- */
-static s32 pch_gbe_plat_init_hw(struct pch_gbe_hw *hw)
-{
-       s32 ret_val;
-
-       ret_val = pch_gbe_phy_get_id(hw);
-       if (ret_val) {
-               struct pch_gbe_adapter *adapter = pch_gbe_hw_to_adapter(hw);
-
-               netdev_err(adapter->netdev, "pch_gbe_phy_get_id error\n");
-               return ret_val;
-       }
-       pch_gbe_phy_init_setting(hw);
-       /* Setup Mac interface option RGMII */
-#ifdef PCH_GBE_MAC_IFOP_RGMII
-       pch_gbe_phy_set_rgmii(hw);
-#endif
-       return ret_val;
-}
-
-static const struct pch_gbe_functions pch_gbe_ops = {
-       .get_bus_info      = pch_gbe_plat_get_bus_info,
-       .init_hw           = pch_gbe_plat_init_hw,
-       .read_phy_reg      = pch_gbe_phy_read_reg_miic,
-       .write_phy_reg     = pch_gbe_phy_write_reg_miic,
-       .reset_phy         = pch_gbe_phy_hw_reset,
-       .sw_reset_phy      = pch_gbe_phy_sw_reset,
-       .power_up_phy      = pch_gbe_phy_power_up,
-       .power_down_phy    = pch_gbe_phy_power_down,
-       .read_mac_addr     = pch_gbe_mac_read_mac_addr
-};
-
-/**
- * pch_gbe_plat_init_function_pointers - Init func ptrs
- * @hw:        Pointer to the HW structure
- */
-static void pch_gbe_plat_init_function_pointers(struct pch_gbe_hw *hw)
-{
-       /* Set PHY parameter */
-       hw->phy.reset_delay_us     = PCH_GBE_PHY_RESET_DELAY_US;
-       /* Set function pointers */
-       hw->func = &pch_gbe_ops;
-}
-
-/**
- * pch_gbe_hal_setup_init_funcs - Initializes function pointers
- * @hw:        Pointer to the HW structure
- * Returns:
- *     0:      Successfully
- *     ENOSYS: Function is not registered
- */
-s32 pch_gbe_hal_setup_init_funcs(struct pch_gbe_hw *hw)
-{
-       if (!hw->reg) {
-               struct pch_gbe_adapter *adapter = pch_gbe_hw_to_adapter(hw);
-
-               netdev_err(adapter->netdev, "ERROR: Registers not mapped\n");
-               return -ENOSYS;
-       }
-       pch_gbe_plat_init_function_pointers(hw);
-       return 0;
-}
-
-/**
- * pch_gbe_hal_get_bus_info - Obtain bus information for adapter
- * @hw:        Pointer to the HW structure
- */
-void pch_gbe_hal_get_bus_info(struct pch_gbe_hw *hw)
-{
-       if (!hw->func->get_bus_info) {
-               struct pch_gbe_adapter *adapter = pch_gbe_hw_to_adapter(hw);
-
-               netdev_err(adapter->netdev, "ERROR: configuration\n");
-               return;
-       }
-       hw->func->get_bus_info(hw);
-}
-
-/**
- * pch_gbe_hal_init_hw - Initialize hardware
- * @hw:        Pointer to the HW structure
- * Returns:
- *     0:      Successfully
- *     ENOSYS: Function is not registered
- */
-s32 pch_gbe_hal_init_hw(struct pch_gbe_hw *hw)
-{
-       if (!hw->func->init_hw) {
-               struct pch_gbe_adapter *adapter = pch_gbe_hw_to_adapter(hw);
-
-               netdev_err(adapter->netdev, "ERROR: configuration\n");
-               return -ENOSYS;
-       }
-       return hw->func->init_hw(hw);
-}
-
-/**
- * pch_gbe_hal_read_phy_reg - Reads PHY register
- * @hw:            Pointer to the HW structure
- * @offset: The register to read
- * @data:   The buffer to store the 16-bit read.
- * Returns:
- *     0:      Successfully
- *     Negative value: Failed
- */
-s32 pch_gbe_hal_read_phy_reg(struct pch_gbe_hw *hw, u32 offset,
-                                       u16 *data)
-{
-       if (!hw->func->read_phy_reg)
-               return 0;
-       return hw->func->read_phy_reg(hw, offset, data);
-}
-
-/**
- * pch_gbe_hal_write_phy_reg - Writes PHY register
- * @hw:            Pointer to the HW structure
- * @offset: The register to read
- * @data:   The value to write.
- * Returns:
- *     0:      Successfully
- *     Negative value: Failed
- */
-s32 pch_gbe_hal_write_phy_reg(struct pch_gbe_hw *hw, u32 offset,
-                                       u16 data)
-{
-       if (!hw->func->write_phy_reg)
-               return 0;
-       return hw->func->write_phy_reg(hw, offset, data);
-}
-
-/**
- * pch_gbe_hal_phy_hw_reset - Hard PHY reset
- * @hw:            Pointer to the HW structure
- */
-void pch_gbe_hal_phy_hw_reset(struct pch_gbe_hw *hw)
-{
-       if (!hw->func->reset_phy) {
-               struct pch_gbe_adapter *adapter = pch_gbe_hw_to_adapter(hw);
-
-               netdev_err(adapter->netdev, "ERROR: configuration\n");
-               return;
-       }
-       hw->func->reset_phy(hw);
-}
-
-/**
- * pch_gbe_hal_phy_sw_reset - Soft PHY reset
- * @hw:            Pointer to the HW structure
- */
-void pch_gbe_hal_phy_sw_reset(struct pch_gbe_hw *hw)
-{
-       if (!hw->func->sw_reset_phy) {
-               struct pch_gbe_adapter *adapter = pch_gbe_hw_to_adapter(hw);
-
-               netdev_err(adapter->netdev, "ERROR: configuration\n");
-               return;
-       }
-       hw->func->sw_reset_phy(hw);
-}
-
-/**
- * pch_gbe_hal_read_mac_addr - Reads MAC address
- * @hw:        Pointer to the HW structure
- * Returns:
- *     0:      Successfully
- *     ENOSYS: Function is not registered
- */
-s32 pch_gbe_hal_read_mac_addr(struct pch_gbe_hw *hw)
-{
-       if (!hw->func->read_mac_addr) {
-               struct pch_gbe_adapter *adapter = pch_gbe_hw_to_adapter(hw);
-
-               netdev_err(adapter->netdev, "ERROR: configuration\n");
-               return -ENOSYS;
-       }
-       return hw->func->read_mac_addr(hw);
-}
-
-/**
- * pch_gbe_hal_power_up_phy - Power up PHY
- * @hw:        Pointer to the HW structure
- */
-void pch_gbe_hal_power_up_phy(struct pch_gbe_hw *hw)
-{
-       if (hw->func->power_up_phy)
-               hw->func->power_up_phy(hw);
-}
-
-/**
- * pch_gbe_hal_power_down_phy - Power down PHY
- * @hw:        Pointer to the HW structure
- */
-void pch_gbe_hal_power_down_phy(struct pch_gbe_hw *hw)
-{
-       if (hw->func->power_down_phy)
-               hw->func->power_down_phy(hw);
-}
diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_api.h b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_api.h
deleted file mode 100644 (file)
index 91ce07c..0000000
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Copyright (C) 1999 - 2010 Intel Corporation.
- * Copyright (C) 2010 OKI SEMICONDUCTOR Co., LTD.
- *
- * This code was derived from the Intel e1000e Linux driver.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- */
-#ifndef _PCH_GBE_API_H_
-#define _PCH_GBE_API_H_
-
-#include "pch_gbe_phy.h"
-
-s32 pch_gbe_hal_setup_init_funcs(struct pch_gbe_hw *hw);
-void pch_gbe_hal_get_bus_info(struct pch_gbe_hw *hw);
-s32 pch_gbe_hal_init_hw(struct pch_gbe_hw *hw);
-s32 pch_gbe_hal_read_phy_reg(struct pch_gbe_hw *hw, u32 offset, u16 *data);
-s32 pch_gbe_hal_write_phy_reg(struct pch_gbe_hw *hw, u32 offset, u16 data);
-void pch_gbe_hal_phy_hw_reset(struct pch_gbe_hw *hw);
-void pch_gbe_hal_phy_sw_reset(struct pch_gbe_hw *hw);
-s32 pch_gbe_hal_read_mac_addr(struct pch_gbe_hw *hw);
-void pch_gbe_hal_power_up_phy(struct pch_gbe_hw *hw);
-void pch_gbe_hal_power_down_phy(struct pch_gbe_hw *hw);
-
-#endif
index 731ce1e419e45971574d71c607e165d13daf4974..adaa0024adfed596b87c47ec08d9be94f1d0edb0 100644 (file)
@@ -17,7 +17,7 @@
  * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 #include "pch_gbe.h"
-#include "pch_gbe_api.h"
+#include "pch_gbe_phy.h"
 
 /**
  * pch_gbe_stats - Stats item information
@@ -125,7 +125,7 @@ static int pch_gbe_set_link_ksettings(struct net_device *netdev,
        u32 advertising;
        int ret;
 
-       pch_gbe_hal_write_phy_reg(hw, MII_BMCR, BMCR_RESET);
+       pch_gbe_phy_write_reg_miic(hw, MII_BMCR, BMCR_RESET);
 
        memcpy(&copy_ecmd, ecmd, sizeof(*ecmd));
 
@@ -204,7 +204,7 @@ static void pch_gbe_get_regs(struct net_device *netdev,
                *regs_buff++ = ioread32(&hw->reg->INT_ST + i);
        /* PHY register */
        for (i = 0; i < PCH_GBE_PHY_REGS_LEN; i++) {
-               pch_gbe_hal_read_phy_reg(&adapter->hw, i, &tmp);
+               pch_gbe_phy_read_reg_miic(&adapter->hw, i, &tmp);
                *regs_buff++ = tmp;
        }
 }
@@ -349,25 +349,12 @@ static int pch_gbe_set_ringparam(struct net_device *netdev,
                err = pch_gbe_setup_tx_resources(adapter, adapter->tx_ring);
                if (err)
                        goto err_setup_tx;
-               /* save the new, restore the old in order to free it,
-                * then restore the new back again */
-#ifdef RINGFREE
-               adapter->rx_ring = rx_old;
-               adapter->tx_ring = tx_old;
-               pch_gbe_free_rx_resources(adapter, adapter->rx_ring);
-               pch_gbe_free_tx_resources(adapter, adapter->tx_ring);
-               kfree(tx_old);
-               kfree(rx_old);
-               adapter->rx_ring = rxdr;
-               adapter->tx_ring = txdr;
-#else
                pch_gbe_free_rx_resources(adapter, rx_old);
                pch_gbe_free_tx_resources(adapter, tx_old);
                kfree(tx_old);
                kfree(rx_old);
                adapter->rx_ring = rxdr;
                adapter->tx_ring = txdr;
-#endif
                err = pch_gbe_up(adapter);
        }
        return err;
index 34a1581eda95578b6350eeedf3c7372e4388953a..43c0c10dfeb7ad602417b3bcccfac8e3cffd9d27 100644 (file)
@@ -18,7 +18,7 @@
  */
 
 #include "pch_gbe.h"
-#include "pch_gbe_api.h"
+#include "pch_gbe_phy.h"
 #include <linux/module.h>
 #include <linux/net_tstamp.h>
 #include <linux/ptp_classify.h>
@@ -34,7 +34,6 @@ const char pch_driver_version[] = DRV_VERSION;
 #define PCH_GBE_DMA_ALIGN              0
 #define PCH_GBE_DMA_PADDING            2
 #define PCH_GBE_WATCHDOG_PERIOD                (5 * HZ)        /* watchdog time */
-#define PCH_GBE_COPYBREAK_DEFAULT      256
 #define PCH_GBE_PCI_BAR                        1
 #define PCH_GBE_RESERVE_MEMORY         0x200000        /* 2MB */
 
@@ -113,8 +112,6 @@ const char pch_driver_version[] = DRV_VERSION;
 
 #define MINNOW_PHY_RESET_GPIO          13
 
-static unsigned int copybreak __read_mostly = PCH_GBE_COPYBREAK_DEFAULT;
-
 static int pch_gbe_mdio_read(struct net_device *netdev, int addr, int reg);
 static void pch_gbe_mdio_write(struct net_device *netdev, int addr, int reg,
                               int data);
@@ -290,7 +287,7 @@ static inline void pch_gbe_mac_load_mac_addr(struct pch_gbe_hw *hw)
  * Returns:
  *     0:                      Successful.
  */
-s32 pch_gbe_mac_read_mac_addr(struct pch_gbe_hw *hw)
+static s32 pch_gbe_mac_read_mac_addr(struct pch_gbe_hw *hw)
 {
        struct pch_gbe_adapter *adapter = pch_gbe_hw_to_adapter(hw);
        u32  adr1a, adr1b;
@@ -369,9 +366,7 @@ static void pch_gbe_mac_reset_hw(struct pch_gbe_hw *hw)
        /* Read the MAC address. and store to the private data */
        pch_gbe_mac_read_mac_addr(hw);
        iowrite32(PCH_GBE_ALL_RST, &hw->reg->RESET);
-#ifdef PCH_GBE_MAC_IFOP_RGMII
        iowrite32(PCH_GBE_MODE_GMII_ETHER, &hw->reg->MODE);
-#endif
        pch_gbe_wait_clr_bit(&hw->reg->RESET, PCH_GBE_ALL_RST);
        /* Setup the receive addresses */
        pch_gbe_mac_mar_set(hw, hw->mac.addr, 0);
@@ -416,44 +411,6 @@ static void pch_gbe_mac_init_rx_addrs(struct pch_gbe_hw *hw, u16 mar_count)
        pch_gbe_wait_clr_bit(&hw->reg->ADDR_MASK, PCH_GBE_BUSY);
 }
 
-
-/**
- * pch_gbe_mac_mc_addr_list_update - Update Multicast addresses
- * @hw:                    Pointer to the HW structure
- * @mc_addr_list:   Array of multicast addresses to program
- * @mc_addr_count:  Number of multicast addresses to program
- * @mar_used_count: The first MAC Address register free to program
- * @mar_total_num:  Total number of supported MAC Address Registers
- */
-static void pch_gbe_mac_mc_addr_list_update(struct pch_gbe_hw *hw,
-                                           u8 *mc_addr_list, u32 mc_addr_count,
-                                           u32 mar_used_count, u32 mar_total_num)
-{
-       u32 i, adrmask;
-
-       /* Load the first set of multicast addresses into the exact
-        * filters (RAR).  If there are not enough to fill the RAR
-        * array, clear the filters.
-        */
-       for (i = mar_used_count; i < mar_total_num; i++) {
-               if (mc_addr_count) {
-                       pch_gbe_mac_mar_set(hw, mc_addr_list, i);
-                       mc_addr_count--;
-                       mc_addr_list += ETH_ALEN;
-               } else {
-                       /* Clear MAC address mask */
-                       adrmask = ioread32(&hw->reg->ADDR_MASK);
-                       iowrite32((adrmask | (0x0001 << i)),
-                                       &hw->reg->ADDR_MASK);
-                       /* wait busy */
-                       pch_gbe_wait_clr_bit(&hw->reg->ADDR_MASK, PCH_GBE_BUSY);
-                       /* Clear MAC address */
-                       iowrite32(0, &hw->reg->mac_adr[i].high);
-                       iowrite32(0, &hw->reg->mac_adr[i].low);
-               }
-       }
-}
-
 /**
  * pch_gbe_mac_force_mac_fc - Force the MAC's flow control settings
  * @hw:                    Pointer to the HW structure
@@ -763,14 +720,23 @@ void pch_gbe_reinit_locked(struct pch_gbe_adapter *adapter)
 void pch_gbe_reset(struct pch_gbe_adapter *adapter)
 {
        struct net_device *netdev = adapter->netdev;
+       struct pch_gbe_hw *hw = &adapter->hw;
+       s32 ret_val;
 
-       pch_gbe_mac_reset_hw(&adapter->hw);
+       pch_gbe_mac_reset_hw(hw);
        /* reprogram multicast address register after reset */
        pch_gbe_set_multi(netdev);
        /* Setup the receive address. */
-       pch_gbe_mac_init_rx_addrs(&adapter->hw, PCH_GBE_MAR_ENTRIES);
-       if (pch_gbe_hal_init_hw(&adapter->hw))
-               netdev_err(netdev, "Hardware Error\n");
+       pch_gbe_mac_init_rx_addrs(hw, PCH_GBE_MAR_ENTRIES);
+
+       ret_val = pch_gbe_phy_get_id(hw);
+       if (ret_val) {
+               netdev_err(adapter->netdev, "pch_gbe_phy_get_id error\n");
+               return;
+       }
+       pch_gbe_phy_init_setting(hw);
+       /* Setup Mac interface option RGMII */
+       pch_gbe_phy_set_rgmii(hw);
 }
 
 /**
@@ -1036,7 +1002,6 @@ static void pch_gbe_set_rgmii_ctrl(struct pch_gbe_adapter *adapter, u16 speed,
        unsigned long rgmii = 0;
 
        /* Set the RGMII control. */
-#ifdef PCH_GBE_MAC_IFOP_RGMII
        switch (speed) {
        case SPEED_10:
                rgmii = (PCH_GBE_RGMII_RATE_2_5M |
@@ -1052,10 +1017,6 @@ static void pch_gbe_set_rgmii_ctrl(struct pch_gbe_adapter *adapter, u16 speed,
                break;
        }
        iowrite32(rgmii, &hw->reg->RGMII_CTRL);
-#else  /* GMII */
-       rgmii = 0;
-       iowrite32(rgmii, &hw->reg->RGMII_CTRL);
-#endif
 }
 static void pch_gbe_set_mode(struct pch_gbe_adapter *adapter, u16 speed,
                              u16 duplex)
@@ -2029,12 +1990,8 @@ static int pch_gbe_sw_init(struct pch_gbe_adapter *adapter)
        adapter->rx_buffer_len = PCH_GBE_FRAME_SIZE_2048;
        hw->mac.max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
        hw->mac.min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
+       hw->phy.reset_delay_us = PCH_GBE_PHY_RESET_DELAY_US;
 
-       /* Initialize the hardware-specific values */
-       if (pch_gbe_hal_setup_init_funcs(hw)) {
-               netdev_err(netdev, "Hardware Initialization Failure\n");
-               return -EIO;
-       }
        if (pch_gbe_alloc_queues(adapter)) {
                netdev_err(netdev, "Unable to allocate memory for queues\n");
                return -ENOMEM;
@@ -2075,7 +2032,7 @@ static int pch_gbe_open(struct net_device *netdev)
        err = pch_gbe_setup_rx_resources(adapter, adapter->rx_ring);
        if (err)
                goto err_setup_rx;
-       pch_gbe_hal_power_up_phy(hw);
+       pch_gbe_phy_power_up(hw);
        err = pch_gbe_up(adapter);
        if (err)
                goto err_up;
@@ -2084,7 +2041,7 @@ static int pch_gbe_open(struct net_device *netdev)
 
 err_up:
        if (!adapter->wake_up_evt)
-               pch_gbe_hal_power_down_phy(hw);
+               pch_gbe_phy_power_down(hw);
        pch_gbe_free_rx_resources(adapter, adapter->rx_ring);
 err_setup_rx:
        pch_gbe_free_tx_resources(adapter, adapter->tx_ring);
@@ -2107,7 +2064,7 @@ static int pch_gbe_stop(struct net_device *netdev)
 
        pch_gbe_down(adapter);
        if (!adapter->wake_up_evt)
-               pch_gbe_hal_power_down_phy(hw);
+               pch_gbe_phy_power_down(hw);
        pch_gbe_free_tx_resources(adapter, adapter->tx_ring);
        pch_gbe_free_rx_resources(adapter, adapter->rx_ring);
        return 0;
@@ -2148,50 +2105,52 @@ static void pch_gbe_set_multi(struct net_device *netdev)
        struct pch_gbe_adapter *adapter = netdev_priv(netdev);
        struct pch_gbe_hw *hw = &adapter->hw;
        struct netdev_hw_addr *ha;
-       u8 *mta_list;
-       u32 rctl;
-       int i;
-       int mc_count;
+       u32 rctl, adrmask;
+       int mc_count, i;
 
        netdev_dbg(netdev, "netdev->flags : 0x%08x\n", netdev->flags);
 
-       /* Check for Promiscuous and All Multicast modes */
+       /* By default enable address & multicast filtering */
        rctl = ioread32(&hw->reg->RX_MODE);
+       rctl |= PCH_GBE_ADD_FIL_EN | PCH_GBE_MLT_FIL_EN;
+
+       /* Promiscuous mode disables all hardware address filtering */
+       if (netdev->flags & IFF_PROMISC)
+               rctl &= ~(PCH_GBE_ADD_FIL_EN | PCH_GBE_MLT_FIL_EN);
+
+       /* If we want to monitor more multicast addresses than the hardware can
+        * support, then disable hardware multicast filtering.
+        */
        mc_count = netdev_mc_count(netdev);
-       if ((netdev->flags & IFF_PROMISC)) {
-               rctl &= ~PCH_GBE_ADD_FIL_EN;
-               rctl &= ~PCH_GBE_MLT_FIL_EN;
-       } else if ((netdev->flags & IFF_ALLMULTI)) {
-               /* all the multicasting receive permissions */
-               rctl |= PCH_GBE_ADD_FIL_EN;
+       if ((netdev->flags & IFF_ALLMULTI) || mc_count >= PCH_GBE_MAR_ENTRIES)
                rctl &= ~PCH_GBE_MLT_FIL_EN;
-       } else {
-               if (mc_count >= PCH_GBE_MAR_ENTRIES) {
-                       /* all the multicasting receive permissions */
-                       rctl |= PCH_GBE_ADD_FIL_EN;
-                       rctl &= ~PCH_GBE_MLT_FIL_EN;
-               } else {
-                       rctl |= (PCH_GBE_ADD_FIL_EN | PCH_GBE_MLT_FIL_EN);
-               }
-       }
+
        iowrite32(rctl, &hw->reg->RX_MODE);
 
-       if (mc_count >= PCH_GBE_MAR_ENTRIES)
-               return;
-       mta_list = kmalloc_array(ETH_ALEN, mc_count, GFP_ATOMIC);
-       if (!mta_list)
+       /* If we're not using multicast filtering, there's no point in
+        * configuring the unused MAC address registers.
+        */
+       if (!(rctl & PCH_GBE_MLT_FIL_EN))
                return;
 
-       /* The shared function expects a packed array of only addresses. */
-       i = 0;
-       netdev_for_each_mc_addr(ha, netdev) {
-               if (i == mc_count)
-                       break;
-               memcpy(mta_list + (i++ * ETH_ALEN), &ha->addr, ETH_ALEN);
+       /* Load the first set of multicast addresses into MAC address registers
+        * for use by hardware filtering.
+        */
+       i = 1;
+       netdev_for_each_mc_addr(ha, netdev)
+               pch_gbe_mac_mar_set(hw, ha->addr, i++);
+
+       /* If there are spare MAC registers, mask & clear them */
+       for (; i < PCH_GBE_MAR_ENTRIES; i++) {
+               /* Clear MAC address mask */
+               adrmask = ioread32(&hw->reg->ADDR_MASK);
+               iowrite32(adrmask | BIT(i), &hw->reg->ADDR_MASK);
+               /* wait busy */
+               pch_gbe_wait_clr_bit(&hw->reg->ADDR_MASK, PCH_GBE_BUSY);
+               /* Clear MAC address */
+               iowrite32(0, &hw->reg->mac_adr[i].high);
+               iowrite32(0, &hw->reg->mac_adr[i].low);
        }
-       pch_gbe_mac_mc_addr_list_update(hw, mta_list, i, 1,
-                                       PCH_GBE_MAR_ENTRIES);
-       kfree(mta_list);
 
        netdev_dbg(netdev,
                 "RX_MODE reg(check bit31,30 ADD,MLT) : 0x%08x  netdev->mc_count : 0x%08x\n",
@@ -2437,7 +2396,7 @@ static pci_ers_result_t pch_gbe_io_slot_reset(struct pci_dev *pdev)
        }
        pci_set_master(pdev);
        pci_enable_wake(pdev, PCI_D0, 0);
-       pch_gbe_hal_power_up_phy(hw);
+       pch_gbe_phy_power_up(hw);
        pch_gbe_reset(adapter);
        /* Clear wake up status */
        pch_gbe_mac_set_wol_event(hw, 0);
@@ -2482,7 +2441,7 @@ static int __pch_gbe_suspend(struct pci_dev *pdev)
                pch_gbe_mac_set_wol_event(hw, wufc);
                pci_disable_device(pdev);
        } else {
-               pch_gbe_hal_power_down_phy(hw);
+               pch_gbe_phy_power_down(hw);
                pch_gbe_mac_set_wol_event(hw, wufc);
                pci_disable_device(pdev);
        }
@@ -2511,7 +2470,7 @@ static int pch_gbe_resume(struct device *device)
                return err;
        }
        pci_set_master(pdev);
-       pch_gbe_hal_power_up_phy(hw);
+       pch_gbe_phy_power_up(hw);
        pch_gbe_reset(adapter);
        /* Clear wake on lan control and status */
        pch_gbe_mac_set_wol_event(hw, 0);
@@ -2541,7 +2500,7 @@ static void pch_gbe_remove(struct pci_dev *pdev)
        cancel_work_sync(&adapter->reset_task);
        unregister_netdev(netdev);
 
-       pch_gbe_hal_phy_hw_reset(&adapter->hw);
+       pch_gbe_phy_hw_reset(&adapter->hw);
 
        free_netdev(netdev);
 }
@@ -2627,10 +2586,9 @@ static int pch_gbe_probe(struct pci_dev *pdev,
                dev_err(&pdev->dev, "PHY initialize error\n");
                goto err_free_adapter;
        }
-       pch_gbe_hal_get_bus_info(&adapter->hw);
 
        /* Read the MAC address. and store to the private data */
-       ret = pch_gbe_hal_read_mac_addr(&adapter->hw);
+       ret = pch_gbe_mac_read_mac_addr(&adapter->hw);
        if (ret) {
                dev_err(&pdev->dev, "MAC address Read Error\n");
                goto err_free_adapter;
@@ -2677,7 +2635,7 @@ static int pch_gbe_probe(struct pci_dev *pdev,
        return 0;
 
 err_free_adapter:
-       pch_gbe_hal_phy_hw_reset(&adapter->hw);
+       pch_gbe_phy_hw_reset(&adapter->hw);
 err_free_netdev:
        free_netdev(netdev);
        return ret;
@@ -2776,32 +2734,7 @@ static struct pci_driver pch_gbe_driver = {
        .shutdown = pch_gbe_shutdown,
        .err_handler = &pch_gbe_err_handler
 };
-
-
-static int __init pch_gbe_init_module(void)
-{
-       int ret;
-
-       pr_info("EG20T PCH Gigabit Ethernet Driver - version %s\n",DRV_VERSION);
-       ret = pci_register_driver(&pch_gbe_driver);
-       if (copybreak != PCH_GBE_COPYBREAK_DEFAULT) {
-               if (copybreak == 0) {
-                       pr_info("copybreak disabled\n");
-               } else {
-                       pr_info("copybreak enabled for packets <= %u bytes\n",
-                               copybreak);
-               }
-       }
-       return ret;
-}
-
-static void __exit pch_gbe_exit_module(void)
-{
-       pci_unregister_driver(&pch_gbe_driver);
-}
-
-module_init(pch_gbe_init_module);
-module_exit(pch_gbe_exit_module);
+module_pci_driver(pch_gbe_driver);
 
 MODULE_DESCRIPTION("EG20T PCH Gigabit ethernet Driver");
 MODULE_AUTHOR("LAPIS SEMICONDUCTOR, <tshimizu818@gmail.com>");
@@ -2809,8 +2742,4 @@ MODULE_LICENSE("GPL");
 MODULE_VERSION(DRV_VERSION);
 MODULE_DEVICE_TABLE(pci, pch_gbe_pcidev_id);
 
-module_param(copybreak, uint, 0644);
-MODULE_PARM_DESC(copybreak,
-       "Maximum size of packet that is copied to a new buffer on receive");
-
 /* pch_gbe_main.c */
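
The hand-rolled init/exit pair is replaced with module_pci_driver(), which per include/linux/pci.h expands to roughly the code below; the only behaviour lost is the copybreak banner, and the copybreak parameter is removed by this patch anyway:

static int __init pch_gbe_driver_init(void)
{
	return pci_register_driver(&pch_gbe_driver);
}
module_init(pch_gbe_driver_init);

static void __exit pch_gbe_driver_exit(void)
{
	pci_unregister_driver(&pch_gbe_driver);
}
module_exit(pch_gbe_driver_exit);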
index a5cad5ea9436bae73fd8fa58f2cfb01b7c713a63..6b35b573beef292148b9d392c2af739e0a1fc334 100644 (file)
@@ -184,7 +184,7 @@ s32 pch_gbe_phy_write_reg_miic(struct pch_gbe_hw *hw, u32 offset, u16 data)
  * pch_gbe_phy_sw_reset - PHY software reset
  * @hw:                    Pointer to the HW structure
  */
-void pch_gbe_phy_sw_reset(struct pch_gbe_hw *hw)
+static void pch_gbe_phy_sw_reset(struct pch_gbe_hw *hw)
 {
        u16 phy_ctrl;
 
index 95ad0151ad028aca6312f3b0c65157a3f2d0ba7d..23ac38711619207c93e4a5e378c85298b8145595 100644 (file)
 
 #define PCH_GBE_PHY_REGS_LEN           32
 #define        PCH_GBE_PHY_RESET_DELAY_US      10
-#define PCH_GBE_MAC_IFOP_RGMII
 
 s32 pch_gbe_phy_get_id(struct pch_gbe_hw *hw);
 s32 pch_gbe_phy_read_reg_miic(struct pch_gbe_hw *hw, u32 offset, u16 *data);
 s32 pch_gbe_phy_write_reg_miic(struct pch_gbe_hw *hw, u32 offset, u16 data);
-void pch_gbe_phy_sw_reset(struct pch_gbe_hw *hw);
 void pch_gbe_phy_hw_reset(struct pch_gbe_hw *hw);
 void pch_gbe_phy_power_up(struct pch_gbe_hw *hw);
 void pch_gbe_phy_power_down(struct pch_gbe_hw *hw);
index b5ea2a56106ef3c82571c354c318b8fe40086578..1df28f2edd1f9b051ede136997e1e99a6a4c83fc 100644 (file)
@@ -2,7 +2,7 @@
 # Packet engine device configuration
 #
 
-config NET_PACKET_ENGINE
+config NET_VENDOR_PACKET_ENGINES
        bool "Packet Engine devices"
        default y
        depends on PCI
@@ -14,7 +14,7 @@ config NET_PACKET_ENGINE
          the questions about packet engine devices. If you say Y, you will
          be asked for your specific card in the following questions.
 
-if NET_PACKET_ENGINE
+if NET_VENDOR_PACKET_ENGINES
 
 config HAMACHI
        tristate "Packet Engines Hamachi GNIC-II support"
@@ -40,4 +40,4 @@ config YELLOWFIN
          To compile this driver as a module, choose M here: the module
          will be called yellowfin.  This is recommended.
 
-endif # NET_PACKET_ENGINE
+endif # NET_VENDOR_PACKET_ENGINES
index 1cd39c9a03455595d23c490313043f416309d6f3..52ad8062133521a31254789fc43126e80142b0bc 100644 (file)
@@ -566,9 +566,8 @@ static int
 netxen_send_cmd_descs(struct netxen_adapter *adapter,
                struct cmd_desc_type0 *cmd_desc_arr, int nr_desc)
 {
-       u32 i, producer, consumer;
+       u32 i, producer;
        struct netxen_cmd_buffer *pbuf;
-       struct cmd_desc_type0 *cmd_desc;
        struct nx_host_tx_ring *tx_ring;
 
        i = 0;
@@ -580,7 +579,6 @@ netxen_send_cmd_descs(struct netxen_adapter *adapter,
        __netif_tx_lock_bh(tx_ring->txq);
 
        producer = tx_ring->producer;
-       consumer = tx_ring->sw_consumer;
 
        if (nr_desc >= netxen_tx_avail(tx_ring)) {
                netif_tx_stop_queue(tx_ring->txq);
@@ -595,8 +593,6 @@ netxen_send_cmd_descs(struct netxen_adapter *adapter,
        }
 
        do {
-               cmd_desc = &cmd_desc_arr[i];
-
                pbuf = &tx_ring->cmd_buf_arr[producer];
                pbuf->skb = NULL;
                pbuf->frag_count = 0;
@@ -2350,7 +2346,7 @@ static int netxen_md_entry_err_chk(struct netxen_adapter *adapter,
 static int netxen_parse_md_template(struct netxen_adapter *adapter)
 {
        int num_of_entries, buff_level, e_cnt, esize;
-       int end_cnt = 0, rv = 0, sane_start = 0, sane_end = 0;
+       int rv = 0, sane_start = 0, sane_end = 0;
        char *dbuff;
        void *template_buff = adapter->mdump.md_template;
        char *dump_buff = adapter->mdump.md_capture_buff;
@@ -2386,8 +2382,6 @@ static int netxen_parse_md_template(struct netxen_adapter *adapter)
                        break;
                case RDEND:
                        entry->hdr.driver_flags |= NX_DUMP_SKIP;
-                       if (!sane_end)
-                               end_cnt = e_cnt;
                        sane_end += 1;
                        break;
                case CNTRL:
index 8259e8309320ae9ea3e000a2a048be449ba708cb..69aa7fc392c5e4ad1cbcd9025f56bffdf3aa92c7 100644 (file)
@@ -2073,7 +2073,7 @@ netxen_nic_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
        struct skb_frag_struct *frag;
 
        u32 producer;
-       int frag_count, no_of_desc;
+       int frag_count;
        u32 num_txd = tx_ring->num_desc;
 
        frag_count = skb_shinfo(skb)->nr_frags + 1;
@@ -2093,8 +2093,6 @@ netxen_nic_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 
                frag_count = 1 + skb_shinfo(skb)->nr_frags;
        }
-       /* 4 fragments per cmd des */
-       no_of_desc = (frag_count + 3) >> 2;
 
        if (unlikely(netxen_tx_avail(tx_ring) <= TX_STOP_THRESH)) {
                netif_stop_queue(netdev);
index b5b5ff725426c13b5c379cfe788ff4d73e50480c..f1977aa440e5d19733f5e55bc6f6635c94b35c60 100644 (file)
@@ -1531,7 +1531,7 @@ void qed_qm_init_pf(struct qed_hwfn *p_hwfn,
 }
 
 /* CM PF */
-void qed_cm_init_pf(struct qed_hwfn *p_hwfn)
+static void qed_cm_init_pf(struct qed_hwfn *p_hwfn)
 {
        /* XCM pure-LB queue */
        STORE_RT_REG(p_hwfn, XCM_REG_CON_PHY_Q3_RT_OFFSET,
index e0680ce9132815568914dff86606363b9a02cb88..d02e774c8d666067bca9629b1d585bb4bb223bf3 100644 (file)
@@ -221,7 +221,6 @@ qed_dcbx_update_app_info(struct qed_dcbx_results *p_data,
        struct qed_hw_info *p_info = &p_hwfn->hw_info;
        enum qed_pci_personality personality;
        enum dcbx_protocol_type id;
-       char *name;
        int i;
 
        for (i = 0; i < ARRAY_SIZE(qed_dcbx_app_update); i++) {
@@ -231,7 +230,6 @@ qed_dcbx_update_app_info(struct qed_dcbx_results *p_data,
                        continue;
 
                personality = qed_dcbx_app_update[i].personality;
-               name = qed_dcbx_app_update[i].name;
 
                qed_dcbx_set_params(p_data, p_info, enable,
                                    prio, tc, type, personality);
@@ -869,7 +867,7 @@ static int qed_dcbx_read_mib(struct qed_hwfn *p_hwfn,
        return rc;
 }
 
-void qed_dcbx_aen(struct qed_hwfn *hwfn, u32 mib_type)
+static void qed_dcbx_aen(struct qed_hwfn *hwfn, u32 mib_type)
 {
        struct qed_common_cb_ops *op = hwfn->cdev->protocol_ops.common;
        void *cookie = hwfn->cdev->ops_cookie;
index 4340c4c90bcbe8b03e5373cfc674c8840ff640d9..1aa9fc1c5890f0e274fbe92bc2caeec843f305dc 100644 (file)
@@ -7838,8 +7838,8 @@ int qed_dbg_igu_fifo_size(struct qed_dev *cdev)
        return qed_dbg_feature_size(cdev, DBG_FEATURE_IGU_FIFO);
 }
 
-int qed_dbg_nvm_image_length(struct qed_hwfn *p_hwfn,
-                            enum qed_nvm_images image_id, u32 *length)
+static int qed_dbg_nvm_image_length(struct qed_hwfn *p_hwfn,
+                                   enum qed_nvm_images image_id, u32 *length)
 {
        struct qed_nvm_image_att image_att;
        int rc;
@@ -7854,8 +7854,9 @@ int qed_dbg_nvm_image_length(struct qed_hwfn *p_hwfn,
        return rc;
 }
 
-int qed_dbg_nvm_image(struct qed_dev *cdev, void *buffer,
-                     u32 *num_dumped_bytes, enum qed_nvm_images image_id)
+static int qed_dbg_nvm_image(struct qed_dev *cdev, void *buffer,
+                            u32 *num_dumped_bytes,
+                            enum qed_nvm_images image_id)
 {
        struct qed_hwfn *p_hwfn =
                &cdev->hwfns[cdev->dbg_params.engine_for_debug];
index e5249b4741d03f7c347c70a861288b787653741a..6a0b46f214f4fae3e365eb7b8e5aa5cf2248c9fb 100644 (file)
@@ -230,12 +230,12 @@ static u32 qed_get_pq_flags(struct qed_hwfn *p_hwfn)
 }
 
 /* Getters for resource amounts necessary for qm initialization */
-u8 qed_init_qm_get_num_tcs(struct qed_hwfn *p_hwfn)
+static u8 qed_init_qm_get_num_tcs(struct qed_hwfn *p_hwfn)
 {
        return p_hwfn->hw_info.num_hw_tc;
 }
 
-u16 qed_init_qm_get_num_vfs(struct qed_hwfn *p_hwfn)
+static u16 qed_init_qm_get_num_vfs(struct qed_hwfn *p_hwfn)
 {
        return IS_QED_SRIOV(p_hwfn->cdev) ?
               p_hwfn->cdev->p_iov_info->total_vfs : 0;
@@ -243,7 +243,7 @@ u16 qed_init_qm_get_num_vfs(struct qed_hwfn *p_hwfn)
 
 #define NUM_DEFAULT_RLS 1
 
-u16 qed_init_qm_get_num_pf_rls(struct qed_hwfn *p_hwfn)
+static u16 qed_init_qm_get_num_pf_rls(struct qed_hwfn *p_hwfn)
 {
        u16 num_pf_rls, num_vfs = qed_init_qm_get_num_vfs(p_hwfn);
 
@@ -261,7 +261,7 @@ u16 qed_init_qm_get_num_pf_rls(struct qed_hwfn *p_hwfn)
        return num_pf_rls;
 }
 
-u16 qed_init_qm_get_num_vports(struct qed_hwfn *p_hwfn)
+static u16 qed_init_qm_get_num_vports(struct qed_hwfn *p_hwfn)
 {
        u32 pq_flags = qed_get_pq_flags(p_hwfn);
 
@@ -273,7 +273,7 @@ u16 qed_init_qm_get_num_vports(struct qed_hwfn *p_hwfn)
 }
 
 /* calc amount of PQs according to the requested flags */
-u16 qed_init_qm_get_num_pqs(struct qed_hwfn *p_hwfn)
+static u16 qed_init_qm_get_num_pqs(struct qed_hwfn *p_hwfn)
 {
        u32 pq_flags = qed_get_pq_flags(p_hwfn);
 
@@ -507,16 +507,6 @@ u16 qed_get_cm_pq_idx_vf(struct qed_hwfn *p_hwfn, u16 vf)
        return qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_VFS) + vf;
 }
 
-u16 qed_get_cm_pq_idx_rl(struct qed_hwfn *p_hwfn, u8 rl)
-{
-       u16 max_rl = qed_init_qm_get_num_pf_rls(p_hwfn);
-
-       if (rl > max_rl)
-               DP_ERR(p_hwfn, "rl %d must be smaller than %d\n", rl, max_rl);
-
-       return qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_RLS) + rl;
-}
-
 /* Functions for creating specific types of pqs */
 static void qed_init_qm_lb_pq(struct qed_hwfn *p_hwfn)
 {
index bee10c1781fb9dd5657c8906a6192414f1124705..8faceb691657ff26fb2b95d962e19d75efa867c5 100644 (file)
@@ -12444,6 +12444,8 @@ struct public_drv_mb {
 #define DRV_MSG_CODE_STATS_TYPE_ISCSI           3
 #define DRV_MSG_CODE_STATS_TYPE_RDMA            4
 
+#define DRV_MSG_CODE_TRANSCEIVER_READ           0x00160000
+
 #define DRV_MSG_CODE_MASK_PARITIES              0x001a0000
 
 #define DRV_MSG_CODE_BIST_TEST                 0x001e0000
@@ -12543,6 +12545,15 @@ struct public_drv_mb {
 #define DRV_MB_PARAM_SET_LED_MODE_ON           0x1
 #define DRV_MB_PARAM_SET_LED_MODE_OFF          0x2
 
+#define DRV_MB_PARAM_TRANSCEIVER_PORT_OFFSET           0
+#define DRV_MB_PARAM_TRANSCEIVER_PORT_MASK             0x00000003
+#define DRV_MB_PARAM_TRANSCEIVER_SIZE_OFFSET           2
+#define DRV_MB_PARAM_TRANSCEIVER_SIZE_MASK             0x000000FC
+#define DRV_MB_PARAM_TRANSCEIVER_I2C_ADDRESS_OFFSET    8
+#define DRV_MB_PARAM_TRANSCEIVER_I2C_ADDRESS_MASK      0x0000FF00
+#define DRV_MB_PARAM_TRANSCEIVER_OFFSET_OFFSET         16
+#define DRV_MB_PARAM_TRANSCEIVER_OFFSET_MASK           0xFFFF0000
+
        /* Resource Allocation params - Driver version support */
 #define DRV_MB_PARAM_RESOURCE_ALLOC_VERSION_MAJOR_MASK 0xFFFF0000
 #define DRV_MB_PARAM_RESOURCE_ALLOC_VERSION_MAJOR_SHIFT        16
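The DRV_MB_PARAM_TRANSCEIVER_* fields above pack the whole read request into one 32-bit mailbox parameter: port in bits 1:0, transaction size in bits 7:2, I2C device address in bits 15:8, byte offset in bits 31:16. A sketch of the packing that qed_mcp_phy_sfp_read() performs further down, with illustrative values only (port 0, device address 0xA0, a 16-byte read at byte offset 0x60):

        u32 param = 0;

        param |= (0 << DRV_MB_PARAM_TRANSCEIVER_PORT_OFFSET) &
                 DRV_MB_PARAM_TRANSCEIVER_PORT_MASK;
        param |= (16 << DRV_MB_PARAM_TRANSCEIVER_SIZE_OFFSET) &
                 DRV_MB_PARAM_TRANSCEIVER_SIZE_MASK;
        param |= (0xA0 << DRV_MB_PARAM_TRANSCEIVER_I2C_ADDRESS_OFFSET) &
                 DRV_MB_PARAM_TRANSCEIVER_I2C_ADDRESS_MASK;
        param |= (0x60 << DRV_MB_PARAM_TRANSCEIVER_OFFSET_OFFSET) &
                 DRV_MB_PARAM_TRANSCEIVER_OFFSET_MASK;
        /* param == 0x0060a040 */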
@@ -12596,6 +12607,9 @@ struct public_drv_mb {
 #define FW_MSG_CODE_PHY_OK                     0x00110000
 #define FW_MSG_CODE_OK                         0x00160000
 #define FW_MSG_CODE_ERROR                      0x00170000
+#define FW_MSG_CODE_TRANSCEIVER_DIAG_OK                0x00160000
+#define FW_MSG_CODE_TRANSCEIVER_DIAG_ERROR     0x00170000
+#define FW_MSG_CODE_TRANSCEIVER_NOT_PRESENT    0x00020000
 
 #define FW_MSG_CODE_OS_WOL_SUPPORTED            0x00800000
 #define FW_MSG_CODE_OS_WOL_NOT_SUPPORTED        0x00810000
@@ -12687,6 +12701,8 @@ struct mcp_public_data {
        struct public_func func[MCP_GLOB_FUNC_MAX];
 };
 
+#define MAX_I2C_TRANSACTION_SIZE       16
+
 /* OCBB definitions */
 enum tlvs {
        /* Category 1: Device Properties */
index d845badf9b907ef06bd0ebfbf7879fe5afe5347f..d6430dfebd831a7be759751a080e516d22937ead 100644 (file)
@@ -1225,19 +1225,6 @@ void qed_gft_disable(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u16 pf_id)
               0);
 }
 
-void qed_set_gft_event_id_cm_hdr(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
-{
-       u32 rfs_cm_hdr_event_id;
-
-       /* Set RFS event ID to be awakened i Tstorm By Prs */
-       rfs_cm_hdr_event_id = qed_rd(p_hwfn, p_ptt, PRS_REG_CM_HDR_GFT);
-       rfs_cm_hdr_event_id |= T_ETH_PACKET_ACTION_GFT_EVENTID <<
-                              PRS_REG_CM_HDR_GFT_EVENT_ID_SHIFT;
-       rfs_cm_hdr_event_id |= PARSER_ETH_CONN_GFT_ACTION_CM_HDR <<
-                              PRS_REG_CM_HDR_GFT_CM_HDR_SHIFT;
-       qed_wr(p_hwfn, p_ptt, PRS_REG_CM_HDR_GFT, rfs_cm_hdr_event_id);
-}
-
 void qed_gft_config(struct qed_hwfn *p_hwfn,
                    struct qed_ptt *p_ptt,
                    u16 pf_id,
index c0d4a54a5edba6a66ff44b79db3b7c7ad4d52db2..1135387bd99d704f517679c4716760e39acce52c 100644 (file)
@@ -873,8 +873,8 @@ static void qed_iscsi_release_connection(struct qed_hwfn *p_hwfn,
        spin_unlock_bh(&p_hwfn->p_iscsi_info->lock);
 }
 
-void qed_iscsi_free_connection(struct qed_hwfn *p_hwfn,
-                              struct qed_iscsi_conn *p_conn)
+static void qed_iscsi_free_connection(struct qed_hwfn *p_hwfn,
+                                     struct qed_iscsi_conn *p_conn)
 {
        qed_chain_free(p_hwfn->cdev, &p_conn->xhq);
        qed_chain_free(p_hwfn->cdev, &p_conn->uhq);
index 90a2b53096e257bdef3ae86c3f7710a8aa89e935..17f3dfa2cc94084552a6f66cae0044c46af35fec 100644 (file)
@@ -377,7 +377,7 @@ qed_iwarp2roce_state(enum qed_iwarp_qp_state state)
        }
 }
 
-const char *iwarp_state_names[] = {
+static const char *iwarp_state_names[] = {
        "IDLE",
        "RTS",
        "TERMINATE",
@@ -942,7 +942,7 @@ qed_iwarp_return_ep(struct qed_hwfn *p_hwfn, struct qed_iwarp_ep *ep)
        spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
 }
 
-void
+static void
 qed_iwarp_parse_private_data(struct qed_hwfn *p_hwfn, struct qed_iwarp_ep *ep)
 {
        struct mpa_v2_hdr *mpa_v2_params;
@@ -967,7 +967,7 @@ qed_iwarp_parse_private_data(struct qed_hwfn *p_hwfn, struct qed_iwarp_ep *ep)
                                       mpa_data_size;
 }
 
-void
+static void
 qed_iwarp_mpa_reply_arrived(struct qed_hwfn *p_hwfn, struct qed_iwarp_ep *ep)
 {
        struct qed_iwarp_cm_event_params params;
@@ -2500,7 +2500,7 @@ static void qed_iwarp_ll2_rel_tx_pkt(void *cxt, u8 connection_handle,
 /* The only slowpath for iwarp ll2 is unalign flush. When this completion
  * is received, need to reset the FPDU.
  */
-void
+static void
 qed_iwarp_ll2_slowpath(void *cxt,
                       u8 connection_handle,
                       u32 opaque_data_0, u32 opaque_data_1)
@@ -2803,8 +2803,9 @@ int qed_iwarp_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
        return qed_iwarp_ll2_stop(p_hwfn, p_ptt);
 }
 
-void qed_iwarp_qp_in_error(struct qed_hwfn *p_hwfn,
-                          struct qed_iwarp_ep *ep, u8 fw_return_code)
+static void qed_iwarp_qp_in_error(struct qed_hwfn *p_hwfn,
+                                 struct qed_iwarp_ep *ep,
+                                 u8 fw_return_code)
 {
        struct qed_iwarp_cm_event_params params;
 
@@ -2824,8 +2825,9 @@ void qed_iwarp_qp_in_error(struct qed_hwfn *p_hwfn,
        ep->event_cb(ep->cb_context, &params);
 }
 
-void qed_iwarp_exception_received(struct qed_hwfn *p_hwfn,
-                                 struct qed_iwarp_ep *ep, int fw_ret_code)
+static void qed_iwarp_exception_received(struct qed_hwfn *p_hwfn,
+                                        struct qed_iwarp_ep *ep,
+                                        int fw_ret_code)
 {
        struct qed_iwarp_cm_event_params params;
        bool event_cb = false;
@@ -2954,7 +2956,7 @@ qed_iwarp_tcp_connect_unsuccessful(struct qed_hwfn *p_hwfn,
        }
 }
 
-void
+static void
 qed_iwarp_connect_complete(struct qed_hwfn *p_hwfn,
                           struct qed_iwarp_ep *ep, u8 fw_return_code)
 {
index 012973d75ad039436fb0007e9452eb0565f4938c..14ac9cab265341b9a7d2d1c10fa037ea6e4dd20f 100644 (file)
@@ -158,7 +158,8 @@ static void qed_ll2_kill_buffers(struct qed_dev *cdev)
                qed_ll2_dealloc_buffer(cdev, buffer);
 }
 
-void qed_ll2b_complete_rx_packet(void *cxt, struct qed_ll2_comp_rx_data *data)
+static void qed_ll2b_complete_rx_packet(void *cxt,
+                                       struct qed_ll2_comp_rx_data *data)
 {
        struct qed_hwfn *p_hwfn = cxt;
        struct qed_ll2_buffer *buffer = data->cookie;
index 758a9a5127fa8c00566e4f90d5f75db636570e33..dbe81310c0b6282599f2605a4c37838d38ac9ee5 100644 (file)
@@ -2102,6 +2102,28 @@ out:
        return status;
 }
 
+static int qed_read_module_eeprom(struct qed_dev *cdev, char *buf,
+                                 u8 dev_addr, u32 offset, u32 len)
+{
+       struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
+       struct qed_ptt *ptt;
+       int rc = 0;
+
+       if (IS_VF(cdev))
+               return 0;
+
+       ptt = qed_ptt_acquire(hwfn);
+       if (!ptt)
+               return -EAGAIN;
+
+       rc = qed_mcp_phy_sfp_read(hwfn, ptt, MFW_PORT(hwfn), dev_addr,
+                                 offset, len, buf);
+
+       qed_ptt_release(hwfn, ptt);
+
+       return rc;
+}
+
 static struct qed_selftest_ops qed_selftest_ops_pass = {
        .selftest_memory = &qed_selftest_memory,
        .selftest_interrupt = &qed_selftest_interrupt,
@@ -2144,6 +2166,7 @@ const struct qed_common_ops qed_common_ops_pass = {
        .update_mac = &qed_update_mac,
        .update_mtu = &qed_update_mtu,
        .update_wol = &qed_update_wol,
+       .read_module_eeprom = &qed_read_module_eeprom,
 };
 
 void qed_get_protocol_stats(struct qed_dev *cdev,
index cdd645024a32aadc40f54c4e02a88988898ce219..8e4f60e4520a34fcdd5a242584663a423f616caa 100644 (file)
@@ -570,12 +570,13 @@ int qed_mcp_cmd(struct qed_hwfn *p_hwfn,
        return 0;
 }
 
-int qed_mcp_nvm_wr_cmd(struct qed_hwfn *p_hwfn,
-                      struct qed_ptt *p_ptt,
-                      u32 cmd,
-                      u32 param,
-                      u32 *o_mcp_resp,
-                      u32 *o_mcp_param, u32 i_txn_size, u32 *i_buf)
+static int
+qed_mcp_nvm_wr_cmd(struct qed_hwfn *p_hwfn,
+                  struct qed_ptt *p_ptt,
+                  u32 cmd,
+                  u32 param,
+                  u32 *o_mcp_resp,
+                  u32 *o_mcp_param, u32 i_txn_size, u32 *i_buf)
 {
        struct qed_mcp_mb_params mb_params;
        int rc;
@@ -2473,6 +2474,55 @@ out:
        return rc;
 }
 
+int qed_mcp_phy_sfp_read(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
+                        u32 port, u32 addr, u32 offset, u32 len, u8 *p_buf)
+{
+       u32 bytes_left, bytes_to_copy, buf_size, nvm_offset = 0;
+       u32 resp, param;
+       int rc;
+
+       nvm_offset |= (port << DRV_MB_PARAM_TRANSCEIVER_PORT_OFFSET) &
+                      DRV_MB_PARAM_TRANSCEIVER_PORT_MASK;
+       nvm_offset |= (addr << DRV_MB_PARAM_TRANSCEIVER_I2C_ADDRESS_OFFSET) &
+                      DRV_MB_PARAM_TRANSCEIVER_I2C_ADDRESS_MASK;
+
+       addr = offset;
+       offset = 0;
+       bytes_left = len;
+       while (bytes_left > 0) {
+               bytes_to_copy = min_t(u32, bytes_left,
+                                     MAX_I2C_TRANSACTION_SIZE);
+               nvm_offset &= (DRV_MB_PARAM_TRANSCEIVER_I2C_ADDRESS_MASK |
+                              DRV_MB_PARAM_TRANSCEIVER_PORT_MASK);
+               nvm_offset |= ((addr + offset) <<
+                              DRV_MB_PARAM_TRANSCEIVER_OFFSET_OFFSET) &
+                              DRV_MB_PARAM_TRANSCEIVER_OFFSET_MASK;
+               nvm_offset |= (bytes_to_copy <<
+                              DRV_MB_PARAM_TRANSCEIVER_SIZE_OFFSET) &
+                              DRV_MB_PARAM_TRANSCEIVER_SIZE_MASK;
+               rc = qed_mcp_nvm_rd_cmd(p_hwfn, p_ptt,
+                                       DRV_MSG_CODE_TRANSCEIVER_READ,
+                                       nvm_offset, &resp, &param, &buf_size,
+                                       (u32 *)(p_buf + offset));
+               if (rc) {
+                       DP_NOTICE(p_hwfn,
+                                 "Failed to send a transceiver read command to the MFW. rc = %d.\n",
+                                 rc);
+                       return rc;
+               }
+
+               if (resp == FW_MSG_CODE_TRANSCEIVER_NOT_PRESENT)
+                       return -ENODEV;
+               else if (resp != FW_MSG_CODE_TRANSCEIVER_DIAG_OK)
+                       return -EINVAL;
+
+               offset += buf_size;
+               bytes_left -= buf_size;
+       }
+
+       return 0;
+}
+
 int qed_mcp_bist_register_test(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
        u32 drv_mb_param = 0, rsp, param;
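Two details of the transfer loop above are worth noting: each DRV_MSG_CODE_TRANSCEIVER_READ transaction moves at most MAX_I2C_TRANSACTION_SIZE (16) bytes, and the loop advances by buf_size, the byte count the MFW actually returned, rather than by the requested chunk size. Rough cost of a full module dump under that limit:

        /* Full SFF-8472 image (two 256-byte pages), 16 bytes per command: */
        u32 cmds = DIV_ROUND_UP(ETH_MODULE_SFF_8472_LEN,
                                MAX_I2C_TRANSACTION_SIZE);  /* 512 / 16 = 32 */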
@@ -2959,7 +3009,7 @@ static int qed_mcp_resource_cmd(struct qed_hwfn *p_hwfn,
        return rc;
 }
 
-int
+static int
 __qed_mcp_resc_lock(struct qed_hwfn *p_hwfn,
                    struct qed_ptt *p_ptt,
                    struct qed_resc_lock_params *p_params)
index 632a838f1fe3b8780d3a58795afac5bcf63156c7..047976d5c6e962e19323bd5fb6ce99abbd92aa2e 100644 (file)
@@ -839,6 +839,22 @@ int qed_mcp_nvm_rd_cmd(struct qed_hwfn *p_hwfn,
                       u32 *o_mcp_resp,
                       u32 *o_mcp_param, u32 *o_txn_size, u32 *o_buf);
 
+/**
+ * @brief Read from SFP
+ *
+ *  @param p_hwfn - hw function
+ *  @param p_ptt  - PTT required for register access
+ *  @param port   - transceiver port
+ *  @param addr   - I2C address
+ *  @param offset - offset within the SFP EEPROM
+ *  @param len    - buffer length
+ *  @param p_buf  - buffer to read into
+ *
+ * @return int - 0 - operation was successful.
+ */
+int qed_mcp_phy_sfp_read(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
+                        u32 port, u32 addr, u32 offset, u32 len, u8 *p_buf);
+
 /**
  * @brief indicates whether the MFW objects [under mcp_info] are accessible
  *
index 101d677114f2a5e1c7402ef2b16c2d2929ff25c3..be941cfaa2d4fdf9f50eedd6467033617bfcdba7 100644 (file)
@@ -134,7 +134,7 @@ static bool qed_bmap_is_empty(struct qed_bmap *bmap)
        return bmap->max_count == find_first_bit(bmap->bitmap, bmap->max_count);
 }
 
-u32 qed_rdma_get_sb_id(void *p_hwfn, u32 rel_sb_id)
+static u32 qed_rdma_get_sb_id(void *p_hwfn, u32 rel_sb_id)
 {
        /* First sb id for RoCE is after all the l2 sb */
        return FEAT_NUM((struct qed_hwfn *)p_hwfn, QED_PF_L2_QUE) + rel_sb_id;
@@ -706,7 +706,7 @@ static int qed_rdma_setup(struct qed_hwfn *p_hwfn,
        return qed_rdma_start_fw(p_hwfn, params, p_ptt);
 }
 
-int qed_rdma_stop(void *rdma_cxt)
+static int qed_rdma_stop(void *rdma_cxt)
 {
        struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
        struct rdma_close_func_ramrod_data *p_ramrod;
index b5ce1581645f993a2e034112f30fed7c81533c18..ada4c181086443c83cabf36f516768d3bf8bc3c4 100644 (file)
@@ -157,7 +157,7 @@ static enum roce_flavor qed_roce_mode_to_flavor(enum roce_mode roce_mode)
        return flavor;
 }
 
-void qed_roce_free_cid_pair(struct qed_hwfn *p_hwfn, u16 cid)
+static void qed_roce_free_cid_pair(struct qed_hwfn *p_hwfn, u16 cid)
 {
        spin_lock_bh(&p_hwfn->p_rdma_info->lock);
        qed_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->cid_map, cid);
index 26e918d7f2f9c0603ab6b0f2f132daba6d7bcc3b..9b08a9d9e15130f0518b1f7608bbaa36e6eb15b0 100644 (file)
@@ -672,8 +672,8 @@ int qed_iov_hw_info(struct qed_hwfn *p_hwfn)
        return 0;
 }
 
-bool _qed_iov_pf_sanity_check(struct qed_hwfn *p_hwfn,
-                             int vfid, bool b_fail_malicious)
+static bool _qed_iov_pf_sanity_check(struct qed_hwfn *p_hwfn,
+                                    int vfid, bool b_fail_malicious)
 {
        /* Check PF supports sriov */
        if (IS_VF(p_hwfn->cdev) || !IS_QED_SRIOV(p_hwfn->cdev) ||
@@ -687,7 +687,7 @@ bool _qed_iov_pf_sanity_check(struct qed_hwfn *p_hwfn,
        return true;
 }
 
-bool qed_iov_pf_sanity_check(struct qed_hwfn *p_hwfn, int vfid)
+static bool qed_iov_pf_sanity_check(struct qed_hwfn *p_hwfn, int vfid)
 {
        return _qed_iov_pf_sanity_check(p_hwfn, vfid, true);
 }
@@ -3979,7 +3979,7 @@ static void qed_iov_process_mbx_req(struct qed_hwfn *p_hwfn,
        }
 }
 
-void qed_iov_pf_get_pending_events(struct qed_hwfn *p_hwfn, u64 *events)
+static void qed_iov_pf_get_pending_events(struct qed_hwfn *p_hwfn, u64 *events)
 {
        int i;
 
index be6ddde1a104ff34050ee72b7dc5bc40658e6c2b..3d42696598202591794613afebea7ed42d51be6e 100644 (file)
@@ -169,7 +169,7 @@ static void qed_vf_pf_add_qid(struct qed_hwfn *p_hwfn,
        p_qid_tlv->qid = p_cid->qid_usage_idx;
 }
 
-int _qed_vf_pf_release(struct qed_hwfn *p_hwfn, bool b_final)
+static int _qed_vf_pf_release(struct qed_hwfn *p_hwfn, bool b_final)
 {
        struct qed_vf_iov *p_iov = p_hwfn->vf_iov_info;
        struct pfvf_def_resp_tlv *resp;
index f4a0f8ff826108436a3ddb973c7e424c1cd71e4e..b37857f3f950895ff2060317f3920b158ba0119c 100644 (file)
@@ -1780,6 +1780,92 @@ static int qede_set_eee(struct net_device *dev, struct ethtool_eee *edata)
        return 0;
 }
 
+static int qede_get_module_info(struct net_device *dev,
+                               struct ethtool_modinfo *modinfo)
+{
+       struct qede_dev *edev = netdev_priv(dev);
+       u8 buf[4];
+       int rc;
+
+       /* Read first 4 bytes to find the sfp type */
+       rc = edev->ops->common->read_module_eeprom(edev->cdev, buf,
+                                                  QED_I2C_DEV_ADDR_A0, 0, 4);
+       if (rc) {
+               DP_ERR(edev, "Failed reading EEPROM data %d\n", rc);
+               return rc;
+       }
+
+       switch (buf[0]) {
+       case 0x3: /* SFP, SFP+, SFP-28 */
+               modinfo->type = ETH_MODULE_SFF_8472;
+               modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN;
+               break;
+       case 0xc: /* QSFP */
+       case 0xd: /* QSFP+ */
+               modinfo->type = ETH_MODULE_SFF_8436;
+               modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN;
+               break;
+       case 0x11: /* QSFP-28 */
+               modinfo->type = ETH_MODULE_SFF_8636;
+               modinfo->eeprom_len = ETH_MODULE_SFF_8636_LEN;
+               break;
+       default:
+               DP_ERR(edev, "Unknown transceiver type 0x%x\n", buf[0]);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+static int qede_get_module_eeprom(struct net_device *dev,
+                                 struct ethtool_eeprom *ee, u8 *data)
+{
+       struct qede_dev *edev = netdev_priv(dev);
+       u32 start_addr = ee->offset, size = 0;
+       u8 *buf = data;
+       int rc = 0;
+
+       /* Read A0 section */
+       if (ee->offset < ETH_MODULE_SFF_8079_LEN) {
+               /* Limit transfer size to the A0 section boundary */
+               if (ee->offset + ee->len > ETH_MODULE_SFF_8079_LEN)
+                       size = ETH_MODULE_SFF_8079_LEN - ee->offset;
+               else
+                       size = ee->len;
+
+               rc = edev->ops->common->read_module_eeprom(edev->cdev, buf,
+                                                          QED_I2C_DEV_ADDR_A0,
+                                                          start_addr, size);
+               if (rc) {
+                       DP_ERR(edev, "Failed reading A0 section %d\n", rc);
+                       return rc;
+               }
+
+               buf += size;
+               start_addr += size;
+       }
+
+       /* Read A2 section */
+       if (start_addr >= ETH_MODULE_SFF_8079_LEN &&
+           start_addr < ETH_MODULE_SFF_8472_LEN) {
+               size = ee->len - size;
+               /* Limit transfer size to the A2 section boundary */
+               if (start_addr + size > ETH_MODULE_SFF_8472_LEN)
+                       size = ETH_MODULE_SFF_8472_LEN - start_addr;
+               start_addr -= ETH_MODULE_SFF_8079_LEN;
+               rc = edev->ops->common->read_module_eeprom(edev->cdev, buf,
+                                                          QED_I2C_DEV_ADDR_A2,
+                                                          start_addr, size);
+               if (rc) {
+                       DP_VERBOSE(edev, QED_MSG_DEBUG,
+                                  "Failed reading A2 section %d\n", rc);
+                       return 0;
+               }
+       }
+
+       return rc;
+}
+
 static const struct ethtool_ops qede_ethtool_ops = {
        .get_link_ksettings = qede_get_link_ksettings,
        .set_link_ksettings = qede_set_link_ksettings,
@@ -1813,6 +1899,8 @@ static const struct ethtool_ops qede_ethtool_ops = {
        .get_channels = qede_get_channels,
        .set_channels = qede_set_channels,
        .self_test = qede_self_test,
+       .get_module_info = qede_get_module_info,
+       .get_module_eeprom = qede_get_module_eeprom,
        .get_eee = qede_get_eee,
        .set_eee = qede_set_eee,
 
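With these two hooks in place, ethtool -m <dev> can dump and decode module EEPROMs on qede interfaces. The identifier byte switched on in qede_get_module_info() is byte 0 of the A0 page, per SFF-8024 (0x03 SFP/SFP+/SFP28, 0x0C/0x0D QSFP/QSFP+, 0x11 QSFP28); the split in qede_get_module_eeprom() follows SFF-8472, where the first 256 bytes sit at I2C address 0xA0 and the diagnostic page at 0xA2. A worked example of the split, assuming a request with offset 250 and length 20:

        u32 start_addr = 250, len = 20, size;

        size = ETH_MODULE_SFF_8079_LEN - start_addr;  /* 6 bytes from A0: 250..255 */
        start_addr += size;                           /* 256 */
        size = len - size;                            /* 14 bytes left */
        start_addr -= ETH_MODULE_SFF_8079_LEN;        /* A2 read covers bytes 0..13 */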
index b823bfe2ea4d6a6851699ef225265dbd333b0143..f9a327c821eb608eae5743250bb0dbc55bb1fc2f 100644 (file)
@@ -1116,7 +1116,6 @@ int qede_xdp(struct net_device *dev, struct netdev_bpf *xdp)
        case XDP_SETUP_PROG:
                return qede_xdp_set(edev, xdp->prog);
        case XDP_QUERY_PROG:
-               xdp->prog_attached = !!edev->xdp_prog;
                xdp->prog_id = edev->xdp_prog ? edev->xdp_prog->aux->id : 0;
                return 0;
        default:
index 0c744b9c6e0adf96f91d6aba6c7cda34b208c7fe..77e386ebff09c110ecd77e6c289e3354fe52cd22 100644 (file)
@@ -212,7 +212,7 @@ int qlcnic_sriov_init(struct qlcnic_adapter *adapter, int num_vfs)
                        vp->max_tx_bw = MAX_BW;
                        vp->min_tx_bw = MIN_BW;
                        vp->spoofchk = false;
-                       random_ether_addr(vp->mac);
+                       eth_random_addr(vp->mac);
                        dev_info(&adapter->pdev->dev,
                                 "MAC Address %pM is configured for VF %d\n",
                                 vp->mac, i);
index b9a7548ec6a0a7ed2cc7939b4adac70188b37cd8..0afc3d335d562d24466b9192aea291b910ebcdfe 100644 (file)
@@ -210,7 +210,7 @@ void rmnet_vnd_setup(struct net_device *rmnet_dev)
        rmnet_dev->netdev_ops = &rmnet_vnd_ops;
        rmnet_dev->mtu = RMNET_DFLT_PACKET_SIZE;
        rmnet_dev->needed_headroom = RMNET_NEEDED_HEADROOM;
-       random_ether_addr(rmnet_dev->dev_addr);
+       eth_random_addr(rmnet_dev->dev_addr);
        rmnet_dev->tx_queue_len = RMNET_TX_QUEUE_LEN;
 
        /* Raw IP mode */
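This and the qlcnic hunk above make the same mechanical substitution: random_ether_addr() had become a compatibility alias for eth_random_addr() (include/linux/etherdevice.h). Semantics of the helper, sketched:

        u8 mac[ETH_ALEN];

        get_random_bytes(mac, ETH_ALEN);
        mac[0] &= ~0x01;        /* clear multicast bit */
        mac[0] |= 0x02;         /* set locally-administered bit */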
index 7c69f4c8134da8d13e85235d830c2b58c975b740..e1cd934c2e4f40b7d423fc1bc7870c63034ae739 100644 (file)
@@ -99,7 +99,7 @@ config R8169
        depends on PCI
        select FW_LOADER
        select CRC32
-       select MII
+       select PHYLIB
        ---help---
          Say Y here if you have a Realtek 8169 PCI Gigabit Ethernet adapter.
 
index eaedc11ed686796b6246bf517bf7691aef43929c..8ea1fa36ca438e516a79bc20d066ad3e947a3ecd 100644 (file)
@@ -15,7 +15,7 @@
 #include <linux/etherdevice.h>
 #include <linux/delay.h>
 #include <linux/ethtool.h>
-#include <linux/mii.h>
+#include <linux/phy.h>
 #include <linux/if_vlan.h>
 #include <linux/crc32.h>
 #include <linux/in.h>
@@ -25,7 +25,6 @@
 #include <linux/dma-mapping.h>
 #include <linux/pm_runtime.h>
 #include <linux/firmware.h>
-#include <linux/pci-aspm.h>
 #include <linux/prefetch.h>
 #include <linux/ipv6.h>
 #include <net/ip6_checksum.h>
@@ -35,7 +34,6 @@
 
 #define RTL8169_VERSION "2.3LK-NAPI"
 #define MODULENAME "r8169"
-#define PFX MODULENAME ": "
 
 #define FIRMWARE_8168D_1       "rtl_nic/rtl8168d-1.fw"
 #define FIRMWARE_8168D_2       "rtl_nic/rtl8168d-2.fw"
 #define FIRMWARE_8107E_1       "rtl_nic/rtl8107e-1.fw"
 #define FIRMWARE_8107E_2       "rtl_nic/rtl8107e-2.fw"
 
-#ifdef RTL8169_DEBUG
-#define assert(expr) \
-       if (!(expr)) {                                  \
-               printk( "Assertion failed! %s,%s,%s,line=%d\n", \
-               #expr,__FILE__,__func__,__LINE__);              \
-       }
-#define dprintk(fmt, args...) \
-       do { printk(KERN_DEBUG PFX fmt, ## args); } while (0)
-#else
-#define assert(expr) do {} while (0)
-#define dprintk(fmt, args...)  do {} while (0)
-#endif /* RTL8169_DEBUG */
-
 #define R8169_MSG_DEFAULT \
        (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_IFUP | NETIF_MSG_IFDOWN)
 
@@ -95,7 +80,6 @@ static const int multicast_filter_limit = 32;
 #define R8169_RX_RING_BYTES    (NUM_RX_DESC * sizeof(struct RxDesc))
 
 #define RTL8169_TX_TIMEOUT     (6*HZ)
-#define RTL8169_PHY_TIMEOUT    (10*HZ)
 
 /* write/read MMIO register */
 #define RTL_W8(tp, reg, val8)  writeb((val8), tp->mmio_addr + (reg))
@@ -399,12 +383,6 @@ enum rtl_registers {
        FuncForceEvent  = 0xfc,
 };
 
-enum rtl8110_registers {
-       TBICSR                  = 0x64,
-       TBI_ANAR                = 0x68,
-       TBI_LPAR                = 0x6a,
-};
-
 enum rtl8168_8101_registers {
        CSIDR                   = 0x64,
        CSIAR                   = 0x68,
@@ -571,14 +549,6 @@ enum rtl_register_content {
        PMEStatus       = (1 << 0),     /* PME status can be reset by PCI RST# */
        ASPM_en         = (1 << 0),     /* ASPM enable */
 
-       /* TBICSR p.28 */
-       TBIReset        = 0x80000000,
-       TBILoopback     = 0x40000000,
-       TBINwEnable     = 0x20000000,
-       TBINwRestart    = 0x10000000,
-       TBILinkOk       = 0x02000000,
-       TBINwComplete   = 0x01000000,
-
        /* CPlusCmd p.31 */
        EnableBist      = (1 << 15),    // 8168 8101
        Mac_dbgo_oe     = (1 << 14),    // 8168 8101
@@ -732,7 +702,6 @@ enum rtl_flag {
        RTL_FLAG_TASK_ENABLED,
        RTL_FLAG_TASK_SLOW_PENDING,
        RTL_FLAG_TASK_RESET_PENDING,
-       RTL_FLAG_TASK_PHY_PENDING,
        RTL_FLAG_MAX
 };
 
@@ -760,7 +729,6 @@ struct rtl8169_private {
        dma_addr_t RxPhyAddr;
        void *Rx_databuff[NUM_RX_DESC]; /* Rx data buffers */
        struct ring_info tx_skb[NUM_TX_DESC];   /* Tx data buffers */
-       struct timer_list timer;
        u16 cp_cmd;
 
        u16 event_slow;
@@ -776,14 +744,7 @@ struct rtl8169_private {
                void (*disable)(struct rtl8169_private *);
        } jumbo_ops;
 
-       int (*set_speed)(struct net_device *, u8 aneg, u16 sp, u8 dpx, u32 adv);
-       int (*get_link_ksettings)(struct net_device *,
-                                 struct ethtool_link_ksettings *);
-       void (*phy_reset_enable)(struct rtl8169_private *tp);
        void (*hw_start)(struct rtl8169_private *tp);
-       unsigned int (*phy_reset_pending)(struct rtl8169_private *tp);
-       unsigned int (*link_ok)(struct rtl8169_private *tp);
-       int (*do_ioctl)(struct rtl8169_private *tp, struct mii_ioctl_data *data, int cmd);
        bool (*tso_csum)(struct rtl8169_private *, struct sk_buff *, u32 *);
 
        struct {
@@ -792,7 +753,8 @@ struct rtl8169_private {
                struct work_struct work;
        } wk;
 
-       struct mii_if_info mii;
+       unsigned supports_gmii:1;
+       struct mii_bus *mii_bus;
        dma_addr_t counters_phys_addr;
        struct rtl8169_counters *counters;
        struct rtl8169_tc_offsets tc_offset;
@@ -1143,21 +1105,6 @@ static void rtl_w0w1_phy(struct rtl8169_private *tp, int reg_addr, int p, int m)
        rtl_writephy(tp, reg_addr, (val & ~m) | p);
 }
 
-static void rtl_mdio_write(struct net_device *dev, int phy_id, int location,
-                          int val)
-{
-       struct rtl8169_private *tp = netdev_priv(dev);
-
-       rtl_writephy(tp, location, val);
-}
-
-static int rtl_mdio_read(struct net_device *dev, int phy_id, int location)
-{
-       struct rtl8169_private *tp = netdev_priv(dev);
-
-       return rtl_readphy(tp, location);
-}
-
 DECLARE_RTL_COND(rtl_ephyar_cond)
 {
        return RTL_R32(tp, EPHYAR) & EPHYAR_FLAG;
@@ -1478,54 +1425,22 @@ static void rtl8169_irq_mask_and_ack(struct rtl8169_private *tp)
        RTL_R8(tp, ChipCmd);
 }
 
-static unsigned int rtl8169_tbi_reset_pending(struct rtl8169_private *tp)
-{
-       return RTL_R32(tp, TBICSR) & TBIReset;
-}
-
-static unsigned int rtl8169_xmii_reset_pending(struct rtl8169_private *tp)
-{
-       return rtl_readphy(tp, MII_BMCR) & BMCR_RESET;
-}
-
-static unsigned int rtl8169_tbi_link_ok(struct rtl8169_private *tp)
-{
-       return RTL_R32(tp, TBICSR) & TBILinkOk;
-}
-
-static unsigned int rtl8169_xmii_link_ok(struct rtl8169_private *tp)
-{
-       return RTL_R8(tp, PHYstatus) & LinkStatus;
-}
-
-static void rtl8169_tbi_reset_enable(struct rtl8169_private *tp)
-{
-       RTL_W32(tp, TBICSR, RTL_R32(tp, TBICSR) | TBIReset);
-}
-
-static void rtl8169_xmii_reset_enable(struct rtl8169_private *tp)
-{
-       unsigned int val;
-
-       val = rtl_readphy(tp, MII_BMCR) | BMCR_RESET;
-       rtl_writephy(tp, MII_BMCR, val & 0xffff);
-}
-
 static void rtl_link_chg_patch(struct rtl8169_private *tp)
 {
        struct net_device *dev = tp->dev;
+       struct phy_device *phydev = dev->phydev;
 
        if (!netif_running(dev))
                return;
 
        if (tp->mac_version == RTL_GIGA_MAC_VER_34 ||
            tp->mac_version == RTL_GIGA_MAC_VER_38) {
-               if (RTL_R8(tp, PHYstatus) & _1000bpsF) {
+               if (phydev->speed == SPEED_1000) {
                        rtl_eri_write(tp, 0x1bc, ERIAR_MASK_1111, 0x00000011,
                                      ERIAR_EXGMAC);
                        rtl_eri_write(tp, 0x1dc, ERIAR_MASK_1111, 0x00000005,
                                      ERIAR_EXGMAC);
-               } else if (RTL_R8(tp, PHYstatus) & _100bps) {
+               } else if (phydev->speed == SPEED_100) {
                        rtl_eri_write(tp, 0x1bc, ERIAR_MASK_1111, 0x0000001f,
                                      ERIAR_EXGMAC);
                        rtl_eri_write(tp, 0x1dc, ERIAR_MASK_1111, 0x00000005,
@@ -1543,7 +1458,7 @@ static void rtl_link_chg_patch(struct rtl8169_private *tp)
                             ERIAR_EXGMAC);
        } else if (tp->mac_version == RTL_GIGA_MAC_VER_35 ||
                   tp->mac_version == RTL_GIGA_MAC_VER_36) {
-               if (RTL_R8(tp, PHYstatus) & _1000bpsF) {
+               if (phydev->speed == SPEED_1000) {
                        rtl_eri_write(tp, 0x1bc, ERIAR_MASK_1111, 0x00000011,
                                      ERIAR_EXGMAC);
                        rtl_eri_write(tp, 0x1dc, ERIAR_MASK_1111, 0x00000005,
@@ -1555,7 +1470,7 @@ static void rtl_link_chg_patch(struct rtl8169_private *tp)
                                      ERIAR_EXGMAC);
                }
        } else if (tp->mac_version == RTL_GIGA_MAC_VER_37) {
-               if (RTL_R8(tp, PHYstatus) & _10bps) {
+               if (phydev->speed == SPEED_10) {
                        rtl_eri_write(tp, 0x1d0, ERIAR_MASK_0011, 0x4d02,
                                      ERIAR_EXGMAC);
                        rtl_eri_write(tp, 0x1dc, ERIAR_MASK_0011, 0x0060,
@@ -1567,25 +1482,6 @@ static void rtl_link_chg_patch(struct rtl8169_private *tp)
        }
 }
 
-static void rtl8169_check_link_status(struct net_device *dev,
-                                     struct rtl8169_private *tp)
-{
-       struct device *d = tp_to_dev(tp);
-
-       if (tp->link_ok(tp)) {
-               rtl_link_chg_patch(tp);
-               /* This is to cancel a scheduled suspend if there's one. */
-               pm_request_resume(d);
-               netif_carrier_on(dev);
-               if (net_ratelimit())
-                       netif_info(tp, ifup, dev, "link up\n");
-       } else {
-               netif_carrier_off(dev);
-               netif_info(tp, ifdown, dev, "link down\n");
-               pm_runtime_idle(d);
-       }
-}
-
 #define WAKE_ANY (WAKE_PHY | WAKE_MAGIC | WAKE_UCAST | WAKE_BCAST | WAKE_MCAST)
 
 static u32 __rtl8169_get_wol(struct rtl8169_private *tp)
@@ -1626,21 +1522,11 @@ static u32 __rtl8169_get_wol(struct rtl8169_private *tp)
 static void rtl8169_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
 {
        struct rtl8169_private *tp = netdev_priv(dev);
-       struct device *d = tp_to_dev(tp);
-
-       pm_runtime_get_noresume(d);
 
        rtl_lock_work(tp);
-
        wol->supported = WAKE_ANY;
-       if (pm_runtime_active(d))
-               wol->wolopts = __rtl8169_get_wol(tp);
-       else
-               wol->wolopts = tp->saved_wolopts;
-
+       wol->wolopts = tp->saved_wolopts;
        rtl_unlock_work(tp);
-
-       pm_runtime_put_noidle(d);
 }
 
 static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts)
@@ -1716,18 +1602,21 @@ static int rtl8169_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
        struct rtl8169_private *tp = netdev_priv(dev);
        struct device *d = tp_to_dev(tp);
 
+       if (wol->wolopts & ~WAKE_ANY)
+               return -EINVAL;
+
        pm_runtime_get_noresume(d);
 
        rtl_lock_work(tp);
 
+       tp->saved_wolopts = wol->wolopts;
+
        if (pm_runtime_active(d))
-               __rtl8169_set_wol(tp, wol->wolopts);
-       else
-               tp->saved_wolopts = wol->wolopts;
+               __rtl8169_set_wol(tp, tp->saved_wolopts);
 
        rtl_unlock_work(tp);
 
-       device_set_wakeup_enable(d, wol->wolopts);
+       device_set_wakeup_enable(d, tp->saved_wolopts);
 
        pm_runtime_put_noidle(d);
 
@@ -1759,124 +1648,6 @@ static int rtl8169_get_regs_len(struct net_device *dev)
        return R8169_REGS_SIZE;
 }
 
-static int rtl8169_set_speed_tbi(struct net_device *dev,
-                                u8 autoneg, u16 speed, u8 duplex, u32 ignored)
-{
-       struct rtl8169_private *tp = netdev_priv(dev);
-       int ret = 0;
-       u32 reg;
-
-       reg = RTL_R32(tp, TBICSR);
-       if ((autoneg == AUTONEG_DISABLE) && (speed == SPEED_1000) &&
-           (duplex == DUPLEX_FULL)) {
-               RTL_W32(tp, TBICSR, reg & ~(TBINwEnable | TBINwRestart));
-       } else if (autoneg == AUTONEG_ENABLE)
-               RTL_W32(tp, TBICSR, reg | TBINwEnable | TBINwRestart);
-       else {
-               netif_warn(tp, link, dev,
-                          "incorrect speed setting refused in TBI mode\n");
-               ret = -EOPNOTSUPP;
-       }
-
-       return ret;
-}
-
-static int rtl8169_set_speed_xmii(struct net_device *dev,
-                                 u8 autoneg, u16 speed, u8 duplex, u32 adv)
-{
-       struct rtl8169_private *tp = netdev_priv(dev);
-       int giga_ctrl, bmcr;
-       int rc = -EINVAL;
-
-       rtl_writephy(tp, 0x1f, 0x0000);
-
-       if (autoneg == AUTONEG_ENABLE) {
-               int auto_nego;
-
-               auto_nego = rtl_readphy(tp, MII_ADVERTISE);
-               auto_nego &= ~(ADVERTISE_10HALF | ADVERTISE_10FULL |
-                               ADVERTISE_100HALF | ADVERTISE_100FULL);
-
-               if (adv & ADVERTISED_10baseT_Half)
-                       auto_nego |= ADVERTISE_10HALF;
-               if (adv & ADVERTISED_10baseT_Full)
-                       auto_nego |= ADVERTISE_10FULL;
-               if (adv & ADVERTISED_100baseT_Half)
-                       auto_nego |= ADVERTISE_100HALF;
-               if (adv & ADVERTISED_100baseT_Full)
-                       auto_nego |= ADVERTISE_100FULL;
-
-               auto_nego |= ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM;
-
-               giga_ctrl = rtl_readphy(tp, MII_CTRL1000);
-               giga_ctrl &= ~(ADVERTISE_1000FULL | ADVERTISE_1000HALF);
-
-               /* The 8100e/8101e/8102e do Fast Ethernet only. */
-               if (tp->mii.supports_gmii) {
-                       if (adv & ADVERTISED_1000baseT_Half)
-                               giga_ctrl |= ADVERTISE_1000HALF;
-                       if (adv & ADVERTISED_1000baseT_Full)
-                               giga_ctrl |= ADVERTISE_1000FULL;
-               } else if (adv & (ADVERTISED_1000baseT_Half |
-                                 ADVERTISED_1000baseT_Full)) {
-                       netif_info(tp, link, dev,
-                                  "PHY does not support 1000Mbps\n");
-                       goto out;
-               }
-
-               bmcr = BMCR_ANENABLE | BMCR_ANRESTART;
-
-               rtl_writephy(tp, MII_ADVERTISE, auto_nego);
-               rtl_writephy(tp, MII_CTRL1000, giga_ctrl);
-       } else {
-               if (speed == SPEED_10)
-                       bmcr = 0;
-               else if (speed == SPEED_100)
-                       bmcr = BMCR_SPEED100;
-               else
-                       goto out;
-
-               if (duplex == DUPLEX_FULL)
-                       bmcr |= BMCR_FULLDPLX;
-       }
-
-       rtl_writephy(tp, MII_BMCR, bmcr);
-
-       if (tp->mac_version == RTL_GIGA_MAC_VER_02 ||
-           tp->mac_version == RTL_GIGA_MAC_VER_03) {
-               if ((speed == SPEED_100) && (autoneg != AUTONEG_ENABLE)) {
-                       rtl_writephy(tp, 0x17, 0x2138);
-                       rtl_writephy(tp, 0x0e, 0x0260);
-               } else {
-                       rtl_writephy(tp, 0x17, 0x2108);
-                       rtl_writephy(tp, 0x0e, 0x0000);
-               }
-       }
-
-       rc = 0;
-out:
-       return rc;
-}
-
-static int rtl8169_set_speed(struct net_device *dev,
-                            u8 autoneg, u16 speed, u8 duplex, u32 advertising)
-{
-       struct rtl8169_private *tp = netdev_priv(dev);
-       int ret;
-
-       ret = tp->set_speed(dev, autoneg, speed, duplex, advertising);
-       if (ret < 0)
-               goto out;
-
-       if (netif_running(dev) && (autoneg == AUTONEG_ENABLE) &&
-           (advertising & ADVERTISED_1000baseT_Full) &&
-           !pci_is_pcie(tp->pci_dev)) {
-               mod_timer(&tp->timer, jiffies + RTL8169_PHY_TIMEOUT);
-       }
-out:
-       return ret;
-}
-
 static netdev_features_t rtl8169_fix_features(struct net_device *dev,
        netdev_features_t features)
 {
@@ -1940,76 +1711,6 @@ static void rtl8169_rx_vlan_tag(struct RxDesc *desc, struct sk_buff *skb)
                __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), swab16(opts2 & 0xffff));
 }
 
-static int rtl8169_get_link_ksettings_tbi(struct net_device *dev,
-                                         struct ethtool_link_ksettings *cmd)
-{
-       struct rtl8169_private *tp = netdev_priv(dev);
-       u32 status;
-       u32 supported, advertising;
-
-       supported =
-               SUPPORTED_1000baseT_Full | SUPPORTED_Autoneg | SUPPORTED_FIBRE;
-       cmd->base.port = PORT_FIBRE;
-
-       status = RTL_R32(tp, TBICSR);
-       advertising = (status & TBINwEnable) ?  ADVERTISED_Autoneg : 0;
-       cmd->base.autoneg = !!(status & TBINwEnable);
-
-       cmd->base.speed = SPEED_1000;
-       cmd->base.duplex = DUPLEX_FULL; /* Always set */
-
-       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
-                                               supported);
-       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
-                                               advertising);
-
-       return 0;
-}
-
-static int rtl8169_get_link_ksettings_xmii(struct net_device *dev,
-                                          struct ethtool_link_ksettings *cmd)
-{
-       struct rtl8169_private *tp = netdev_priv(dev);
-
-       mii_ethtool_get_link_ksettings(&tp->mii, cmd);
-
-       return 0;
-}
-
-static int rtl8169_get_link_ksettings(struct net_device *dev,
-                                     struct ethtool_link_ksettings *cmd)
-{
-       struct rtl8169_private *tp = netdev_priv(dev);
-       int rc;
-
-       rtl_lock_work(tp);
-       rc = tp->get_link_ksettings(dev, cmd);
-       rtl_unlock_work(tp);
-
-       return rc;
-}
-
-static int rtl8169_set_link_ksettings(struct net_device *dev,
-                                     const struct ethtool_link_ksettings *cmd)
-{
-       struct rtl8169_private *tp = netdev_priv(dev);
-       int rc;
-       u32 advertising;
-
-       if (!ethtool_convert_link_mode_to_legacy_u32(&advertising,
-           cmd->link_modes.advertising))
-               return -EINVAL;
-
-       del_timer_sync(&tp->timer);
-
-       rtl_lock_work(tp);
-       rc = rtl8169_set_speed(dev, cmd->base.autoneg, cmd->base.speed,
-                              cmd->base.duplex, advertising);
-       rtl_unlock_work(tp);
-
-       return rc;
-}
-
 static void rtl8169_get_regs(struct net_device *dev, struct ethtool_regs *regs,
                             void *p)
 {
@@ -2185,13 +1886,6 @@ static void rtl8169_get_strings(struct net_device *dev, u32 stringset, u8 *data)
        }
 }
 
-static int rtl8169_nway_reset(struct net_device *dev)
-{
-       struct rtl8169_private *tp = netdev_priv(dev);
-
-       return mii_nway_restart(&tp->mii);
-}
-
 /*
  * Interrupt coalescing
  *
@@ -2264,7 +1958,7 @@ static const struct rtl_coalesce_info *rtl_coalesce_info(struct net_device *dev)
        const struct rtl_coalesce_info *ci;
        int rc;
 
-       rc = rtl8169_get_link_ksettings(dev, &ecmd);
+       rc = phy_ethtool_get_link_ksettings(dev, &ecmd);
        if (rc < 0)
                return ERR_PTR(rc);
 
@@ -2422,9 +2116,9 @@ static const struct ethtool_ops rtl8169_ethtool_ops = {
        .get_sset_count         = rtl8169_get_sset_count,
        .get_ethtool_stats      = rtl8169_get_ethtool_stats,
        .get_ts_info            = ethtool_op_get_ts_info,
-       .nway_reset             = rtl8169_nway_reset,
-       .get_link_ksettings     = rtl8169_get_link_ksettings,
-       .set_link_ksettings     = rtl8169_set_link_ksettings,
+       .nway_reset             = phy_ethtool_nway_reset,
+       .get_link_ksettings     = phy_ethtool_get_link_ksettings,
+       .set_link_ksettings     = phy_ethtool_set_link_ksettings,
 };
 
 static void rtl8169_get_mac_version(struct rtl8169_private *tp,
@@ -2537,15 +2231,15 @@ static void rtl8169_get_mac_version(struct rtl8169_private *tp,
                           "unknown MAC, using family default\n");
                tp->mac_version = default_version;
        } else if (tp->mac_version == RTL_GIGA_MAC_VER_42) {
-               tp->mac_version = tp->mii.supports_gmii ?
+               tp->mac_version = tp->supports_gmii ?
                                  RTL_GIGA_MAC_VER_42 :
                                  RTL_GIGA_MAC_VER_43;
        } else if (tp->mac_version == RTL_GIGA_MAC_VER_45) {
-               tp->mac_version = tp->mii.supports_gmii ?
+               tp->mac_version = tp->supports_gmii ?
                                  RTL_GIGA_MAC_VER_45 :
                                  RTL_GIGA_MAC_VER_47;
        } else if (tp->mac_version == RTL_GIGA_MAC_VER_46) {
-               tp->mac_version = tp->mii.supports_gmii ?
+               tp->mac_version = tp->supports_gmii ?
                                  RTL_GIGA_MAC_VER_46 :
                                  RTL_GIGA_MAC_VER_48;
        }
@@ -2553,7 +2247,7 @@ static void rtl8169_get_mac_version(struct rtl8169_private *tp,
 
 static void rtl8169_print_mac_version(struct rtl8169_private *tp)
 {
-       dprintk("mac_version = 0x%02x\n", tp->mac_version);
+       netif_dbg(tp, drv, tp->dev, "mac_version = 0x%02x\n", tp->mac_version);
 }
 
 struct phy_reg {
@@ -4405,62 +4099,16 @@ static void rtl_hw_phy_config(struct net_device *dev)
        }
 }
 
-static void rtl_phy_work(struct rtl8169_private *tp)
-{
-       struct timer_list *timer = &tp->timer;
-       unsigned long timeout = RTL8169_PHY_TIMEOUT;
-
-       assert(tp->mac_version > RTL_GIGA_MAC_VER_01);
-
-       if (tp->phy_reset_pending(tp)) {
-               /*
-                * A busy loop could burn quite a few cycles on nowadays CPU.
-                * Let's delay the execution of the timer for a few ticks.
-                */
-               timeout = HZ/10;
-               goto out_mod_timer;
-       }
-
-       if (tp->link_ok(tp))
-               return;
-
-       netif_dbg(tp, link, tp->dev, "PHY reset until link up\n");
-
-       tp->phy_reset_enable(tp);
-
-out_mod_timer:
-       mod_timer(timer, jiffies + timeout);
-}
-
 static void rtl_schedule_task(struct rtl8169_private *tp, enum rtl_flag flag)
 {
        if (!test_and_set_bit(flag, tp->wk.flags))
                schedule_work(&tp->wk.work);
 }
 
-static void rtl8169_phy_timer(struct timer_list *t)
-{
-       struct rtl8169_private *tp = from_timer(tp, t, timer);
-
-       rtl_schedule_task(tp, RTL_FLAG_TASK_PHY_PENDING);
-}
-
-DECLARE_RTL_COND(rtl_phy_reset_cond)
-{
-       return tp->phy_reset_pending(tp);
-}
-
-static void rtl8169_phy_reset(struct net_device *dev,
-                             struct rtl8169_private *tp)
-{
-       tp->phy_reset_enable(tp);
-       rtl_msleep_loop_wait_low(tp, &rtl_phy_reset_cond, 1, 100);
-}
-
 static bool rtl_tbi_enabled(struct rtl8169_private *tp)
 {
        return (tp->mac_version == RTL_GIGA_MAC_VER_01) &&
-           (RTL_R8(tp, PHYstatus) & TBI_Enable);
+              (RTL_R8(tp, PHYstatus) & TBI_Enable);
 }
 
 static void rtl8169_init_phy(struct net_device *dev, struct rtl8169_private *tp)
@@ -4468,7 +4116,8 @@ static void rtl8169_init_phy(struct net_device *dev, struct rtl8169_private *tp)
        rtl_hw_phy_config(dev);
 
        if (tp->mac_version <= RTL_GIGA_MAC_VER_06) {
-               dprintk("Set MAC Reg C+CR Offset 0x82h = 0x01h\n");
+               netif_dbg(tp, drv, dev,
+                         "Set MAC Reg C+CR Offset 0x82h = 0x01h\n");
                RTL_W8(tp, 0x82, 0x01);
        }
 
@@ -4478,23 +4127,18 @@ static void rtl8169_init_phy(struct net_device *dev, struct rtl8169_private *tp)
                pci_write_config_byte(tp->pci_dev, PCI_CACHE_LINE_SIZE, 0x08);
 
        if (tp->mac_version == RTL_GIGA_MAC_VER_02) {
-               dprintk("Set MAC Reg C+CR Offset 0x82h = 0x01h\n");
+               netif_dbg(tp, drv, dev,
+                         "Set MAC Reg C+CR Offset 0x82h = 0x01h\n");
                RTL_W8(tp, 0x82, 0x01);
-               dprintk("Set PHY Reg 0x0bh = 0x00h\n");
+               netif_dbg(tp, drv, dev,
+                         "Set PHY Reg 0x0bh = 0x00h\n");
                rtl_writephy(tp, 0x0b, 0x0000); //w 0x0b 15 0 0
        }
 
-       rtl8169_phy_reset(dev, tp);
+       /* We may have called phy_speed_down before */
+       phy_speed_up(dev->phydev);
 
-       rtl8169_set_speed(dev, AUTONEG_ENABLE, SPEED_1000, DUPLEX_FULL,
-                         ADVERTISED_10baseT_Half | ADVERTISED_10baseT_Full |
-                         ADVERTISED_100baseT_Half | ADVERTISED_100baseT_Full |
-                         (tp->mii.supports_gmii ?
-                          ADVERTISED_1000baseT_Half |
-                          ADVERTISED_1000baseT_Full : 0));
-
-       if (rtl_tbi_enabled(tp))
-               netif_info(tp, link, dev, "TBI auto-negotiating\n");
+       genphy_soft_reset(dev->phydev);
 }
 
 static void rtl_rar_set(struct rtl8169_private *tp, u8 *addr)
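phy_speed_down()/phy_speed_up() are the phylib pair behind the comment above: when suspending with Wake-on-LAN armed, the link is renegotiated down to the slowest mutually advertised speed to save power, and re-init restores full speed before the soft reset. How the two sides pair up (simplified; the suspend half appears in rtl_wol_pll_power_down() further down):

        /* suspend, WoL active: */
        phy_speed_down(tp->dev->phydev, false); /* false: don't wait for renegotiation */
        /* resume / rtl8169_init_phy(): */
        phy_speed_up(tp->dev->phydev);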
@@ -4539,34 +4183,10 @@ static int rtl_set_mac_address(struct net_device *dev, void *p)
 
 static int rtl8169_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
-       struct rtl8169_private *tp = netdev_priv(dev);
-       struct mii_ioctl_data *data = if_mii(ifr);
-
-       return netif_running(dev) ? tp->do_ioctl(tp, data, cmd) : -ENODEV;
-}
-
-static int rtl_xmii_ioctl(struct rtl8169_private *tp,
-                         struct mii_ioctl_data *data, int cmd)
-{
-       switch (cmd) {
-       case SIOCGMIIPHY:
-               data->phy_id = 32; /* Internal PHY */
-               return 0;
-
-       case SIOCGMIIREG:
-               data->val_out = rtl_readphy(tp, data->reg_num & 0x1f);
-               return 0;
-
-       case SIOCSMIIREG:
-               rtl_writephy(tp, data->reg_num & 0x1f, data->val_in);
-               return 0;
-       }
-       return -EOPNOTSUPP;
-}
+       if (!netif_running(dev))
+               return -ENODEV;
 
-static int rtl_tbi_ioctl(struct rtl8169_private *tp, struct mii_ioctl_data *data, int cmd)
-{
-       return -EOPNOTSUPP;
+       return phy_mii_ioctl(dev->phydev, ifr, cmd);
 }
 
 static void rtl_init_mdio_ops(struct rtl8169_private *tp)
@@ -4594,30 +4214,6 @@ static void rtl_init_mdio_ops(struct rtl8169_private *tp)
        }
 }
 
-static void rtl_speed_down(struct rtl8169_private *tp)
-{
-       u32 adv;
-       int lpa;
-
-       rtl_writephy(tp, 0x1f, 0x0000);
-       lpa = rtl_readphy(tp, MII_LPA);
-
-       if (lpa & (LPA_10HALF | LPA_10FULL))
-               adv = ADVERTISED_10baseT_Half | ADVERTISED_10baseT_Full;
-       else if (lpa & (LPA_100HALF | LPA_100FULL))
-               adv = ADVERTISED_10baseT_Half | ADVERTISED_10baseT_Full |
-                     ADVERTISED_100baseT_Half | ADVERTISED_100baseT_Full;
-       else
-               adv = ADVERTISED_10baseT_Half | ADVERTISED_10baseT_Full |
-                     ADVERTISED_100baseT_Half | ADVERTISED_100baseT_Full |
-                     (tp->mii.supports_gmii ?
-                      ADVERTISED_1000baseT_Half |
-                      ADVERTISED_1000baseT_Full : 0);
-
-       rtl8169_set_speed(tp->dev, AUTONEG_ENABLE, SPEED_1000, DUPLEX_FULL,
-                         adv);
-}
-
 static void rtl_wol_suspend_quirk(struct rtl8169_private *tp)
 {
        switch (tp->mac_version) {
@@ -4639,56 +4235,15 @@ static void rtl_wol_suspend_quirk(struct rtl8169_private *tp)
 
 static bool rtl_wol_pll_power_down(struct rtl8169_private *tp)
 {
-       if (!(__rtl8169_get_wol(tp) & WAKE_ANY))
+       if (!netif_running(tp->dev) || !__rtl8169_get_wol(tp))
                return false;
 
-       rtl_speed_down(tp);
+       phy_speed_down(tp->dev->phydev, false);
        rtl_wol_suspend_quirk(tp);
 
        return true;
 }
 
-static void r8168_phy_power_up(struct rtl8169_private *tp)
-{
-       rtl_writephy(tp, 0x1f, 0x0000);
-       switch (tp->mac_version) {
-       case RTL_GIGA_MAC_VER_11:
-       case RTL_GIGA_MAC_VER_12:
-       case RTL_GIGA_MAC_VER_17 ... RTL_GIGA_MAC_VER_28:
-       case RTL_GIGA_MAC_VER_31:
-               rtl_writephy(tp, 0x0e, 0x0000);
-               break;
-       default:
-               break;
-       }
-       rtl_writephy(tp, MII_BMCR, BMCR_ANENABLE);
-
-       /* give MAC/PHY some time to resume */
-       msleep(20);
-}
-
-static void r8168_phy_power_down(struct rtl8169_private *tp)
-{
-       rtl_writephy(tp, 0x1f, 0x0000);
-       switch (tp->mac_version) {
-       case RTL_GIGA_MAC_VER_32:
-       case RTL_GIGA_MAC_VER_33:
-       case RTL_GIGA_MAC_VER_40:
-       case RTL_GIGA_MAC_VER_41:
-               rtl_writephy(tp, MII_BMCR, BMCR_ANENABLE | BMCR_PDOWN);
-               break;
-
-       case RTL_GIGA_MAC_VER_11:
-       case RTL_GIGA_MAC_VER_12:
-       case RTL_GIGA_MAC_VER_17 ... RTL_GIGA_MAC_VER_28:
-       case RTL_GIGA_MAC_VER_31:
-               rtl_writephy(tp, 0x0e, 0x0200);
-       default:
-               rtl_writephy(tp, MII_BMCR, BMCR_PDOWN);
-               break;
-       }
-}
-
 static void r8168_pll_power_down(struct rtl8169_private *tp)
 {
        if (r8168_check_dash(tp))
@@ -4701,8 +4256,6 @@ static void r8168_pll_power_down(struct rtl8169_private *tp)
        if (rtl_wol_pll_power_down(tp))
                return;
 
-       r8168_phy_power_down(tp);
-
        switch (tp->mac_version) {
        case RTL_GIGA_MAC_VER_25 ... RTL_GIGA_MAC_VER_33:
        case RTL_GIGA_MAC_VER_37:
@@ -4754,7 +4307,9 @@ static void r8168_pll_power_up(struct rtl8169_private *tp)
                break;
        }
 
-       r8168_phy_power_up(tp);
+       phy_resume(tp->dev->phydev);
+       /* give MAC/PHY some time to resume */
+       msleep(20);
 }
 
 static void rtl_pll_power_down(struct rtl8169_private *tp)
@@ -5172,8 +4727,8 @@ static void rtl_hw_start_8169(struct rtl8169_private *tp)
 
        if (tp->mac_version == RTL_GIGA_MAC_VER_02 ||
            tp->mac_version == RTL_GIGA_MAC_VER_03) {
-               dprintk("Set MAC Reg C+CR Offset 0xe0. "
-                       "Bit-3 and bit-14 MUST be 1\n");
+               netif_dbg(tp, drv, tp->dev,
+                         "Set MAC Reg C+CR Offset 0xe0. Bit 3 and Bit 14 MUST be 1\n");
                tp->cp_cmd |= (1 << 14);
        }
 
@@ -5236,12 +4791,7 @@ static void rtl_csi_access_enable(struct rtl8169_private *tp, u8 val)
        rtl_csi_write(tp, 0x070c, csi | val << 24);
 }
 
-static void rtl_csi_access_enable_1(struct rtl8169_private *tp)
-{
-       rtl_csi_access_enable(tp, 0x17);
-}
-
-static void rtl_csi_access_enable_2(struct rtl8169_private *tp)
+static void rtl_set_def_aspm_entry_latency(struct rtl8169_private *tp)
 {
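+       /* per the new name: CSI value 0x27 programs the default ASPM entry latency */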
        rtl_csi_access_enable(tp, 0x27);
 }
@@ -5290,6 +4840,17 @@ static void rtl_pcie_state_l2l3_enable(struct rtl8169_private *tp, bool enable)
        RTL_W8(tp, Config3, data);
 }
 
+static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable)
+{
+       if (enable) {
+               RTL_W8(tp, Config2, RTL_R8(tp, Config2) | ClkReqEn);
+               RTL_W8(tp, Config5, RTL_R8(tp, Config5) | ASPM_en);
+       } else {
+               RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
+               RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
+       }
+}
+
 static void rtl_hw_start_8168bb(struct rtl8169_private *tp)
 {
        RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
@@ -5337,7 +4898,7 @@ static void rtl_hw_start_8168cp_1(struct rtl8169_private *tp)
                { 0x07, 0,      0x2000 }
        };
 
-       rtl_csi_access_enable_2(tp);
+       rtl_set_def_aspm_entry_latency(tp);
 
        rtl_ephy_init(tp, e_info_8168cp, ARRAY_SIZE(e_info_8168cp));
 
@@ -5346,7 +4907,7 @@ static void rtl_hw_start_8168cp_1(struct rtl8169_private *tp)
 
 static void rtl_hw_start_8168cp_2(struct rtl8169_private *tp)
 {
-       rtl_csi_access_enable_2(tp);
+       rtl_set_def_aspm_entry_latency(tp);
 
        RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
 
@@ -5359,7 +4920,7 @@ static void rtl_hw_start_8168cp_2(struct rtl8169_private *tp)
 
 static void rtl_hw_start_8168cp_3(struct rtl8169_private *tp)
 {
-       rtl_csi_access_enable_2(tp);
+       rtl_set_def_aspm_entry_latency(tp);
 
        RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
 
@@ -5383,7 +4944,7 @@ static void rtl_hw_start_8168c_1(struct rtl8169_private *tp)
                { 0x06, 0x0080, 0x0000 }
        };
 
-       rtl_csi_access_enable_2(tp);
+       rtl_set_def_aspm_entry_latency(tp);
 
        RTL_W8(tp, DBG_REG, 0x06 | FIX_NAK_1 | FIX_NAK_2);
 
@@ -5399,7 +4960,7 @@ static void rtl_hw_start_8168c_2(struct rtl8169_private *tp)
                { 0x03, 0x0400, 0x0220 }
        };
 
-       rtl_csi_access_enable_2(tp);
+       rtl_set_def_aspm_entry_latency(tp);
 
        rtl_ephy_init(tp, e_info_8168c_2, ARRAY_SIZE(e_info_8168c_2));
 
@@ -5413,14 +4974,14 @@ static void rtl_hw_start_8168c_3(struct rtl8169_private *tp)
 
 static void rtl_hw_start_8168c_4(struct rtl8169_private *tp)
 {
-       rtl_csi_access_enable_2(tp);
+       rtl_set_def_aspm_entry_latency(tp);
 
        __rtl_hw_start_8168cp(tp);
 }
 
 static void rtl_hw_start_8168d(struct rtl8169_private *tp)
 {
-       rtl_csi_access_enable_2(tp);
+       rtl_set_def_aspm_entry_latency(tp);
 
        rtl_disable_clock_request(tp);
 
@@ -5435,7 +4996,7 @@ static void rtl_hw_start_8168d(struct rtl8169_private *tp)
 
 static void rtl_hw_start_8168dp(struct rtl8169_private *tp)
 {
-       rtl_csi_access_enable_1(tp);
+       rtl_set_def_aspm_entry_latency(tp);
 
        if (tp->dev->mtu <= ETH_DATA_LEN)
                rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
@@ -5453,7 +5014,7 @@ static void rtl_hw_start_8168d_4(struct rtl8169_private *tp)
                { 0x0c, 0x0100, 0x0020 }
        };
 
-       rtl_csi_access_enable_1(tp);
+       rtl_set_def_aspm_entry_latency(tp);
 
        rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
 
@@ -5482,7 +5043,7 @@ static void rtl_hw_start_8168e_1(struct rtl8169_private *tp)
                { 0x0a, 0x0000, 0x0040 }
        };
 
-       rtl_csi_access_enable_2(tp);
+       rtl_set_def_aspm_entry_latency(tp);
 
        rtl_ephy_init(tp, e_info_8168e_1, ARRAY_SIZE(e_info_8168e_1));
 
@@ -5507,7 +5068,7 @@ static void rtl_hw_start_8168e_2(struct rtl8169_private *tp)
                { 0x19, 0x0000, 0x0224 }
        };
 
-       rtl_csi_access_enable_1(tp);
+       rtl_set_def_aspm_entry_latency(tp);
 
        rtl_ephy_init(tp, e_info_8168e_2, ARRAY_SIZE(e_info_8168e_2));
 
@@ -5536,11 +5097,13 @@ static void rtl_hw_start_8168e_2(struct rtl8169_private *tp)
        RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) | PFM_EN);
        RTL_W32(tp, MISC, RTL_R32(tp, MISC) | PWM_EN);
        RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~Spi_en);
+
+       rtl_hw_aspm_clkreq_enable(tp, true);
 }
 
 static void rtl_hw_start_8168f(struct rtl8169_private *tp)
 {
-       rtl_csi_access_enable_2(tp);
+       rtl_set_def_aspm_entry_latency(tp);
 
        rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
 
@@ -5611,7 +5174,7 @@ static void rtl_hw_start_8168g(struct rtl8169_private *tp)
        rtl_eri_write(tp, 0xd0, ERIAR_MASK_0001, 0x48, ERIAR_EXGMAC);
        rtl_eri_write(tp, 0xe8, ERIAR_MASK_1111, 0x00100006, ERIAR_EXGMAC);
 
-       rtl_csi_access_enable_1(tp);
+       rtl_set_def_aspm_entry_latency(tp);
 
        rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
 
@@ -5646,9 +5209,9 @@ static void rtl_hw_start_8168g_1(struct rtl8169_private *tp)
        rtl_hw_start_8168g(tp);
 
        /* disable ASPM and clock request before accessing the EPHY */
-       RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
-       RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
+       rtl_hw_aspm_clkreq_enable(tp, false);
        rtl_ephy_init(tp, e_info_8168g_1, ARRAY_SIZE(e_info_8168g_1));
+       rtl_hw_aspm_clkreq_enable(tp, true);
 }
 
 static void rtl_hw_start_8168g_2(struct rtl8169_private *tp)
@@ -5681,9 +5244,9 @@ static void rtl_hw_start_8411_2(struct rtl8169_private *tp)
        rtl_hw_start_8168g(tp);
 
        /* disable ASPM and clock request before accessing the EPHY */
-       RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
-       RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
+       rtl_hw_aspm_clkreq_enable(tp, false);
        rtl_ephy_init(tp, e_info_8411_2, ARRAY_SIZE(e_info_8411_2));
+       rtl_hw_aspm_clkreq_enable(tp, true);
 }
 
 static void rtl_hw_start_8168h_1(struct rtl8169_private *tp)
@@ -5700,8 +5263,7 @@ static void rtl_hw_start_8168h_1(struct rtl8169_private *tp)
        };
 
        /* disable ASPM and clock request before accessing the EPHY */
-       RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
-       RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
+       rtl_hw_aspm_clkreq_enable(tp, false);
        rtl_ephy_init(tp, e_info_8168h_1, ARRAY_SIZE(e_info_8168h_1));
 
        RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO);
@@ -5711,7 +5273,7 @@ static void rtl_hw_start_8168h_1(struct rtl8169_private *tp)
        rtl_eri_write(tp, 0xd0, ERIAR_MASK_0001, 0x48, ERIAR_EXGMAC);
        rtl_eri_write(tp, 0xe8, ERIAR_MASK_1111, 0x00100006, ERIAR_EXGMAC);
 
-       rtl_csi_access_enable_1(tp);
+       rtl_set_def_aspm_entry_latency(tp);
 
        rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
 
@@ -5780,6 +5342,8 @@ static void rtl_hw_start_8168h_1(struct rtl8169_private *tp)
        r8168_mac_ocp_write(tp, 0xe63e, 0x0000);
        r8168_mac_ocp_write(tp, 0xc094, 0x0000);
        r8168_mac_ocp_write(tp, 0xc09e, 0x0000);
+
+       rtl_hw_aspm_clkreq_enable(tp, true);
 }
 
 static void rtl_hw_start_8168ep(struct rtl8169_private *tp)
@@ -5793,7 +5357,7 @@ static void rtl_hw_start_8168ep(struct rtl8169_private *tp)
        rtl_eri_write(tp, 0xd0, ERIAR_MASK_0001, 0x5f, ERIAR_EXGMAC);
        rtl_eri_write(tp, 0xe8, ERIAR_MASK_1111, 0x00100006, ERIAR_EXGMAC);
 
-       rtl_csi_access_enable_1(tp);
+       rtl_set_def_aspm_entry_latency(tp);
 
        rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
 
@@ -5831,11 +5395,12 @@ static void rtl_hw_start_8168ep_1(struct rtl8169_private *tp)
        };
 
        /* disable ASPM and clock request before accessing the EPHY */
-       RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
-       RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
+       rtl_hw_aspm_clkreq_enable(tp, false);
        rtl_ephy_init(tp, e_info_8168ep_1, ARRAY_SIZE(e_info_8168ep_1));
 
        rtl_hw_start_8168ep(tp);
+
+       rtl_hw_aspm_clkreq_enable(tp, true);
 }
 
 static void rtl_hw_start_8168ep_2(struct rtl8169_private *tp)
@@ -5847,14 +5412,15 @@ static void rtl_hw_start_8168ep_2(struct rtl8169_private *tp)
        };
 
        /* disable ASPM and clock request before accessing the EPHY */
-       RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
-       RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
+       rtl_hw_aspm_clkreq_enable(tp, false);
        rtl_ephy_init(tp, e_info_8168ep_2, ARRAY_SIZE(e_info_8168ep_2));
 
        rtl_hw_start_8168ep(tp);
 
        RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~PFM_EN);
        RTL_W8(tp, MISC_1, RTL_R8(tp, MISC_1) & ~PFM_D3COLD_EN);
+
+       rtl_hw_aspm_clkreq_enable(tp, true);
 }
 
 static void rtl_hw_start_8168ep_3(struct rtl8169_private *tp)
@@ -5868,8 +5434,7 @@ static void rtl_hw_start_8168ep_3(struct rtl8169_private *tp)
        };
 
        /* disable ASPM and clock request before accessing the EPHY */
-       RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
-       RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
+       rtl_hw_aspm_clkreq_enable(tp, false);
        rtl_ephy_init(tp, e_info_8168ep_3, ARRAY_SIZE(e_info_8168ep_3));
 
        rtl_hw_start_8168ep(tp);
@@ -5889,6 +5454,8 @@ static void rtl_hw_start_8168ep_3(struct rtl8169_private *tp)
        data = r8168_mac_ocp_read(tp, 0xe860);
        data |= 0x0080;
        r8168_mac_ocp_write(tp, 0xe860, data);
+
+       rtl_hw_aspm_clkreq_enable(tp, true);
 }
 
 static void rtl_hw_start_8168(struct rtl8169_private *tp)
@@ -6006,8 +5573,9 @@ static void rtl_hw_start_8168(struct rtl8169_private *tp)
                break;
 
        default:
-               printk(KERN_ERR PFX "%s: unknown chipset (mac_version = %d).\n",
-                      tp->dev->name, tp->mac_version);
+               netif_err(tp, drv, tp->dev,
+                         "unknown chipset (mac_version = %d)\n",
+                         tp->mac_version);
                break;
        }
 }
@@ -6026,7 +5594,7 @@ static void rtl_hw_start_8102e_1(struct rtl8169_private *tp)
        };
        u8 cfg1;
 
-       rtl_csi_access_enable_2(tp);
+       rtl_set_def_aspm_entry_latency(tp);
 
        RTL_W8(tp, DBG_REG, FIX_NAK_1);
 
@@ -6045,7 +5613,7 @@ static void rtl_hw_start_8102e_1(struct rtl8169_private *tp)
 
 static void rtl_hw_start_8102e_2(struct rtl8169_private *tp)
 {
-       rtl_csi_access_enable_2(tp);
+       rtl_set_def_aspm_entry_latency(tp);
 
        rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
 
@@ -6100,7 +5668,7 @@ static void rtl_hw_start_8402(struct rtl8169_private *tp)
                { 0x1e, 0, 0x4000 }
        };
 
-       rtl_csi_access_enable_2(tp);
+       rtl_set_def_aspm_entry_latency(tp);
 
        /* Force LAN exit from ASPM if Rx/Tx are not idle */
        RTL_W32(tp, FuncEvent, RTL_R32(tp, FuncEvent) | 0x002800);
@@ -6384,7 +5952,6 @@ static void rtl_reset_work(struct rtl8169_private *tp)
        napi_enable(&tp->napi);
        rtl_hw_start(tp);
        netif_wake_queue(dev);
-       rtl8169_check_link_status(dev, tp);
 }
 
 static void rtl8169_tx_timeout(struct net_device *dev)
@@ -7001,7 +6568,7 @@ static void rtl_slow_event_work(struct rtl8169_private *tp)
                rtl8169_pcierr_interrupt(dev);
 
        if (status & LinkChg)
-               rtl8169_check_link_status(dev, tp);
+               phy_mac_interrupt(dev->phydev);
 
        rtl_irq_enable_all(tp);
 }
@@ -7015,7 +6582,6 @@ static void rtl_task(struct work_struct *work)
                /* XXX - keep rtl_slow_event_work() as first element. */
                { RTL_FLAG_TASK_SLOW_PENDING,   rtl_slow_event_work },
                { RTL_FLAG_TASK_RESET_PENDING,  rtl_reset_work },
-               { RTL_FLAG_TASK_PHY_PENDING,    rtl_phy_work }
        };
        struct rtl8169_private *tp =
                container_of(work, struct rtl8169_private, wk.work);
@@ -7084,11 +6650,51 @@ static void rtl8169_rx_missed(struct net_device *dev)
        RTL_W32(tp, RxMissed, 0);
 }
 
+static void r8169_phylink_handler(struct net_device *ndev)
+{
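+       /* phylib link-change callback: resume the device on link-up, allow runtime-idle on link-down */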
+       struct rtl8169_private *tp = netdev_priv(ndev);
+
+       if (netif_carrier_ok(ndev)) {
+               rtl_link_chg_patch(tp);
+               pm_request_resume(&tp->pci_dev->dev);
+       } else {
+               pm_runtime_idle(&tp->pci_dev->dev);
+       }
+
+       if (net_ratelimit())
+               phy_print_status(ndev->phydev);
+}
+
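+/* Attach to the internal PHY at MDIO address 0 and advertise all supported modes */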
+static int r8169_phy_connect(struct rtl8169_private *tp)
+{
+       struct phy_device *phydev = mdiobus_get_phy(tp->mii_bus, 0);
+       phy_interface_t phy_mode;
+       int ret;
+
+       phy_mode = tp->supports_gmii ? PHY_INTERFACE_MODE_GMII :
+                  PHY_INTERFACE_MODE_MII;
+
+       ret = phy_connect_direct(tp->dev, phydev, r8169_phylink_handler,
+                                phy_mode);
+       if (ret)
+               return ret;
+
+       if (!tp->supports_gmii)
+               phy_set_max_speed(phydev, SPEED_100);
+
+       /* Advertise everything the PHY supports, including pause */
+       phydev->advertising = phydev->supported;
+
+       phy_attached_info(phydev);
+
+       return 0;
+}
+
 static void rtl8169_down(struct net_device *dev)
 {
        struct rtl8169_private *tp = netdev_priv(dev);
 
-       del_timer_sync(&tp->timer);
+       phy_stop(dev->phydev);
 
        napi_disable(&tp->napi);
        netif_stop_queue(dev);
@@ -7129,6 +6735,8 @@ static int rtl8169_close(struct net_device *dev)
 
        cancel_work_sync(&tp->wk.work);
 
+       phy_disconnect(dev->phydev);
+
        pci_free_irq(pdev, 0, tp);
 
        dma_free_coherent(&pdev->dev, R8169_RX_RING_BYTES, tp->RxDescArray,
@@ -7189,6 +6797,10 @@ static int rtl_open(struct net_device *dev)
        if (retval < 0)
                goto err_release_fw_2;
 
+       retval = r8169_phy_connect(tp);
+       if (retval)
+               goto err_free_irq;
+
        rtl_lock_work(tp);
 
        set_bit(RTL_FLAG_TASK_ENABLED, tp->wk.flags);
@@ -7204,17 +6816,17 @@ static int rtl_open(struct net_device *dev)
        if (!rtl8169_init_counter_offsets(tp))
                netif_warn(tp, hw, dev, "counter reset/update failed\n");
 
+       phy_start(dev->phydev);
        netif_start_queue(dev);
 
        rtl_unlock_work(tp);
 
-       tp->saved_wolopts = 0;
        pm_runtime_put_sync(&pdev->dev);
-
-       rtl8169_check_link_status(dev, tp);
 out:
        return retval;
 
+err_free_irq:
+       pci_free_irq(pdev, 0, tp);
 err_release_fw_2:
        rtl_release_firmware(tp);
        rtl8169_rx_clear(tp);
@@ -7293,6 +6905,7 @@ static void rtl8169_net_suspend(struct net_device *dev)
        if (!netif_running(dev))
                return;
 
+       phy_stop(dev->phydev);
        netif_device_detach(dev);
        netif_stop_queue(dev);
 
@@ -7323,6 +6936,9 @@ static void __rtl8169_resume(struct net_device *dev)
        netif_device_attach(dev);
 
        rtl_pll_power_up(tp);
+       rtl8169_init_phy(dev, tp);
+
+       phy_start(tp->dev->phydev);
 
        rtl_lock_work(tp);
        napi_enable(&tp->napi);
@@ -7336,9 +6952,6 @@ static int rtl8169_resume(struct device *device)
 {
        struct pci_dev *pdev = to_pci_dev(device);
        struct net_device *dev = pci_get_drvdata(pdev);
-       struct rtl8169_private *tp = netdev_priv(dev);
-
-       rtl8169_init_phy(dev, tp);
 
        if (netif_running(dev))
                __rtl8169_resume(dev);
@@ -7352,13 +6965,10 @@ static int rtl8169_runtime_suspend(struct device *device)
        struct net_device *dev = pci_get_drvdata(pdev);
        struct rtl8169_private *tp = netdev_priv(dev);
 
-       if (!tp->TxDescArray) {
-               rtl_pll_power_down(tp);
+       if (!tp->TxDescArray)
                return 0;
-       }
 
        rtl_lock_work(tp);
-       tp->saved_wolopts = __rtl8169_get_wol(tp);
        __rtl8169_set_wol(tp, WAKE_ANY);
        rtl_unlock_work(tp);
 
@@ -7383,11 +6993,8 @@ static int rtl8169_runtime_resume(struct device *device)
 
        rtl_lock_work(tp);
        __rtl8169_set_wol(tp, tp->saved_wolopts);
-       tp->saved_wolopts = 0;
        rtl_unlock_work(tp);
 
-       rtl8169_init_phy(dev, tp);
-
        __rtl8169_resume(dev);
 
        return 0;
@@ -7455,7 +7062,7 @@ static void rtl_shutdown(struct pci_dev *pdev)
        rtl8169_hw_reset(tp);
 
        if (system_state == SYSTEM_POWER_OFF) {
-               if (__rtl8169_get_wol(tp) & WAKE_ANY) {
+               if (tp->saved_wolopts) {
                        rtl_wol_suspend_quirk(tp);
                        rtl_wol_shutdown_quirk(tp);
                }
@@ -7476,6 +7083,7 @@ static void rtl_remove_one(struct pci_dev *pdev)
        netif_napi_del(&tp->napi);
 
        unregister_netdev(dev);
+       mdiobus_unregister(tp->mii_bus);
 
        rtl_release_firmware(tp);
 
@@ -7561,6 +7169,68 @@ DECLARE_RTL_COND(rtl_rxtx_empty_cond)
        return (RTL_R8(tp, MCU) & RXTX_EMPTY) == RXTX_EMPTY;
 }
 
+static int r8169_mdio_read_reg(struct mii_bus *mii_bus, int phyaddr, int phyreg)
+{
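+       /* the chip's internal PHY is the only device on this bus, at address 0 */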
+       struct rtl8169_private *tp = mii_bus->priv;
+
+       if (phyaddr > 0)
+               return -ENODEV;
+
+       return rtl_readphy(tp, phyreg);
+}
+
+static int r8169_mdio_write_reg(struct mii_bus *mii_bus, int phyaddr,
+                               int phyreg, u16 val)
+{
+       struct rtl8169_private *tp = mii_bus->priv;
+
+       if (phyaddr > 0)
+               return -ENODEV;
+
+       rtl_writephy(tp, phyreg, val);
+
+       return 0;
+}
+
+static int r8169_mdio_register(struct rtl8169_private *tp)
+{
+       struct pci_dev *pdev = tp->pci_dev;
+       struct phy_device *phydev;
+       struct mii_bus *new_bus;
+       int ret;
+
+       new_bus = devm_mdiobus_alloc(&pdev->dev);
+       if (!new_bus)
+               return -ENOMEM;
+
+       new_bus->name = "r8169";
+       new_bus->priv = tp;
+       new_bus->parent = &pdev->dev;
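+       /* no dedicated PHY IRQ; link changes are forwarded via phy_mac_interrupt() */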
+       new_bus->irq[0] = PHY_IGNORE_INTERRUPT;
+       snprintf(new_bus->id, MII_BUS_ID_SIZE, "r8169-%x",
+                PCI_DEVID(pdev->bus->number, pdev->devfn));
+
+       new_bus->read = r8169_mdio_read_reg;
+       new_bus->write = r8169_mdio_write_reg;
+
+       ret = mdiobus_register(new_bus);
+       if (ret)
+               return ret;
+
+       phydev = mdiobus_get_phy(new_bus, 0);
+       if (!phydev) {
+               mdiobus_unregister(new_bus);
+               return -ENODEV;
+       }
+
+       /* PHY will be woken up in rtl_open() */
+       phy_suspend(phydev);
+
+       tp->mii_bus = new_bus;
+
+       return 0;
+}
+
 static void rtl_hw_init_8168g(struct rtl8169_private *tp)
 {
        u32 data;
@@ -7618,7 +7288,6 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
        const struct rtl_cfg_info *cfg = rtl_cfg_infos + ent->driver_data;
        struct rtl8169_private *tp;
-       struct mii_if_info *mii;
        struct net_device *dev;
        int chipset, region, i;
        int rc;
@@ -7638,19 +7307,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
        tp->dev = dev;
        tp->pci_dev = pdev;
        tp->msg_enable = netif_msg_init(debug.msg_enable, R8169_MSG_DEFAULT);
-
-       mii = &tp->mii;
-       mii->dev = dev;
-       mii->mdio_read = rtl_mdio_read;
-       mii->mdio_write = rtl_mdio_write;
-       mii->phy_id_mask = 0x1f;
-       mii->reg_num_mask = 0x1f;
-       mii->supports_gmii = cfg->has_gmii;
-
-       /* disable ASPM completely as that cause random device stop working
-        * problems as well as full system hangs for some PCIe devices users */
-       pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1 |
-                                    PCIE_LINK_STATE_CLKPM);
+       tp->supports_gmii = cfg->has_gmii;
 
        /* enable device (incl. PCI PM wakeup and hotplug setup) */
        rc = pcim_enable_device(pdev);
@@ -7689,6 +7346,11 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
        /* Identify chip attached to board */
        rtl8169_get_mac_version(tp, cfg->default_ver);
 
+       if (rtl_tbi_enabled(tp)) {
+               dev_err(&pdev->dev, "TBI fiber mode not supported\n");
+               return -ENODEV;
+       }
+
        tp->cp_cmd = RTL_R16(tp, CPlusCmd);
 
        if ((sizeof(dma_addr_t) > 4) &&
@@ -7736,22 +7398,6 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 
        tp->saved_wolopts = __rtl8169_get_wol(tp);
 
-       if (rtl_tbi_enabled(tp)) {
-               tp->set_speed = rtl8169_set_speed_tbi;
-               tp->get_link_ksettings = rtl8169_get_link_ksettings_tbi;
-               tp->phy_reset_enable = rtl8169_tbi_reset_enable;
-               tp->phy_reset_pending = rtl8169_tbi_reset_pending;
-               tp->link_ok = rtl8169_tbi_link_ok;
-               tp->do_ioctl = rtl_tbi_ioctl;
-       } else {
-               tp->set_speed = rtl8169_set_speed_xmii;
-               tp->get_link_ksettings = rtl8169_get_link_ksettings_xmii;
-               tp->phy_reset_enable = rtl8169_xmii_reset_enable;
-               tp->phy_reset_pending = rtl8169_xmii_reset_pending;
-               tp->link_ok = rtl8169_xmii_link_ok;
-               tp->do_ioctl = rtl_xmii_ioctl;
-       }
-
        mutex_init(&tp->wk.mutex);
        u64_stats_init(&tp->rx_stats.syncp);
        u64_stats_init(&tp->tx_stats.syncp);
@@ -7823,8 +7469,6 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
        tp->event_slow = cfg->event_slow;
        tp->coalesce_info = cfg->coalesce_info;
 
-       timer_setup(&tp->timer, rtl8169_phy_timer, 0);
-
        tp->rtl_fw = RTL_FIRMWARE_UNKNOWN;
 
        tp->counters = dmam_alloc_coherent (&pdev->dev, sizeof(*tp->counters),
@@ -7835,10 +7479,17 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 
        pci_set_drvdata(pdev, dev);
 
-       rc = register_netdev(dev);
-       if (rc < 0)
+       rc = r8169_mdio_register(tp);
+       if (rc)
                return rc;
 
+       /* chip gets powered up in rtl_open() */
+       rtl_pll_power_down(tp);
+
+       rc = register_netdev(dev);
+       if (rc)
+               goto err_mdio_unregister;
+
        netif_info(tp, probe, dev, "%s, %pM, XID %08x, IRQ %d\n",
                   rtl_chip_infos[chipset].name, dev->dev_addr,
                   (u32)(RTL_R32(tp, TxConfig) & 0xfcf0f8ff),
@@ -7853,12 +7504,14 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
        if (r8168_check_dash(tp))
                rtl8168_driver_start(tp);
 
-       netif_carrier_off(dev);
-
        if (pci_dev_run_wake(pdev))
                pm_runtime_put_sync(&pdev->dev);
 
        return 0;
+
+err_mdio_unregister:
+       mdiobus_unregister(tp->mii_bus);
+       return rc;
 }
 
 static struct pci_driver rtl8169_pci_driver = {
index 0d811c02ff340f09a385ec0677f0388034615eef..c06f2df895c2c3e432fc8341f15cc77b0550db11 100644 (file)
@@ -1167,7 +1167,7 @@ static int ravb_get_sset_count(struct net_device *netdev, int sset)
 }
 
 static void ravb_get_ethtool_stats(struct net_device *ndev,
-                                  struct ethtool_stats *stats, u64 *data)
+                                  struct ethtool_stats *estats, u64 *data)
 {
        struct ravb_private *priv = netdev_priv(ndev);
        int i = 0;
@@ -1199,7 +1199,7 @@ static void ravb_get_strings(struct net_device *ndev, u32 stringset, u8 *data)
 {
        switch (stringset) {
        case ETH_SS_STATS:
-               memcpy(data, *ravb_gstrings_stats, sizeof(ravb_gstrings_stats));
+               memcpy(data, ravb_gstrings_stats, sizeof(ravb_gstrings_stats));
                break;
        }
 }
@@ -1564,7 +1564,7 @@ static netdev_tx_t ravb_start_xmit(struct sk_buff *skb, struct net_device *ndev)
                /* TAG and timestamp required flag */
                skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
                desc->tagh_tsr = (ts_skb->tag >> 4) | TX_TSR;
-               desc->ds_tagl |= le16_to_cpu(ts_skb->tag << 12);
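+               /* descriptor fields are little-endian, so convert from CPU byte order */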
+               desc->ds_tagl |= cpu_to_le16(ts_skb->tag << 12);
        }
 
        skb_tx_timestamp(skb);
@@ -1597,7 +1597,8 @@ drop:
 }
 
 static u16 ravb_select_queue(struct net_device *ndev, struct sk_buff *skb,
-                            void *accel_priv, select_queue_fallback_t fallback)
+                            struct net_device *sb_dev,
+                            select_queue_fallback_t fallback)
 {
        /* If skb needs TX timestamp, it is handled in network control queue */
        return (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) ? RAVB_NC :
index 5614fd231bbe1e4685582e15faf27dad412b241b..5573199c4536c283164351ca17d4377ad5a6c6a2 100644 (file)
@@ -439,10 +439,15 @@ static void sh_eth_modify(struct net_device *ndev, int enum_index, u32 clear,
                     enum_index);
 }
 
+static u16 sh_eth_tsu_get_offset(struct sh_eth_private *mdp, int enum_index)
+{
+       return mdp->reg_offset[enum_index];
+}
+
 static void sh_eth_tsu_write(struct sh_eth_private *mdp, u32 data,
                             int enum_index)
 {
-       u16 offset = mdp->reg_offset[enum_index];
+       u16 offset = sh_eth_tsu_get_offset(mdp, enum_index);
 
        if (WARN_ON(offset == SH_ETH_OFFSET_INVALID))
                return;
@@ -452,7 +457,7 @@ static void sh_eth_tsu_write(struct sh_eth_private *mdp, u32 data,
 
 static u32 sh_eth_tsu_read(struct sh_eth_private *mdp, int enum_index)
 {
-       u16 offset = mdp->reg_offset[enum_index];
+       u16 offset = sh_eth_tsu_get_offset(mdp, enum_index);
 
        if (WARN_ON(offset == SH_ETH_OFFSET_INVALID))
                return ~0U;
@@ -622,7 +627,6 @@ static struct sh_eth_cpu_data r7s72100_data = {
        .tpauser        = 1,
        .hw_swap        = 1,
        .rpadir         = 1,
-       .rpadir_value   = 2 << 16,
        .no_trimd       = 1,
        .no_ade         = 1,
        .xdfar_rw       = 1,
@@ -672,7 +676,6 @@ static struct sh_eth_cpu_data r8a7740_data = {
        .bculr          = 1,
        .hw_swap        = 1,
        .rpadir         = 1,
-       .rpadir_value   = 2 << 16,
        .no_trimd       = 1,
        .no_ade         = 1,
        .xdfar_rw       = 1,
@@ -798,7 +801,6 @@ static struct sh_eth_cpu_data r8a77980_data = {
        .hw_swap        = 1,
        .nbst           = 1,
        .rpadir         = 1,
-       .rpadir_value   = 2 << 16,
        .no_trimd       = 1,
        .no_ade         = 1,
        .xdfar_rw       = 1,
@@ -851,7 +853,6 @@ static struct sh_eth_cpu_data sh7724_data = {
        .tpauser        = 1,
        .hw_swap        = 1,
        .rpadir         = 1,
-       .rpadir_value   = 0x00020000, /* NET_IP_ALIGN assumed to be 2 */
 };
 
 static void sh_eth_set_rate_sh7757(struct net_device *ndev)
@@ -898,7 +899,6 @@ static struct sh_eth_cpu_data sh7757_data = {
        .hw_swap        = 1,
        .no_ade         = 1,
        .rpadir         = 1,
-       .rpadir_value   = 2 << 16,
        .rtrate         = 1,
        .dual_port      = 1,
 };
@@ -978,7 +978,6 @@ static struct sh_eth_cpu_data sh7757_data_giga = {
        .bculr          = 1,
        .hw_swap        = 1,
        .rpadir         = 1,
-       .rpadir_value   = 2 << 16,
        .no_trimd       = 1,
        .no_ade         = 1,
        .xdfar_rw       = 1,
@@ -1467,7 +1466,7 @@ static int sh_eth_dev_init(struct net_device *ndev)
        /* Descriptor format */
        sh_eth_ring_format(ndev);
        if (mdp->cd->rpadir)
-               sh_eth_write(ndev, mdp->cd->rpadir_value, RPADIR);
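+               /* RPADIR pads each RX frame by NET_IP_ALIGN bytes to align the IP header */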
+               sh_eth_write(ndev, NET_IP_ALIGN << 16, RPADIR);
 
        /* all sh_eth int mask */
        sh_eth_write(ndev, 0, EESIPR);
@@ -1527,9 +1526,9 @@ static int sh_eth_dev_init(struct net_device *ndev)
 
        /* mask reset */
        if (mdp->cd->apr)
-               sh_eth_write(ndev, APR_AP, APR);
+               sh_eth_write(ndev, 1, APR);
        if (mdp->cd->mpr)
-               sh_eth_write(ndev, MPR_MP, MPR);
+               sh_eth_write(ndev, 1, MPR);
        if (mdp->cd->tpauser)
                sh_eth_write(ndev, TPAUSER_UNLIMITED, TPAUSER);
 
@@ -2677,34 +2676,36 @@ static int sh_eth_tsu_busy(struct net_device *ndev)
        return 0;
 }
 
-static int sh_eth_tsu_write_entry(struct net_device *ndev, void *reg,
+static int sh_eth_tsu_write_entry(struct net_device *ndev, u16 offset,
                                  const u8 *addr)
 {
+       struct sh_eth_private *mdp = netdev_priv(ndev);
        u32 val;
 
        val = addr[0] << 24 | addr[1] << 16 | addr[2] << 8 | addr[3];
-       iowrite32(val, reg);
+       iowrite32(val, mdp->tsu_addr + offset);
        if (sh_eth_tsu_busy(ndev) < 0)
                return -EBUSY;
 
        val = addr[4] << 8 | addr[5];
-       iowrite32(val, reg + 4);
+       iowrite32(val, mdp->tsu_addr + offset + 4);
        if (sh_eth_tsu_busy(ndev) < 0)
                return -EBUSY;
 
        return 0;
 }
 
-static void sh_eth_tsu_read_entry(void *reg, u8 *addr)
+static void sh_eth_tsu_read_entry(struct net_device *ndev, u16 offset, u8 *addr)
 {
+       struct sh_eth_private *mdp = netdev_priv(ndev);
        u32 val;
 
-       val = ioread32(reg);
+       val = ioread32(mdp->tsu_addr + offset);
        addr[0] = (val >> 24) & 0xff;
        addr[1] = (val >> 16) & 0xff;
        addr[2] = (val >> 8) & 0xff;
        addr[3] = val & 0xff;
-       val = ioread32(reg + 4);
+       val = ioread32(mdp->tsu_addr + offset + 4);
        addr[4] = (val >> 8) & 0xff;
        addr[5] = val & 0xff;
 }
@@ -2713,12 +2714,12 @@ static void sh_eth_tsu_read_entry(void *reg, u8 *addr)
 static int sh_eth_tsu_find_entry(struct net_device *ndev, const u8 *addr)
 {
        struct sh_eth_private *mdp = netdev_priv(ndev);
-       void *reg_offset = sh_eth_tsu_get_offset(mdp, TSU_ADRH0);
+       u16 reg_offset = sh_eth_tsu_get_offset(mdp, TSU_ADRH0);
        int i;
        u8 c_addr[ETH_ALEN];
 
        for (i = 0; i < SH_ETH_TSU_CAM_ENTRIES; i++, reg_offset += 8) {
-               sh_eth_tsu_read_entry(reg_offset, c_addr);
+               sh_eth_tsu_read_entry(ndev, reg_offset, c_addr);
                if (ether_addr_equal(addr, c_addr))
                        return i;
        }
@@ -2740,7 +2741,7 @@ static int sh_eth_tsu_disable_cam_entry_table(struct net_device *ndev,
                                              int entry)
 {
        struct sh_eth_private *mdp = netdev_priv(ndev);
-       void *reg_offset = sh_eth_tsu_get_offset(mdp, TSU_ADRH0);
+       u16 reg_offset = sh_eth_tsu_get_offset(mdp, TSU_ADRH0);
        int ret;
        u8 blank[ETH_ALEN];
 
@@ -2757,7 +2758,7 @@ static int sh_eth_tsu_disable_cam_entry_table(struct net_device *ndev,
 static int sh_eth_tsu_add_entry(struct net_device *ndev, const u8 *addr)
 {
        struct sh_eth_private *mdp = netdev_priv(ndev);
-       void *reg_offset = sh_eth_tsu_get_offset(mdp, TSU_ADRH0);
+       u16 reg_offset = sh_eth_tsu_get_offset(mdp, TSU_ADRH0);
        int i, ret;
 
        if (!mdp->cd->tsu)
@@ -2831,15 +2832,15 @@ static int sh_eth_tsu_purge_all(struct net_device *ndev)
 static void sh_eth_tsu_purge_mcast(struct net_device *ndev)
 {
        struct sh_eth_private *mdp = netdev_priv(ndev);
+       u16 reg_offset = sh_eth_tsu_get_offset(mdp, TSU_ADRH0);
        u8 addr[ETH_ALEN];
-       void *reg_offset = sh_eth_tsu_get_offset(mdp, TSU_ADRH0);
        int i;
 
        if (!mdp->cd->tsu)
                return;
 
        for (i = 0; i < SH_ETH_TSU_CAM_ENTRIES; i++, reg_offset += 8) {
-               sh_eth_tsu_read_entry(reg_offset, addr);
+               sh_eth_tsu_read_entry(ndev, reg_offset, addr);
                if (is_multicast_ether_addr(addr))
                        sh_eth_tsu_del_entry(ndev, addr);
        }
index 726c55a82dd7b76e1d90a836cddcd66bcbf96a24..f94be99cf4002190347014d7643387883556981a 100644 (file)
@@ -383,12 +383,12 @@ enum ECSIPR_STATUS_MASK_BIT {
 
 /* APR */
 enum APR_BIT {
-       APR_AP = 0x00000001,
+       APR_AP = 0x0000ffff,
 };
 
 /* MPR */
 enum MPR_BIT {
-       MPR_MP = 0x00000001,
+       MPR_MP = 0x0000ffff,
 };
 
 /* TRSCER */
@@ -403,8 +403,7 @@ enum DESC_I_BIT {
 
 /* RPADIR */
 enum RPADIR_BIT {
-       RPADIR_PADS1 = 0x20000, RPADIR_PADS0 = 0x10000,
-       RPADIR_PADR = 0x0003f,
+       RPADIR_PADS = 0x1f0000, RPADIR_PADR = 0xffff,
 };
 
 /* FDR */
@@ -488,7 +487,6 @@ struct sh_eth_cpu_data {
        u32 ecsipr_value;
        u32 fdr_value;
        u32 fcftr_value;
-       u32 rpadir_value;
 
        /* interrupt checking mask */
        u32 tx_check;
@@ -560,10 +558,4 @@ struct sh_eth_private {
        unsigned wol_enabled:1;
 };
 
-static inline void *sh_eth_tsu_get_offset(struct sh_eth_private *mdp,
-                                         int enum_index)
-{
-       return mdp->tsu_addr + mdp->reg_offset[enum_index];
-}
-
 #endif /* #ifndef __SH_ETH_H__ */
index 3bac58d0f88b20f8982295a2efa09c9096d98822..c5c297e78d068f41968fefc80e049b9ba1b70c62 100644 (file)
@@ -6,3 +6,5 @@ sfc-$(CONFIG_SFC_MTD)   += mtd.o
 sfc-$(CONFIG_SFC_SRIOV)        += sriov.o siena_sriov.o ef10_sriov.o
 
 obj-$(CONFIG_SFC)      += sfc.o
+
+obj-$(CONFIG_SFC_FALCON) += falcon/
index 019cef1d3cf72ce2d34b3a36283cabf9227c9879..3d76fd1504c2bc38d80b051ad885f8e0b022aba9 100644 (file)
@@ -199,7 +199,7 @@ static int efx_ef10_sriov_alloc_vf_vswitching(struct efx_nic *efx)
                return -ENOMEM;
 
        for (i = 0; i < efx->vf_count; i++) {
-               random_ether_addr(nic_data->vf[i].mac);
+               eth_random_addr(nic_data->vf[i].mac);
                nic_data->vf[i].efx = NULL;
                nic_data->vf[i].vlan = EFX_EF10_NO_VLAN;
 
@@ -564,7 +564,7 @@ int efx_ef10_sriov_set_vf_vlan(struct efx_nic *efx, int vf_i, u16 vlan,
 {
        struct efx_ef10_nic_data *nic_data = efx->nic_data;
        struct ef10_vf *vf;
-       u16 old_vlan, new_vlan;
+       u16 new_vlan;
        int rc = 0, rc2 = 0;
 
        if (vf_i >= efx->vf_count)
@@ -619,7 +619,6 @@ int efx_ef10_sriov_set_vf_vlan(struct efx_nic *efx, int vf_i, u16 vlan,
        }
 
        /* Do the actual vlan change */
-       old_vlan = vf->vlan;
        vf->vlan = new_vlan;
 
        /* Restore everything in reverse order */
index ce3a177081a854a683493f7f6f2c79ac63f60cc4..330233286e785254f5f29c87f9557a305974f606 100644 (file)
@@ -264,11 +264,17 @@ static int efx_check_disabled(struct efx_nic *efx)
 static int efx_process_channel(struct efx_channel *channel, int budget)
 {
        struct efx_tx_queue *tx_queue;
+       struct list_head rx_list;
        int spent;
 
        if (unlikely(!channel->enabled))
                return 0;
 
+       /* Prepare the batch receive list */
+       EFX_WARN_ON_PARANOID(channel->rx_list != NULL);
+       INIT_LIST_HEAD(&rx_list);
+       channel->rx_list = &rx_list;
+
        efx_for_each_channel_tx_queue(tx_queue, channel) {
                tx_queue->pkts_compl = 0;
                tx_queue->bytes_compl = 0;
@@ -291,6 +297,10 @@ static int efx_process_channel(struct efx_channel *channel, int budget)
                }
        }
 
+       /* Receive any packets we queued up */
+       netif_receive_skb_list(channel->rx_list);
+       channel->rx_list = NULL;
+
        return spent;
 }
 
@@ -555,6 +565,8 @@ static int efx_probe_channel(struct efx_channel *channel)
                        goto fail;
        }
 
+       channel->rx_list = NULL;
+
        return 0;
 
 fail:
index 65568925c3efe6398d1e26ca1520b53919f60291..961b9297964069440a962e39cd409760b71c073a 100644 (file)
@@ -448,6 +448,7 @@ enum efx_sync_events_state {
  *     __efx_rx_packet(), or zero if there is none
  * @rx_pkt_index: Ring index of first buffer for next packet to be delivered
  *     by __efx_rx_packet(), if @rx_pkt_n_frags != 0
+ * @rx_list: list of SKBs from current RX, awaiting processing
  * @rx_queue: RX queue for this channel
  * @tx_queue: TX queues for this channel
  * @sync_events_state: Current state of sync events on this channel
@@ -500,6 +501,8 @@ struct efx_channel {
        unsigned int rx_pkt_n_frags;
        unsigned int rx_pkt_index;
 
+       struct list_head *rx_list;
+
        struct efx_rx_queue rx_queue;
        struct efx_tx_queue tx_queue[EFX_TXQ_TYPES];
 
index d2e254f2f72bf0f2fffd0078397da819a676980f..396ff01298cdfd4d8ccc5566e36dba778a7f4e59 100644 (file)
@@ -634,7 +634,12 @@ static void efx_rx_deliver(struct efx_channel *channel, u8 *eh,
                        return;
 
        /* Pass the packet up */
-       netif_receive_skb(skb);
+       if (channel->rx_list != NULL)
+               /* Add to list, will pass up later */
+               list_add_tail(&skb->list, channel->rx_list);
+       else
+               /* No list, so pass it up now */
+               netif_receive_skb(skb);
 }
 
 /* Handle a received packet.  Second half: Touches packet payload. */
index 949aaef390b67bbf9a21c15a24f912898b422229..15c62c160953308b3f4018e8bd7973dee9fa59cd 100644 (file)
@@ -321,7 +321,6 @@ static int epic_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
        static int card_idx = -1;
        void __iomem *ioaddr;
        int chip_idx = (int) ent->driver_data;
-       int irq;
        struct net_device *dev;
        struct epic_private *ep;
        int i, ret, option = 0, duplex = 0;
@@ -338,7 +337,6 @@ static int epic_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
        ret = pci_enable_device(pdev);
        if (ret)
                goto out;
-       irq = pdev->irq;
 
        if (pci_resource_len(pdev, 0) < EPIC_TOTAL_SIZE) {
                dev_err(&pdev->dev, "no PCI region space\n");
index e080d3e7c582ff7df2a2fe0ecf6074ac4a306470..01589b6982e4d433d4936a4bb6fa2c2510d278c0 100644 (file)
@@ -780,11 +780,9 @@ static int netsec_process_rx(struct netsec_priv *priv, int budget)
 static int netsec_napi_poll(struct napi_struct *napi, int budget)
 {
        struct netsec_priv *priv;
-       struct net_device *ndev;
        int tx, rx, done, todo;
 
        priv = container_of(napi, struct netsec_priv, napi);
-       ndev = priv->ndev;
 
        todo = budget;
        do {
index f08625a02cea03f8dcf55ca3f9cc0f06fa4e3ca5..7b923362ee5509d6fdedc9c15d09cd760182fab2 100644 (file)
@@ -61,6 +61,7 @@ struct rk_priv_data {
        struct clk *mac_clk_tx;
        struct clk *clk_mac_ref;
        struct clk *clk_mac_refout;
+       struct clk *clk_mac_speed;
        struct clk *aclk_mac;
        struct clk *pclk_mac;
        struct clk *clk_phy;
@@ -83,6 +84,64 @@ struct rk_priv_data {
        (((tx) ? soc##_GMAC_TXCLK_DLY_ENABLE : soc##_GMAC_TXCLK_DLY_DISABLE) | \
         ((rx) ? soc##_GMAC_RXCLK_DLY_ENABLE : soc##_GMAC_RXCLK_DLY_DISABLE))
 
+#define PX30_GRF_GMAC_CON1             0x0904
+
+/* PX30_GRF_GMAC_CON1 */
+#define PX30_GMAC_PHY_INTF_SEL_RMII    (GRF_CLR_BIT(4) | GRF_CLR_BIT(5) | \
+                                        GRF_BIT(6))
+#define PX30_GMAC_SPEED_10M            GRF_CLR_BIT(2)
+#define PX30_GMAC_SPEED_100M           GRF_BIT(2)
+
+static void px30_set_to_rmii(struct rk_priv_data *bsp_priv)
+{
+       struct device *dev = &bsp_priv->pdev->dev;
+
+       if (IS_ERR(bsp_priv->grf)) {
+               dev_err(dev, "%s: Missing rockchip,grf property\n", __func__);
+               return;
+       }
+
+       regmap_write(bsp_priv->grf, PX30_GRF_GMAC_CON1,
+                    PX30_GMAC_PHY_INTF_SEL_RMII);
+}
+
+static void px30_set_rmii_speed(struct rk_priv_data *bsp_priv, int speed)
+{
+       struct device *dev = &bsp_priv->pdev->dev;
+       int ret;
+
+       if (IS_ERR(bsp_priv->clk_mac_speed)) {
+               dev_err(dev, "%s: Missing clk_mac_speed clock\n", __func__);
+               return;
+       }
+
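+       /* standard RMII reference clocks: 2.5 MHz at 10 Mbit/s, 25 MHz at 100 Mbit/s */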
+       if (speed == 10) {
+               regmap_write(bsp_priv->grf, PX30_GRF_GMAC_CON1,
+                            PX30_GMAC_SPEED_10M);
+
+               ret = clk_set_rate(bsp_priv->clk_mac_speed, 2500000);
+               if (ret)
+                       dev_err(dev, "%s: set clk_mac_speed rate 2500000 failed: %d\n",
+                               __func__, ret);
+       } else if (speed == 100) {
+               regmap_write(bsp_priv->grf, PX30_GRF_GMAC_CON1,
+                            PX30_GMAC_SPEED_100M);
+
+               ret = clk_set_rate(bsp_priv->clk_mac_speed, 25000000);
+               if (ret)
+                       dev_err(dev, "%s: set clk_mac_speed rate 25000000 failed: %d\n",
+                               __func__, ret);
+
+       } else {
+               dev_err(dev, "unknown speed value for RMII! speed=%d\n", speed);
+       }
+}
+
+static const struct rk_gmac_ops px30_ops = {
+       .set_to_rmii = px30_set_to_rmii,
+       .set_rmii_speed = px30_set_rmii_speed,
+};
+
 #define RK3128_GRF_MAC_CON0    0x0168
 #define RK3128_GRF_MAC_CON1    0x016c
 
@@ -1042,6 +1101,10 @@ static int rk_gmac_clk_init(struct plat_stmmacenet_data *plat)
                }
        }
 
+       bsp_priv->clk_mac_speed = devm_clk_get(dev, "clk_mac_speed");
+       if (IS_ERR(bsp_priv->clk_mac_speed))
+               dev_err(dev, "cannot get clock %s\n", "clk_mac_speed");
+
        if (bsp_priv->clock_input) {
                dev_info(dev, "clock input from PHY\n");
        } else {
@@ -1094,6 +1157,9 @@ static int gmac_clk_enable(struct rk_priv_data *bsp_priv, bool enable)
                        if (!IS_ERR(bsp_priv->mac_clk_tx))
                                clk_prepare_enable(bsp_priv->mac_clk_tx);
 
+                       if (!IS_ERR(bsp_priv->clk_mac_speed))
+                               clk_prepare_enable(bsp_priv->clk_mac_speed);
+
                        /**
                         * if (!IS_ERR(bsp_priv->clk_mac))
                         *      clk_prepare_enable(bsp_priv->clk_mac);
@@ -1118,6 +1184,8 @@ static int gmac_clk_enable(struct rk_priv_data *bsp_priv, bool enable)
                        clk_disable_unprepare(bsp_priv->pclk_mac);
 
                        clk_disable_unprepare(bsp_priv->mac_clk_tx);
+
+                       clk_disable_unprepare(bsp_priv->clk_mac_speed);
                        /**
                         * if (!IS_ERR(bsp_priv->clk_mac))
                         *      clk_disable_unprepare(bsp_priv->clk_mac);
@@ -1414,6 +1482,7 @@ static int rk_gmac_resume(struct device *dev)
 static SIMPLE_DEV_PM_OPS(rk_gmac_pm_ops, rk_gmac_suspend, rk_gmac_resume);
 
 static const struct of_device_id rk_gmac_dwmac_match[] = {
+       { .compatible = "rockchip,px30-gmac",   .data = &px30_ops   },
        { .compatible = "rockchip,rk3128-gmac", .data = &rk3128_ops },
        { .compatible = "rockchip,rk3228-gmac", .data = &rk3228_ops },
        { .compatible = "rockchip,rk3288-gmac", .data = &rk3288_ops },
index 65bc3556bd8f8c25b9b37421c80d6a663d8eb0db..edb6053bd9802574ee5b5ec9a1cea4de7678214e 100644 (file)
@@ -407,6 +407,19 @@ static void dwmac4_enable_tso(void __iomem *ioaddr, bool en, u32 chan)
        }
 }
 
+static void dwmac4_qmode(void __iomem *ioaddr, u32 channel, u8 qmode)
+{
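+       /* TXQEN_AV puts the MTL TX queue in AVB mode; TXQEN selects generic/DCB mode */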
+       u32 mtl_tx_op = readl(ioaddr + MTL_CHAN_TX_OP_MODE(channel));
+
+       mtl_tx_op &= ~MTL_OP_MODE_TXQEN_MASK;
+       if (qmode != MTL_QUEUE_AVB)
+               mtl_tx_op |= MTL_OP_MODE_TXQEN;
+       else
+               mtl_tx_op |= MTL_OP_MODE_TXQEN_AV;
+
+       writel(mtl_tx_op, ioaddr + MTL_CHAN_TX_OP_MODE(channel));
+}
+
 static void dwmac4_set_bfsize(void __iomem *ioaddr, int bfsize, u32 chan)
 {
        u32 value = readl(ioaddr + DMA_CHAN_RX_CONTROL(chan));
@@ -441,6 +454,7 @@ const struct stmmac_dma_ops dwmac4_dma_ops = {
        .set_rx_tail_ptr = dwmac4_set_rx_tail_ptr,
        .set_tx_tail_ptr = dwmac4_set_tx_tail_ptr,
        .enable_tso = dwmac4_enable_tso,
+       .qmode = dwmac4_qmode,
        .set_bfsize = dwmac4_set_bfsize,
 };
 
@@ -468,5 +482,6 @@ const struct stmmac_dma_ops dwmac410_dma_ops = {
        .set_rx_tail_ptr = dwmac4_set_rx_tail_ptr,
        .set_tx_tail_ptr = dwmac4_set_tx_tail_ptr,
        .enable_tso = dwmac4_enable_tso,
+       .qmode = dwmac4_qmode,
        .set_bfsize = dwmac4_set_bfsize,
 };
index fe8b536b13f864bfff723ea2236a3e5982026533..79911eefc2a7249ec347260e4026d5ebdbfe499a 100644 (file)
@@ -183,6 +183,7 @@ struct stmmac_dma_ops {
        void (*set_rx_tail_ptr)(void __iomem *ioaddr, u32 tail_ptr, u32 chan);
        void (*set_tx_tail_ptr)(void __iomem *ioaddr, u32 tail_ptr, u32 chan);
        void (*enable_tso)(void __iomem *ioaddr, bool en, u32 chan);
+       void (*qmode)(void __iomem *ioaddr, u32 channel, u8 qmode);
        void (*set_bfsize)(void __iomem *ioaddr, int bfsize, u32 chan);
 };
 
@@ -236,6 +237,8 @@ struct stmmac_dma_ops {
        stmmac_do_void_callback(__priv, dma, set_tx_tail_ptr, __args)
 #define stmmac_enable_tso(__priv, __args...) \
        stmmac_do_void_callback(__priv, dma, enable_tso, __args)
+#define stmmac_dma_qmode(__priv, __args...) \
+       stmmac_do_void_callback(__priv, dma, qmode, __args)
 #define stmmac_set_dma_bfsize(__priv, __args...) \
        stmmac_do_void_callback(__priv, dma, set_bfsize, __args)
 
@@ -444,17 +447,22 @@ struct stmmac_mode_ops {
 
 struct stmmac_priv;
 struct tc_cls_u32_offload;
+struct tc_cbs_qopt_offload;
 
 struct stmmac_tc_ops {
        int (*init)(struct stmmac_priv *priv);
        int (*setup_cls_u32)(struct stmmac_priv *priv,
                             struct tc_cls_u32_offload *cls);
+       int (*setup_cbs)(struct stmmac_priv *priv,
+                        struct tc_cbs_qopt_offload *qopt);
 };
 
 #define stmmac_tc_init(__priv, __args...) \
        stmmac_do_callback(__priv, tc, init, __args)
 #define stmmac_tc_setup_cls_u32(__priv, __args...) \
        stmmac_do_callback(__priv, tc, setup_cls_u32, __args)
+#define stmmac_tc_setup_cbs(__priv, __args...) \
+       stmmac_do_callback(__priv, tc, setup_cbs, __args)
 
 struct stmmac_regs_off {
        u32 ptp_off;
index ef6a8d39db2f19b261ff9760d523dac664cffe93..9d104a05044df81db96b3c8abe4791567e741967 100644 (file)
@@ -3778,7 +3778,7 @@ static int stmmac_setup_tc_block(struct stmmac_priv *priv,
        switch (f->command) {
        case TC_BLOCK_BIND:
                return tcf_block_cb_register(f->block, stmmac_setup_tc_block_cb,
-                               priv, priv);
+                               priv, priv, f->extack);
        case TC_BLOCK_UNBIND:
                tcf_block_cb_unregister(f->block, stmmac_setup_tc_block_cb, priv);
                return 0;
@@ -3795,6 +3795,8 @@ static int stmmac_setup_tc(struct net_device *ndev, enum tc_setup_type type,
        switch (type) {
        case TC_SETUP_BLOCK:
                return stmmac_setup_tc_block(priv, type_data);
+       case TC_SETUP_QDISC_CBS:
+               return stmmac_tc_setup_cbs(priv, priv, type_data);
        default:
                return -EOPNOTSUPP;
        }
index 2258cd8cc84413f22c19a9339a3b6193427c464e..1a96dd9c1091e6c515753132c4a3fd0128f41bdf 100644 (file)
@@ -289,7 +289,67 @@ static int tc_init(struct stmmac_priv *priv)
        return 0;
 }
 
+static int tc_setup_cbs(struct stmmac_priv *priv,
+                       struct tc_cbs_qopt_offload *qopt)
+{
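+       /* offload a CBS qdisc (IEEE 802.1Qav credit-based shaper) onto one TX queue */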
+       u32 tx_queues_count = priv->plat->tx_queues_to_use;
+       u32 queue = qopt->queue;
+       u32 ptr, speed_div;
+       u32 mode_to_use;
+       u64 value;
+       int ret;
+
+       /* Queue 0 is not AVB capable */
+       if (queue <= 0 || queue >= tx_queues_count)
+               return -EINVAL;
+       if (priv->speed != SPEED_100 && priv->speed != SPEED_1000)
+               return -EOPNOTSUPP;
+
+       mode_to_use = priv->plat->tx_queues_cfg[queue].mode_to_use;
+       if (mode_to_use == MTL_QUEUE_DCB && qopt->enable) {
+               ret = stmmac_dma_qmode(priv, priv->ioaddr, queue, MTL_QUEUE_AVB);
+               if (ret)
+                       return ret;
+
+               priv->plat->tx_queues_cfg[queue].mode_to_use = MTL_QUEUE_AVB;
+       } else if (!qopt->enable) {
+               return stmmac_dma_qmode(priv, priv->ioaddr, queue, MTL_QUEUE_DCB);
+       }
+
+       /* Port Transmit Rate and Speed Divider */
+       ptr = (priv->speed == SPEED_100) ? 4 : 8;
+       speed_div = (priv->speed == SPEED_100) ? 100000 : 1000000;
+
+       /* Scale the qdisc slopes and credits into the units the shaper registers expect */
+       value = div_s64(qopt->idleslope * 1024ll * ptr, speed_div);
+       priv->plat->tx_queues_cfg[queue].idle_slope = value & GENMASK(31, 0);
+
+       value = div_s64(-qopt->sendslope * 1024ll * ptr, speed_div);
+       priv->plat->tx_queues_cfg[queue].send_slope = value & GENMASK(31, 0);
+
+       value = qopt->hicredit * 1024ll * 8;
+       priv->plat->tx_queues_cfg[queue].high_credit = value & GENMASK(31, 0);
+
+       value = qopt->locredit * 1024ll * 8;
+       priv->plat->tx_queues_cfg[queue].low_credit = value & GENMASK(31, 0);
+
+       ret = stmmac_config_cbs(priv, priv->hw,
+                               priv->plat->tx_queues_cfg[queue].send_slope,
+                               priv->plat->tx_queues_cfg[queue].idle_slope,
+                               priv->plat->tx_queues_cfg[queue].high_credit,
+                               priv->plat->tx_queues_cfg[queue].low_credit,
+                               queue);
+       if (ret)
+               return ret;
+
+       dev_info(priv->device, "CBS queue %d: send %d, idle %d, hi %d, lo %d\n",
+                       queue, qopt->sendslope, qopt->idleslope,
+                       qopt->hicredit, qopt->locredit);
+       return 0;
+}
+
 const struct stmmac_tc_ops dwmac510_tc_ops = {
        .init = tc_init,
        .setup_cls_u32 = tc_setup_cls_u32,
+       .setup_cbs = tc_setup_cbs,
 };
index a5dd627fe2f9237a1af445c9ce2409fd0976c76c..d42f47f6c632fe8618348d40fc609bfed5deef4a 100644 (file)
@@ -101,7 +101,8 @@ static struct vnet_port *vsw_tx_port_find(struct sk_buff *skb,
 }
 
 static u16 vsw_select_queue(struct net_device *dev, struct sk_buff *skb,
-                           void *accel_priv, select_queue_fallback_t fallback)
+                           struct net_device *sb_dev,
+                           select_queue_fallback_t fallback)
 {
        struct vnet_port *port = netdev_priv(dev);
 
index 88c12474a0c38cc10f539d7eff0b81a0cc9a4d7a..9319d84bf49f07e9a9cf8514783dacb32b78fee7 100644 (file)
@@ -1225,25 +1225,9 @@ static int link_status_1g_rgmii(struct niu *np, int *link_up_p)
 
        bmsr = err;
        if (bmsr & BMSR_LSTATUS) {
-               u16 adv, lpa;
-
-               err = mii_read(np, np->phy_addr, MII_ADVERTISE);
-               if (err < 0)
-                       goto out;
-               adv = err;
-
-               err = mii_read(np, np->phy_addr, MII_LPA);
-               if (err < 0)
-                       goto out;
-               lpa = err;
-
-               err = mii_read(np, np->phy_addr, MII_ESTATUS);
-               if (err < 0)
-                       goto out;
                link_up = 1;
                current_speed = SPEED_1000;
                current_duplex = DUPLEX_FULL;
-
        }
        lp->active_speed = current_speed;
        lp->active_duplex = current_duplex;
index a94f50442613e9f77cec6aff24fbf19a5a33756b..12539b357a78402dfc80a4a654761051a2fa6409 100644 (file)
@@ -234,7 +234,8 @@ static struct vnet_port *vnet_tx_port_find(struct sk_buff *skb,
 }
 
 static u16 vnet_select_queue(struct net_device *dev, struct sk_buff *skb,
-                            void *accel_priv, select_queue_fallback_t fallback)
+                            struct net_device *sb_dev,
+                            select_queue_fallback_t fallback)
 {
        struct vnet *vp = netdev_priv(dev);
        struct vnet_port *port = __tx_port_find(vp, skb);
index 163d8d16bc245b48a10390d7706b21fd603489a4..dc966ddb6d815038b487f2edd05279e9397c30e7 100644 (file)
@@ -1151,7 +1151,6 @@ static void bdx_recycle_skb(struct bdx_priv *priv, struct rxd_desc *rxdd)
        struct rx_map *dm;
        struct rxf_fifo *f;
        struct rxdb *db;
-       struct sk_buff *skb;
        int delta;
 
        ENTER;
@@ -1161,7 +1160,6 @@ static void bdx_recycle_skb(struct bdx_priv *priv, struct rxd_desc *rxdd)
        DBG("db=%p f=%p\n", db, f);
        dm = bdx_rxdb_addr_elem(db, rxdd->va_lo);
        DBG("dm=%p\n", dm);
-       skb = dm->skb;
        rxfd = (struct rxf_desc *)(f->m.va + f->m.wptr);
        rxfd->info = CPU_CHIP_SWAP32(0x10003);  /* INFO=1 BC=3 */
        rxfd->va_lo = rxdd->va_lo;
index 358edab9e72eeee18b9c17d74e66f2de92d5cc87..f051ce35a440aa38441094de7b14b5da2ab065d7 100644 (file)
 #include <linux/sys_soc.h>
 
 #include <linux/pinctrl/consumer.h>
+#include <net/pkt_cls.h>
 
 #include "cpsw.h"
 #include "cpsw_ale.h"
 #include "cpts.h"
 #include "davinci_cpdma.h"
 
+#include <net/pkt_sched.h>
+
 #define CPSW_DEBUG     (NETIF_MSG_HW           | NETIF_MSG_WOL         | \
                         NETIF_MSG_DRV          | NETIF_MSG_LINK        | \
                         NETIF_MSG_IFUP         | NETIF_MSG_INTR        | \
@@ -153,6 +156,12 @@ do {                                                               \
 #define IRQ_NUM                        2
 #define CPSW_MAX_QUEUES                8
 #define CPSW_CPDMA_DESCS_POOL_SIZE_DEFAULT 256
+#define CPSW_FIFO_QUEUE_TYPE_SHIFT     16
+#define CPSW_FIFO_SHAPE_EN_SHIFT       16
+#define CPSW_FIFO_RATE_EN_SHIFT                20
+#define CPSW_TC_NUM                    4
+#define CPSW_FIFO_SHAPERS_NUM          (CPSW_TC_NUM - 1)
+#define CPSW_PCT_MASK                  0x7f
 
 #define CPSW_RX_VLAN_ENCAP_HDR_PRIO_SHIFT      29
 #define CPSW_RX_VLAN_ENCAP_HDR_PRIO_MSK                GENMASK(2, 0)
@@ -253,23 +262,24 @@ struct cpsw_ss_regs {
 #define RX_DSCP_PRI_MAP7    0x4c /* Rx DSCP Priority to Rx Packet Mapping */
 
 /* Bit definitions for the CPSW2_CONTROL register */
-#define PASS_PRI_TAGGED     (1<<24) /* Pass Priority Tagged */
-#define VLAN_LTYPE2_EN      (1<<21) /* VLAN LTYPE 2 enable */
-#define VLAN_LTYPE1_EN      (1<<20) /* VLAN LTYPE 1 enable */
-#define DSCP_PRI_EN         (1<<16) /* DSCP Priority Enable */
-#define TS_320              (1<<14) /* Time Sync Dest Port 320 enable */
-#define TS_319              (1<<13) /* Time Sync Dest Port 319 enable */
-#define TS_132              (1<<12) /* Time Sync Dest IP Addr 132 enable */
-#define TS_131              (1<<11) /* Time Sync Dest IP Addr 131 enable */
-#define TS_130              (1<<10) /* Time Sync Dest IP Addr 130 enable */
-#define TS_129              (1<<9)  /* Time Sync Dest IP Addr 129 enable */
-#define TS_TTL_NONZERO      (1<<8)  /* Time Sync Time To Live Non-zero enable */
-#define TS_ANNEX_F_EN       (1<<6)  /* Time Sync Annex F enable */
-#define TS_ANNEX_D_EN       (1<<4)  /* Time Sync Annex D enable */
-#define TS_LTYPE2_EN        (1<<3)  /* Time Sync LTYPE 2 enable */
-#define TS_LTYPE1_EN        (1<<2)  /* Time Sync LTYPE 1 enable */
-#define TS_TX_EN            (1<<1)  /* Time Sync Transmit Enable */
-#define TS_RX_EN            (1<<0)  /* Time Sync Receive Enable */
+#define PASS_PRI_TAGGED     BIT(24) /* Pass Priority Tagged */
+#define VLAN_LTYPE2_EN      BIT(21) /* VLAN LTYPE 2 enable */
+#define VLAN_LTYPE1_EN      BIT(20) /* VLAN LTYPE 1 enable */
+#define DSCP_PRI_EN         BIT(16) /* DSCP Priority Enable */
+#define TS_107              BIT(15) /* Time Sync Dest IP Address 107 */
+#define TS_320              BIT(14) /* Time Sync Dest Port 320 enable */
+#define TS_319              BIT(13) /* Time Sync Dest Port 319 enable */
+#define TS_132              BIT(12) /* Time Sync Dest IP Addr 132 enable */
+#define TS_131              BIT(11) /* Time Sync Dest IP Addr 131 enable */
+#define TS_130              BIT(10) /* Time Sync Dest IP Addr 130 enable */
+#define TS_129              BIT(9)  /* Time Sync Dest IP Addr 129 enable */
+#define TS_TTL_NONZERO      BIT(8)  /* Time Sync Time To Live Non-zero enable */
+#define TS_ANNEX_F_EN       BIT(6)  /* Time Sync Annex F enable */
+#define TS_ANNEX_D_EN       BIT(4)  /* Time Sync Annex D enable */
+#define TS_LTYPE2_EN        BIT(3)  /* Time Sync LTYPE 2 enable */
+#define TS_LTYPE1_EN        BIT(2)  /* Time Sync LTYPE 1 enable */
+#define TS_TX_EN            BIT(1)  /* Time Sync Transmit Enable */
+#define TS_RX_EN            BIT(0)  /* Time Sync Receive Enable */
 
 #define CTRL_V2_TS_BITS \
        (TS_320 | TS_319 | TS_132 | TS_131 | TS_130 | TS_129 |\
@@ -281,7 +291,7 @@ struct cpsw_ss_regs {
 
 
 #define CTRL_V3_TS_BITS \
-       (TS_320 | TS_319 | TS_132 | TS_131 | TS_130 | TS_129 |\
+       (TS_107 | TS_320 | TS_319 | TS_132 | TS_131 | TS_130 | TS_129 |\
         TS_TTL_NONZERO | TS_ANNEX_F_EN | TS_ANNEX_D_EN |\
         TS_LTYPE1_EN)
 
@@ -453,6 +463,9 @@ struct cpsw_priv {
        u8                              mac_addr[ETH_ALEN];
        bool                            rx_pause;
        bool                            tx_pause;
+       bool                            mqprio_hw;
+       int                             fifo_bw[CPSW_TC_NUM];
+       int                             shp_cfg_speed;
        u32 emac_port;
        struct cpsw_common *cpsw;
 };
@@ -552,40 +565,28 @@ static const struct cpsw_stats cpsw_gstrings_ch_stats[] = {
                                (func)(slave++, ##arg);                 \
        } while (0)
 
-#define cpsw_dual_emac_src_port_detect(cpsw, status, ndev, skb)                \
-       do {                                                            \
-               if (!cpsw->data.dual_emac)                              \
-                       break;                                          \
-               if (CPDMA_RX_SOURCE_PORT(status) == 1) {                \
-                       ndev = cpsw->slaves[0].ndev;                    \
-                       skb->dev = ndev;                                \
-               } else if (CPDMA_RX_SOURCE_PORT(status) == 2) {         \
-                       ndev = cpsw->slaves[1].ndev;                    \
-                       skb->dev = ndev;                                \
-               }                                                       \
-       } while (0)
-#define cpsw_add_mcast(cpsw, priv, addr)                               \
-       do {                                                            \
-               if (cpsw->data.dual_emac) {                             \
-                       struct cpsw_slave *slave = cpsw->slaves +       \
-                                               priv->emac_port;        \
-                       int slave_port = cpsw_get_slave_port(           \
-                                               slave->slave_num);      \
-                       cpsw_ale_add_mcast(cpsw->ale, addr,             \
-                               1 << slave_port | ALE_PORT_HOST,        \
-                               ALE_VLAN, slave->port_vlan, 0);         \
-               } else {                                                \
-                       cpsw_ale_add_mcast(cpsw->ale, addr,             \
-                               ALE_ALL_PORTS,                          \
-                               0, 0, 0);                               \
-               }                                                       \
-       } while (0)
-
 static inline int cpsw_get_slave_port(u32 slave_num)
 {
        return slave_num + 1;
 }
 
+static void cpsw_add_mcast(struct cpsw_priv *priv, u8 *addr)
+{
+       struct cpsw_common *cpsw = priv->cpsw;
+
+       if (cpsw->data.dual_emac) {
+               struct cpsw_slave *slave = cpsw->slaves + priv->emac_port;
+               int slave_port = cpsw_get_slave_port(slave->slave_num);
+
+               cpsw_ale_add_mcast(cpsw->ale, addr,
+                                  1 << slave_port | ALE_PORT_HOST,
+                                  ALE_VLAN, slave->port_vlan, 0);
+               return;
+       }
+
+       cpsw_ale_add_mcast(cpsw->ale, addr, ALE_ALL_PORTS, 0, 0, 0);
+}
+
 static void cpsw_set_promiscious(struct net_device *ndev, bool enable)
 {
        struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
@@ -693,7 +694,7 @@ static void cpsw_ndo_set_rx_mode(struct net_device *ndev)
 
                /* program multicast address list into ALE register */
                netdev_for_each_mc_addr(ha, ndev) {
-                       cpsw_add_mcast(cpsw, priv, (u8 *)ha->addr);
+                       cpsw_add_mcast(priv, ha->addr);
                }
        }
 }
@@ -785,10 +786,16 @@ static void cpsw_rx_handler(void *token, int len, int status)
        struct sk_buff          *skb = token;
        struct sk_buff          *new_skb;
        struct net_device       *ndev = skb->dev;
-       int                     ret = 0;
+       int                     ret = 0, port;
        struct cpsw_common      *cpsw = ndev_to_cpsw(ndev);
 
-       cpsw_dual_emac_src_port_detect(cpsw, status, ndev, skb);
+       if (cpsw->data.dual_emac) {
+               port = CPDMA_RX_SOURCE_PORT(status);
+               if (port) {
+                       ndev = cpsw->slaves[--port].ndev;
+                       skb->dev = ndev;
+               }
+       }
 
        if (unlikely(status < 0) || unlikely(!netif_running(ndev))) {
                /* In dual emac mode check for all interfaces */
@@ -967,8 +974,8 @@ static int cpsw_tx_mq_poll(struct napi_struct *napi_tx, int budget)
 
        /* process every unprocessed channel */
        ch_map = cpdma_ctrl_txchs_state(cpsw->dma);
-       for (ch = 0, num_tx = 0; ch_map; ch_map >>= 1, ch++) {
-               if (!(ch_map & 0x01))
+       for (ch = 0, num_tx = 0; ch_map & 0xff; ch_map <<= 1, ch++) {
+               if (!(ch_map & 0x80))
                        continue;
 
                txv = &cpsw->txv[ch];
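Editor's note: this hunk reverses the TX completion scan. Because TX CPDMA channels are now allocated top-down elsewhere in this patch (vch = 7 - *ch, and txv[0] gets hardware channel 7 when the IRQ quirk is not in effect), the poll walks the channel-state map MSB-first. A minimal sketch of the new iteration order (illustration only, not part of the commit):

    #include <stdio.h>

    int main(void)
    {
        unsigned int ch_map = 0xa0;     /* hw channels 7 and 5 have work */
        unsigned int ch;

        for (ch = 0; ch_map & 0xff; ch_map <<= 1, ch++) {
            if (!(ch_map & 0x80))
                continue;
            /* iteration ch services txv[ch], i.e. hw channel 7 - ch */
            printf("service txv[%u] (hw ch %u)\n", ch, 7 - ch);
        }
        return 0;
    }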
@@ -1077,6 +1084,38 @@ static void cpsw_set_slave_mac(struct cpsw_slave *slave,
        slave_write(slave, mac_lo(priv->mac_addr), SA_LO);
 }
 
+static bool cpsw_shp_is_off(struct cpsw_priv *priv)
+{
+       struct cpsw_common *cpsw = priv->cpsw;
+       struct cpsw_slave *slave;
+       u32 shift, mask, val;
+
+       val = readl_relaxed(&cpsw->regs->ptype);
+
+       slave = &cpsw->slaves[cpsw_slave_index(cpsw, priv)];
+       shift = CPSW_FIFO_SHAPE_EN_SHIFT + 3 * slave->slave_num;
+       mask = 7 << shift;
+       val = val & mask;
+
+       return !val;
+}
+
+static void cpsw_fifo_shp_on(struct cpsw_priv *priv, int fifo, int on)
+{
+       struct cpsw_common *cpsw = priv->cpsw;
+       struct cpsw_slave *slave;
+       u32 shift, mask, val;
+
+       val = readl_relaxed(&cpsw->regs->ptype);
+
+       slave = &cpsw->slaves[cpsw_slave_index(cpsw, priv)];
+       shift = CPSW_FIFO_SHAPE_EN_SHIFT + 3 * slave->slave_num;
+       mask = (1 << --fifo) << shift;
+       val = on ? val | mask : val & ~mask;
+
+       writel_relaxed(val, &cpsw->regs->ptype);
+}
+
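Editor's note: cpsw_shp_is_off() and cpsw_fifo_shp_on() both address a per-slave 3-bit shaper-enable field in the ptype register, starting at bit 16 for slave 0 and bit 19 for slave 1. A small sketch of which bit a given (slave, fifo) pair toggles (illustration only, bit layout as implied by the code above):

    #include <stdio.h>

    #define CPSW_FIFO_SHAPE_EN_SHIFT 16

    static unsigned int shp_bit(int slave_num, int fifo)
    {
        unsigned int shift = CPSW_FIFO_SHAPE_EN_SHIFT + 3 * slave_num;

        /* matches mask = (1 << (fifo - 1)) << shift in cpsw_fifo_shp_on() */
        return shift + (fifo - 1);
    }

    int main(void)
    {
        /* slave 0: FIFO1..3 map to bits 16..18, slave 1 to bits 19..21 */
        printf("slave0/fifo3 -> bit %u\n", shp_bit(0, 3));
        printf("slave1/fifo1 -> bit %u\n", shp_bit(1, 1));
        return 0;
    }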
 static void _cpsw_adjust_link(struct cpsw_slave *slave,
                              struct cpsw_priv *priv, bool *link)
 {
@@ -1116,6 +1155,12 @@ static void _cpsw_adjust_link(struct cpsw_slave *slave,
                        mac_control |= BIT(4);
 
                *link = true;
+
+               if (priv->shp_cfg_speed &&
+                   priv->shp_cfg_speed != slave->phy->speed &&
+                   !cpsw_shp_is_off(priv))
+                       dev_warn(priv->dev,
+                                "Speed was changed, CBS shaper speeds are changed!");
        } else {
                mac_control = 0;
                /* disable forwarding */
@@ -1577,6 +1622,231 @@ static void cpsw_slave_stop(struct cpsw_slave *slave, struct cpsw_common *cpsw)
        soft_reset_slave(slave);
 }
 
+static int cpsw_tc_to_fifo(int tc, int num_tc)
+{
+       if (tc == num_tc - 1)
+               return 0;
+
+       return CPSW_FIFO_SHAPERS_NUM - tc;
+}
+
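Editor's note: the mapping is deliberately inverted: the last (lowest-priority) traffic class always lands on FIFO 0, which cannot be shaped, while tc0 gets the highest shapeable FIFO. A standalone sketch of the resulting table for num_tc = 4 (illustration only, not part of the commit):

    #include <stdio.h>

    #define CPSW_TC_NUM           4
    #define CPSW_FIFO_SHAPERS_NUM (CPSW_TC_NUM - 1)

    static int cpsw_tc_to_fifo(int tc, int num_tc)
    {
        if (tc == num_tc - 1)
            return 0;                    /* last tc -> unshaped FIFO 0 */
        return CPSW_FIFO_SHAPERS_NUM - tc;
    }

    int main(void)
    {
        int tc;

        /* prints tc0 -> FIFO3, tc1 -> FIFO2, tc2 -> FIFO1, tc3 -> FIFO0 */
        for (tc = 0; tc < CPSW_TC_NUM; tc++)
            printf("tc%d -> FIFO%d\n", tc, cpsw_tc_to_fifo(tc, CPSW_TC_NUM));
        return 0;
    }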
+static int cpsw_set_fifo_bw(struct cpsw_priv *priv, int fifo, int bw)
+{
+       struct cpsw_common *cpsw = priv->cpsw;
+       u32 val = 0, send_pct, shift;
+       struct cpsw_slave *slave;
+       int pct = 0, i;
+
+       if (bw > priv->shp_cfg_speed * 1000)
+               goto err;
+
+       /* shaping has to stay enabled for the highest FIFOs contiguously,
+        * and FIFO bandwidth must not exceed what the interface allows
+        */
+       slave = &cpsw->slaves[cpsw_slave_index(cpsw, priv)];
+       send_pct = slave_read(slave, SEND_PERCENT);
+       for (i = CPSW_FIFO_SHAPERS_NUM; i > 0; i--) {
+               if (!bw) {
+                       if (i >= fifo || !priv->fifo_bw[i])
+                               continue;
+
+                       dev_warn(priv->dev, "Prev FIFO%d is shaped", i);
+                       continue;
+               }
+
+               if (!priv->fifo_bw[i] && i > fifo) {
+                       dev_err(priv->dev, "Upper FIFO%d is not shaped", i);
+                       return -EINVAL;
+               }
+
+               shift = (i - 1) * 8;
+               if (i == fifo) {
+                       send_pct &= ~(CPSW_PCT_MASK << shift);
+                       val = DIV_ROUND_UP(bw, priv->shp_cfg_speed * 10);
+                       if (!val)
+                               val = 1;
+
+                       send_pct |= val << shift;
+                       pct += val;
+                       continue;
+               }
+
+               if (priv->fifo_bw[i])
+                       pct += (send_pct >> shift) & CPSW_PCT_MASK;
+       }
+
+       if (pct >= 100)
+               goto err;
+
+       slave_write(slave, send_pct, SEND_PERCENT);
+       priv->fifo_bw[fifo] = bw;
+
+       dev_warn(priv->dev, "set FIFO%d bw = %d\n", fifo,
+                DIV_ROUND_CLOSEST(val * priv->shp_cfg_speed, 100));
+
+       return 0;
+err:
+       dev_err(priv->dev, "Bandwidth doesn't fit in tc configuration");
+       return -EINVAL;
+}
+
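Editor's note: SEND_PERCENT holds one 7-bit percentage per shaped FIFO (CPSW_PCT_MASK, byte-aligned at shift (i - 1) * 8). Since bw arrives in kbit/s and shp_cfg_speed in Mbit/s, bw / (speed * 10) is the bandwidth as a percentage of line rate. A worked example of the arithmetic (illustration only, values are hypothetical):

    #include <stdio.h>

    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

    int main(void)
    {
        unsigned int speed = 1000;      /* link speed, Mbit/s */
        unsigned int bw = 30000;        /* requested idleslope, kbit/s */
        unsigned int pct = DIV_ROUND_UP(bw, speed * 10);

        /* 30000 / 10000 = 3, i.e. 3% of 1000 Mbit/s = 30 Mbit/s */
        printf("send percent = %u\n", pct);
        return 0;
    }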
+static int cpsw_set_fifo_rlimit(struct cpsw_priv *priv, int fifo, int bw)
+{
+       struct cpsw_common *cpsw = priv->cpsw;
+       struct cpsw_slave *slave;
+       u32 tx_in_ctl_rg, val;
+       int ret;
+
+       ret = cpsw_set_fifo_bw(priv, fifo, bw);
+       if (ret)
+               return ret;
+
+       slave = &cpsw->slaves[cpsw_slave_index(cpsw, priv)];
+       tx_in_ctl_rg = cpsw->version == CPSW_VERSION_1 ?
+                      CPSW1_TX_IN_CTL : CPSW2_TX_IN_CTL;
+
+       if (!bw)
+               cpsw_fifo_shp_on(priv, fifo, bw);
+
+       val = slave_read(slave, tx_in_ctl_rg);
+       if (cpsw_shp_is_off(priv)) {
+               /* disable FIFOs rate limited queues */
+               val &= ~(0xf << CPSW_FIFO_RATE_EN_SHIFT);
+
+               /* set type of FIFO queues to normal priority mode */
+               val &= ~(3 << CPSW_FIFO_QUEUE_TYPE_SHIFT);
+
+               /* set type of FIFO queues to be rate limited */
+               if (bw)
+                       val |= 2 << CPSW_FIFO_QUEUE_TYPE_SHIFT;
+               else
+                       priv->shp_cfg_speed = 0;
+       }
+
+       /* toggle a FIFO rate limited queue */
+       if (bw)
+               val |= BIT(fifo + CPSW_FIFO_RATE_EN_SHIFT);
+       else
+               val &= ~BIT(fifo + CPSW_FIFO_RATE_EN_SHIFT);
+       slave_write(slave, val, tx_in_ctl_rg);
+
+       /* FIFO transmit shape enable */
+       cpsw_fifo_shp_on(priv, fifo, bw);
+       return 0;
+}
+
+/* Defaults:
+ * class A - prio 3
+ * class B - prio 2
+ * shaping for class A should be set first
+ */
+static int cpsw_set_cbs(struct net_device *ndev,
+                       struct tc_cbs_qopt_offload *qopt)
+{
+       struct cpsw_priv *priv = netdev_priv(ndev);
+       struct cpsw_common *cpsw = priv->cpsw;
+       struct cpsw_slave *slave;
+       int prev_speed = 0;
+       int tc, ret, fifo;
+       u32 bw = 0;
+
+       tc = netdev_txq_to_tc(priv->ndev, qopt->queue);
+
+       /* enable channels in backward order, as the highest FIFOs must be
+        * rate limited first and for consistency with CPDMA rate limited
+        * channels, which are also used in backward order. FIFO0 cannot be
+        * rate limited.
+        */
+       fifo = cpsw_tc_to_fifo(tc, ndev->num_tc);
+       if (!fifo) {
+               dev_err(priv->dev, "Last tc%d can't be rate limited", tc);
+               return -EINVAL;
+       }
+
+       /* do nothing, it's disabled anyway */
+       if (!qopt->enable && !priv->fifo_bw[fifo])
+               return 0;
+
+       /* shapers can be set if link speed is known */
+       slave = &cpsw->slaves[cpsw_slave_index(cpsw, priv)];
+       if (slave->phy && slave->phy->link) {
+               if (priv->shp_cfg_speed &&
+                   priv->shp_cfg_speed != slave->phy->speed)
+                       prev_speed = priv->shp_cfg_speed;
+
+               priv->shp_cfg_speed = slave->phy->speed;
+       }
+
+       if (!priv->shp_cfg_speed) {
+               dev_err(priv->dev, "Link speed is not known");
+               return -1;
+       }
+
+       ret = pm_runtime_get_sync(cpsw->dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(cpsw->dev);
+               return ret;
+       }
+
+       bw = qopt->enable ? qopt->idleslope : 0;
+       ret = cpsw_set_fifo_rlimit(priv, fifo, bw);
+       if (ret) {
+               priv->shp_cfg_speed = prev_speed;
+               prev_speed = 0;
+       }
+
+       if (bw && prev_speed)
+               dev_warn(priv->dev,
+                        "Speed was changed, CBS shaper speeds are changed!");
+
+       pm_runtime_put_sync(cpsw->dev);
+       return ret;
+}
+
+static void cpsw_cbs_resume(struct cpsw_slave *slave, struct cpsw_priv *priv)
+{
+       int fifo, bw;
+
+       for (fifo = CPSW_FIFO_SHAPERS_NUM; fifo > 0; fifo--) {
+               bw = priv->fifo_bw[fifo];
+               if (!bw)
+                       continue;
+
+               cpsw_set_fifo_rlimit(priv, fifo, bw);
+       }
+}
+
+static void cpsw_mqprio_resume(struct cpsw_slave *slave, struct cpsw_priv *priv)
+{
+       struct cpsw_common *cpsw = priv->cpsw;
+       u32 tx_prio_map = 0;
+       int i, tc, fifo;
+       u32 tx_prio_rg;
+
+       if (!priv->mqprio_hw)
+               return;
+
+       for (i = 0; i < 8; i++) {
+               tc = netdev_get_prio_tc_map(priv->ndev, i);
+               fifo = CPSW_FIFO_SHAPERS_NUM - tc;
+               tx_prio_map |= fifo << (4 * i);
+       }
+
+       tx_prio_rg = cpsw->version == CPSW_VERSION_1 ?
+                    CPSW1_TX_PRI_MAP : CPSW2_TX_PRI_MAP;
+
+       slave_write(slave, tx_prio_map, tx_prio_rg);
+}
+
+/* restore resources after port reset */
+static void cpsw_restore(struct cpsw_priv *priv)
+{
+       /* restore MQPRIO offload */
+       for_each_slave(priv, cpsw_mqprio_resume, priv);
+
+       /* restore CBS offload */
+       for_each_slave(priv, cpsw_cbs_resume, priv);
+}
+
 static int cpsw_ndo_open(struct net_device *ndev)
 {
        struct cpsw_priv *priv = netdev_priv(ndev);
@@ -1656,6 +1926,8 @@ static int cpsw_ndo_open(struct net_device *ndev)
 
        }
 
+       cpsw_restore(priv);
+
        /* Enable Interrupt pacing if configured */
        if (cpsw->coal_intvl != 0) {
                struct ethtool_coalesce coal;
@@ -2190,6 +2462,78 @@ static int cpsw_ndo_set_tx_maxrate(struct net_device *ndev, int queue, u32 rate)
        return ret;
 }
 
+static int cpsw_set_mqprio(struct net_device *ndev, void *type_data)
+{
+       struct tc_mqprio_qopt_offload *mqprio = type_data;
+       struct cpsw_priv *priv = netdev_priv(ndev);
+       struct cpsw_common *cpsw = priv->cpsw;
+       int fifo, num_tc, count, offset;
+       struct cpsw_slave *slave;
+       u32 tx_prio_map = 0;
+       int i, tc, ret;
+
+       num_tc = mqprio->qopt.num_tc;
+       if (num_tc > CPSW_TC_NUM)
+               return -EINVAL;
+
+       if (mqprio->mode != TC_MQPRIO_MODE_DCB)
+               return -EINVAL;
+
+       ret = pm_runtime_get_sync(cpsw->dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(cpsw->dev);
+               return ret;
+       }
+
+       if (num_tc) {
+               for (i = 0; i < 8; i++) {
+                       tc = mqprio->qopt.prio_tc_map[i];
+                       fifo = cpsw_tc_to_fifo(tc, num_tc);
+                       tx_prio_map |= fifo << (4 * i);
+               }
+
+               netdev_set_num_tc(ndev, num_tc);
+               for (i = 0; i < num_tc; i++) {
+                       count = mqprio->qopt.count[i];
+                       offset = mqprio->qopt.offset[i];
+                       netdev_set_tc_queue(ndev, i, count, offset);
+               }
+       }
+
+       if (!mqprio->qopt.hw) {
+               /* restore default configuration */
+               netdev_reset_tc(ndev);
+               tx_prio_map = TX_PRIORITY_MAPPING;
+       }
+
+       priv->mqprio_hw = mqprio->qopt.hw;
+
+       offset = cpsw->version == CPSW_VERSION_1 ?
+                CPSW1_TX_PRI_MAP : CPSW2_TX_PRI_MAP;
+
+       slave = &cpsw->slaves[cpsw_slave_index(cpsw, priv)];
+       slave_write(slave, tx_prio_map, offset);
+
+       pm_runtime_put_sync(cpsw->dev);
+
+       return 0;
+}
+
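Editor's note: TX_PRI_MAP packs one 4-bit FIFO number per packet priority 0..7, priority 0 in the lowest nibble. A sketch of the packing for a hypothetical num_tc = 3 setup (priorities 0-3 on tc2, 4-5 on tc1, 6-7 on tc0; illustration only, not part of the commit):

    #include <stdio.h>

    #define CPSW_FIFO_SHAPERS_NUM 3

    static int cpsw_tc_to_fifo(int tc, int num_tc)
    {
        return tc == num_tc - 1 ? 0 : CPSW_FIFO_SHAPERS_NUM - tc;
    }

    int main(void)
    {
        int prio_tc_map[8] = { 2, 2, 2, 2, 1, 1, 0, 0 };
        unsigned int tx_prio_map = 0;
        int i;

        for (i = 0; i < 8; i++)
            tx_prio_map |= cpsw_tc_to_fifo(prio_tc_map[i], 3) << (4 * i);

        printf("TX_PRI_MAP = 0x%08x\n", tx_prio_map);   /* 0x33220000 */
        return 0;
    }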
+static int cpsw_ndo_setup_tc(struct net_device *ndev, enum tc_setup_type type,
+                            void *type_data)
+{
+       switch (type) {
+       case TC_SETUP_QDISC_CBS:
+               return cpsw_set_cbs(ndev, type_data);
+
+       case TC_SETUP_QDISC_MQPRIO:
+               return cpsw_set_mqprio(ndev, type_data);
+
+       default:
+               return -EOPNOTSUPP;
+       }
+}
+
 static const struct net_device_ops cpsw_netdev_ops = {
        .ndo_open               = cpsw_ndo_open,
        .ndo_stop               = cpsw_ndo_stop,
@@ -2205,6 +2549,7 @@ static const struct net_device_ops cpsw_netdev_ops = {
 #endif
        .ndo_vlan_rx_add_vid    = cpsw_ndo_vlan_rx_add_vid,
        .ndo_vlan_rx_kill_vid   = cpsw_ndo_vlan_rx_kill_vid,
+       .ndo_setup_tc           = cpsw_ndo_setup_tc,
 };
 
 static int cpsw_get_regs_len(struct net_device *ndev)
@@ -2431,7 +2776,7 @@ static int cpsw_update_channels_res(struct cpsw_priv *priv, int ch_num, int rx)
        void (*handler)(void *, int, int);
        struct netdev_queue *queue;
        struct cpsw_vector *vec;
-       int ret, *ch;
+       int ret, *ch, vch;
 
        if (rx) {
                ch = &cpsw->rx_ch_num;
@@ -2444,7 +2789,8 @@ static int cpsw_update_channels_res(struct cpsw_priv *priv, int ch_num, int rx)
        }
 
        while (*ch < ch_num) {
-               vec[*ch].ch = cpdma_chan_create(cpsw->dma, *ch, handler, rx);
+               vch = rx ? *ch : 7 - *ch;
+               vec[*ch].ch = cpdma_chan_create(cpsw->dma, vch, handler, rx);
                queue = netdev_get_tx_queue(priv->ndev, *ch);
                queue->tx_maxrate = 0;
 
@@ -2927,7 +3273,7 @@ static int cpsw_probe_dual_emac(struct cpsw_priv *priv)
                dev_info(cpsw->dev, "cpsw: Detected MACID = %pM\n",
                         priv_sl2->mac_addr);
        } else {
-               random_ether_addr(priv_sl2->mac_addr);
+               eth_random_addr(priv_sl2->mac_addr);
                dev_info(cpsw->dev, "cpsw: Random MACID = %pM\n",
                         priv_sl2->mac_addr);
        }
@@ -2935,7 +3281,7 @@ static int cpsw_probe_dual_emac(struct cpsw_priv *priv)
 
        priv_sl2->emac_port = 1;
        cpsw->slaves[1].ndev = ndev;
-       ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+       ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_CTAG_RX;
 
        ndev->netdev_ops = &cpsw_netdev_ops;
        ndev->ethtool_ops = &cpsw_ethtool_ops;
@@ -2981,7 +3327,7 @@ static int cpsw_probe(struct platform_device *pdev)
        u32 slave_offset, sliver_offset, slave_size;
        const struct soc_device_attribute *soc;
        struct cpsw_common              *cpsw;
-       int ret = 0, i;
+       int ret = 0, i, ch;
        int irq;
 
        cpsw = devm_kzalloc(&pdev->dev, sizeof(struct cpsw_common), GFP_KERNEL);
@@ -3156,7 +3502,8 @@ static int cpsw_probe(struct platform_device *pdev)
        if (soc)
                cpsw->quirk_irq = 1;
 
-       cpsw->txv[0].ch = cpdma_chan_create(cpsw->dma, 0, cpsw_tx_handler, 0);
+       ch = cpsw->quirk_irq ? 0 : 7;
+       cpsw->txv[0].ch = cpdma_chan_create(cpsw->dma, ch, cpsw_tx_handler, 0);
        if (IS_ERR(cpsw->txv[0].ch)) {
                dev_err(priv->dev, "error initializing tx dma channel\n");
                ret = PTR_ERR(cpsw->txv[0].ch);
index 6f63c8729afca1cf4e621bd1b18763850f33ffd3..b4ea58dc8caf878c231e5df845cbfc27357580c4 100644 (file)
@@ -114,7 +114,10 @@ static bool cpts_match_tx_ts(struct cpts *cpts, struct cpts_event *event)
                        dev_consume_skb_any(skb);
                        dev_dbg(cpts->dev, "match tx timestamp mtype %u seqid %04x\n",
                                mtype, seqid);
-               } else if (time_after(jiffies, skb_cb->tmo)) {
+                       break;
+               }
+
+               if (time_after(jiffies, skb_cb->tmo)) {
                        /* timeout any expired skbs over 1s */
                        dev_dbg(cpts->dev,
                                "expiring tx timestamp mtype %u seqid %04x\n",
index 4f1267477aa4b56b7f3e1d19420302728da56e7d..4236dcdd56348512d6d2393ea27c19c0bd9078bf 100644 (file)
@@ -406,37 +406,36 @@ static int cpdma_chan_fit_rate(struct cpdma_chan *ch, u32 rate,
        struct cpdma_chan *chan;
        u32 old_rate = ch->rate;
        u32 new_rmask = 0;
-       int rlim = 1;
+       int rlim = 0;
        int i;
 
-       *prio_mode = 0;
        for (i = tx_chan_num(0); i < tx_chan_num(CPDMA_MAX_CHANNELS); i++) {
                chan = ctlr->channels[i];
-               if (!chan) {
-                       rlim = 0;
+               if (!chan)
                        continue;
-               }
 
                if (chan == ch)
                        chan->rate = rate;
 
                if (chan->rate) {
-                       if (rlim) {
-                               new_rmask |= chan->mask;
-                       } else {
-                               ch->rate = old_rate;
-                               dev_err(ctlr->dev, "Prev channel of %dch is not rate limited\n",
-                                       chan->chan_num);
-                               return -EINVAL;
-                       }
-               } else {
-                       *prio_mode = 1;
-                       rlim = 0;
+                       rlim = 1;
+                       new_rmask |= chan->mask;
+                       continue;
                }
+
+               if (rlim)
+                       goto err;
        }
 
        *rmask = new_rmask;
+       *prio_mode = rlim;
        return 0;
+
+err:
+       ch->rate = old_rate;
+       dev_err(ctlr->dev, "Upper cpdma ch%d is not rate limited\n",
+               chan->chan_num);
+       return -EINVAL;
 }
 
 static u32 cpdma_chan_set_factors(struct cpdma_ctlr *ctlr,
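Editor's note: the rewritten cpdma_chan_fit_rate() enforces one invariant: scanning the TX channels in cpdma order, once a rated channel is seen every later one must be rated too, and *prio_mode ends up set exactly when any channel carries a rate. A condensed sketch of the accept/reject logic (illustration only, not part of the commit):

    #include <stdio.h>

    /* returns 1 if the rate layout is acceptable, 0 otherwise */
    static int rates_ok(const unsigned int *rate, int n)
    {
        int i, rlim = 0;

        for (i = 0; i < n; i++) {
            if (rate[i]) {
                rlim = 1;       /* every later channel must be rated */
                continue;
            }
            if (rlim)
                return 0;       /* unrated channel after a rated one */
        }
        return 1;
    }

    int main(void)
    {
        unsigned int ok[4]  = { 0, 0, 1000, 2000 };
        unsigned int bad[4] = { 0, 1000, 0, 2000 };

        printf("%d %d\n", rates_ok(ok, 4), rates_ok(bad, 4));  /* 1 0 */
        return 0;
    }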
index e40aa3e31af2345489a4dbc896593c7dc0f574cd..a1d335a3c5e43884348b5d96a9bc9eaed92f0bcb 100644 (file)
@@ -1889,13 +1889,6 @@ static int netcp_rx_kill_vid(struct net_device *ndev, __be16 proto, u16 vid)
        return err;
 }
 
-static u16 netcp_select_queue(struct net_device *dev, struct sk_buff *skb,
-                             void *accel_priv,
-                             select_queue_fallback_t fallback)
-{
-       return 0;
-}
-
 static int netcp_setup_tc(struct net_device *dev, enum tc_setup_type type,
                          void *type_data)
 {
@@ -1972,7 +1965,7 @@ static const struct net_device_ops netcp_netdev_ops = {
        .ndo_vlan_rx_add_vid    = netcp_rx_add_vid,
        .ndo_vlan_rx_kill_vid   = netcp_rx_kill_vid,
        .ndo_tx_timeout         = netcp_ndo_tx_timeout,
-       .ndo_select_queue       = netcp_select_queue,
+       .ndo_select_queue       = dev_pick_tx_zero,
        .ndo_setup_tc           = netcp_setup_tc,
 };
 
@@ -2052,7 +2045,7 @@ static int netcp_create_interface(struct netcp_device *netcp_device,
                if (is_valid_ether_addr(efuse_mac_addr))
                        ether_addr_copy(ndev->dev_addr, efuse_mac_addr);
                else
-                       random_ether_addr(ndev->dev_addr);
+                       eth_random_addr(ndev->dev_addr);
 
                devm_iounmap(dev, efuse);
                devm_release_mem_region(dev, res.start, size);
@@ -2061,7 +2054,7 @@ static int netcp_create_interface(struct netcp_device *netcp_device,
                if (mac_addr)
                        ether_addr_copy(ndev->dev_addr, mac_addr);
                else
-                       random_ether_addr(ndev->dev_addr);
+                       eth_random_addr(ndev->dev_addr);
        }
 
        ret = of_property_read_string(node_interface, "rx-channel",
index 2a0c06e0f730c35e5e7d40d9d9893bf36cf9c6fa..42f1f518dad6939300905f8b6218f4f31fabd212 100644 (file)
@@ -70,7 +70,8 @@
 #define XEL_TSR_XMIT_IE_MASK    0x00000008     /* Tx interrupt enable bit */
 #define XEL_TSR_XMIT_ACTIVE_MASK 0x80000000    /* Buffer is active, SW bit
                                                 * only. This is not documented
-                                                * in the HW spec */
+                                                * in the HW spec
+                                                */
 
 /* Define for programming the MAC address into the EmacLite */
 #define XEL_TSR_PROG_MAC_ADDR  (XEL_TSR_XMIT_BUSY_MASK | XEL_TSR_PROGRAM_MASK)
 
 
 
-#define TX_TIMEOUT             (60*HZ)         /* Tx timeout is 60 seconds. */
+#define TX_TIMEOUT             (60 * HZ)       /* Tx timeout is 60 seconds. */
 #define ALIGNMENT              4
 
 /* BUFFER_ALIGN(adr) calculates the number of bytes to the next alignment. */
-#define BUFFER_ALIGN(adr) ((ALIGNMENT - ((u32) adr)) % ALIGNMENT)
+#define BUFFER_ALIGN(adr) ((ALIGNMENT - ((u32)adr)) % ALIGNMENT)
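Editor's note: BUFFER_ALIGN() computes how many padding bytes bring an address up to the next 4-byte boundary (0 if already aligned); unsigned wrap-around makes the subtraction safe. A quick check (illustration only; u32 in the driver becomes unsigned int for a hosted build):

    #include <stdio.h>

    #define ALIGNMENT 4
    #define BUFFER_ALIGN(adr) ((ALIGNMENT - ((unsigned int)(adr))) % ALIGNMENT)

    int main(void)
    {
        /* 0x1001 needs 3 bytes of padding, 0x1004 is already aligned */
        printf("%u %u\n", BUFFER_ALIGN(0x1001), BUFFER_ALIGN(0x1004));
        return 0;
    }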
 
 #ifdef __BIG_ENDIAN
 #define xemaclite_readl                ioread32be
@@ -238,8 +239,8 @@ static void xemaclite_aligned_write(void *src_ptr, u32 *dest_ptr,
 
                /* Set up to output the remaining data */
                align_buffer = 0;
-               to_u8_ptr = (u8 *) &align_buffer;
-               from_u8_ptr = (u8 *) from_u16_ptr;
+               to_u8_ptr = (u8 *)&align_buffer;
+               from_u8_ptr = (u8 *)from_u16_ptr;
 
                /* Output the remaining data */
                for (; length > 0; length--)
@@ -272,7 +273,7 @@ static void xemaclite_aligned_read(u32 *src_ptr, u8 *dest_ptr,
        u32 align_buffer;
 
        from_u32_ptr = src_ptr;
-       to_u16_ptr = (u16 *) dest_ptr;
+       to_u16_ptr = (u16 *)dest_ptr;
 
        for (; length > 3; length -= 4) {
                /* Copy each word into the temporary buffer */
@@ -288,9 +289,9 @@ static void xemaclite_aligned_read(u32 *src_ptr, u8 *dest_ptr,
                u8 *to_u8_ptr, *from_u8_ptr;
 
                /* Set up to read the remaining data */
-               to_u8_ptr = (u8 *) to_u16_ptr;
+               to_u8_ptr = (u8 *)to_u16_ptr;
                align_buffer = *from_u32_ptr++;
-               from_u8_ptr = (u8 *) &align_buffer;
+               from_u8_ptr = (u8 *)&align_buffer;
 
                /* Read the remaining data */
                for (; length > 0; length--)
@@ -336,7 +337,8 @@ static int xemaclite_send_data(struct net_local *drvdata, u8 *data,
                        drvdata->next_tx_buf_to_use ^= XEL_BUFFER_OFFSET;
        } else if (drvdata->tx_ping_pong != 0) {
                /* If the expected buffer is full, try the other buffer,
-                * if it is configured in HW */
+                * if it is configured in HW
+                */
 
                addr = (void __iomem __force *)((u32 __force)addr ^
                                                 XEL_BUFFER_OFFSET);
@@ -349,7 +351,7 @@ static int xemaclite_send_data(struct net_local *drvdata, u8 *data,
                return -1; /* Buffer was full, return failure */
 
        /* Write the frame to the buffer */
-       xemaclite_aligned_write(data, (u32 __force *) addr, byte_count);
+       xemaclite_aligned_write(data, (u32 __force *)addr, byte_count);
 
        xemaclite_writel((byte_count & XEL_TPLR_LENGTH_MASK),
                         addr + XEL_TPLR_OFFSET);
@@ -357,7 +359,8 @@ static int xemaclite_send_data(struct net_local *drvdata, u8 *data,
        /* Update the Tx Status Register to indicate that there is a
         * frame to send. Set the XEL_TSR_XMIT_ACTIVE_MASK flag which
         * is used by the interrupt handler to check whether a frame
-        * has been transmitted */
+        * has been transmitted
+        */
        reg_data = xemaclite_readl(addr + XEL_TSR_OFFSET);
        reg_data |= (XEL_TSR_XMIT_BUSY_MASK | XEL_TSR_XMIT_ACTIVE_MASK);
        xemaclite_writel(reg_data, addr + XEL_TSR_OFFSET);
@@ -369,6 +372,7 @@ static int xemaclite_send_data(struct net_local *drvdata, u8 *data,
  * xemaclite_recv_data - Receive a frame
  * @drvdata:   Pointer to the Emaclite device private data
  * @data:      Address where the data is to be received
+ * @maxlen:    Maximum supported ethernet packet length
  *
  * This function is intended to be called from the interrupt context or
  * with a wrapper which waits for the receive frame to be available.
@@ -394,7 +398,8 @@ static u16 xemaclite_recv_data(struct net_local *drvdata, u8 *data, int maxlen)
                /* The instance is out of sync, try other buffer if other
                 * buffer is configured, return 0 otherwise. If the instance is
                 * out of sync, do not update the 'next_rx_buf_to_use' since it
-                * will correct on subsequent calls */
+                * will correct on subsequent calls
+                */
                if (drvdata->rx_ping_pong != 0)
                        addr = (void __iomem __force *)((u32 __force)addr ^
                                                         XEL_BUFFER_OFFSET);
@@ -408,13 +413,15 @@ static u16 xemaclite_recv_data(struct net_local *drvdata, u8 *data, int maxlen)
                        return 0;       /* No data was available */
        }
 
-       /* Get the protocol type of the ethernet frame that arrived */
+       /* Get the protocol type of the ethernet frame that arrived
+        */
        proto_type = ((ntohl(xemaclite_readl(addr + XEL_HEADER_OFFSET +
                        XEL_RXBUFF_OFFSET)) >> XEL_HEADER_SHIFT) &
                        XEL_RPLR_LENGTH_MASK);
 
        /* Check if received ethernet frame is a raw ethernet frame
-        * or an IP packet or an ARP packet */
+        * or an IP packet or an ARP packet
+        */
        if (proto_type > ETH_DATA_LEN) {
 
                if (proto_type == ETH_P_IP) {
@@ -430,7 +437,8 @@ static u16 xemaclite_recv_data(struct net_local *drvdata, u8 *data, int maxlen)
                        length = XEL_ARP_PACKET_SIZE + ETH_HLEN + ETH_FCS_LEN;
                else
                        /* Field contains type other than IP or ARP, use max
-                        * frame size and let user parse it */
+                        * frame size and let user parse it
+                        */
                        length = ETH_FRAME_LEN + ETH_FCS_LEN;
        } else
                /* Use the length in the frame, plus the header and trailer */
@@ -440,7 +448,7 @@ static u16 xemaclite_recv_data(struct net_local *drvdata, u8 *data, int maxlen)
                length = maxlen;
 
        /* Read from the EmacLite device */
-       xemaclite_aligned_read((u32 __force *) (addr + XEL_RXBUFF_OFFSET),
+       xemaclite_aligned_read((u32 __force *)(addr + XEL_RXBUFF_OFFSET),
                                data, length);
 
        /* Acknowledge the frame */
@@ -471,7 +479,7 @@ static void xemaclite_update_address(struct net_local *drvdata,
        /* Determine the expected Tx buffer address */
        addr = drvdata->base_addr + drvdata->next_tx_buf_to_use;
 
-       xemaclite_aligned_write(address_ptr, (u32 __force *) addr, ETH_ALEN);
+       xemaclite_aligned_write(address_ptr, (u32 __force *)addr, ETH_ALEN);
 
        xemaclite_writel(ETH_ALEN, addr + XEL_TPLR_OFFSET);
 
@@ -488,7 +496,7 @@ static void xemaclite_update_address(struct net_local *drvdata,
 /**
  * xemaclite_set_mac_address - Set the MAC address for this device
  * @dev:       Pointer to the network device instance
- * @addr:      Void pointer to the sockaddr structure
+ * @address:   Void pointer to the sockaddr structure
  *
  * This function copies the HW address from the sockaddr structure to the
  * net_device structure and updates the address in HW.
@@ -564,19 +572,19 @@ static void xemaclite_tx_handler(struct net_device *dev)
        struct net_local *lp = netdev_priv(dev);
 
        dev->stats.tx_packets++;
-       if (lp->deferred_skb) {
-               if (xemaclite_send_data(lp,
-                                       (u8 *) lp->deferred_skb->data,
-                                       lp->deferred_skb->len) != 0)
-                       return;
-               else {
-                       dev->stats.tx_bytes += lp->deferred_skb->len;
-                       dev_kfree_skb_irq(lp->deferred_skb);
-                       lp->deferred_skb = NULL;
-                       netif_trans_update(dev); /* prevent tx timeout */
-                       netif_wake_queue(dev);
-               }
-       }
+
+       if (!lp->deferred_skb)
+               return;
+
+       if (xemaclite_send_data(lp, (u8 *)lp->deferred_skb->data,
+                               lp->deferred_skb->len))
+               return;
+
+       dev->stats.tx_bytes += lp->deferred_skb->len;
+       dev_kfree_skb_irq(lp->deferred_skb);
+       lp->deferred_skb = NULL;
+       netif_trans_update(dev); /* prevent tx timeout */
+       netif_wake_queue(dev);
 }
 
 /**
@@ -602,18 +610,18 @@ static void xemaclite_rx_handler(struct net_device *dev)
                return;
        }
 
-       /*
-        * A new skb should have the data halfword aligned, but this code is
+       /* A new skb should have the data halfword aligned, but this code is
         * here just in case that isn't true. Calculate how many
         * bytes we should reserve to get the data to start on a word
-        * boundary */
+        * boundary
+        */
        align = BUFFER_ALIGN(skb->data);
        if (align)
                skb_reserve(skb, align);
 
        skb_reserve(skb, 2);
 
-       len = xemaclite_recv_data(lp, (u8 *) skb->data, len);
+       len = xemaclite_recv_data(lp, (u8 *)skb->data, len);
 
        if (!len) {
                dev->stats.rx_errors++;
@@ -639,6 +647,8 @@ static void xemaclite_rx_handler(struct net_device *dev)
  * @dev_id:    Void pointer to the network device instance used as callback
  *             reference
  *
+ * Return:     IRQ_HANDLED
+ *
  * This function handles the Tx and Rx interrupts of the EmacLite device.
  */
 static irqreturn_t xemaclite_interrupt(int irq, void *dev_id)
@@ -706,8 +716,8 @@ static int xemaclite_mdio_wait(struct net_local *lp)
        unsigned long end = jiffies + 2;
 
        /* wait for the MDIO interface to not be busy or timeout
-          after some time.
-       */
+        * after some time.
+        */
        while (xemaclite_readl(lp->base_addr + XEL_MDIOCTRL_OFFSET) &
                        XEL_MDIOCTRL_MDIOSTS_MASK) {
                if (time_before_eq(end, jiffies)) {
@@ -757,7 +767,7 @@ static int xemaclite_mdio_read(struct mii_bus *bus, int phy_id, int reg)
        rc = xemaclite_readl(lp->base_addr + XEL_MDIORD_OFFSET);
 
        dev_dbg(&lp->ndev->dev,
-               "xemaclite_mdio_read(phy_id=%i, reg=%x) == %x\n",
+               "%s(phy_id=%i, reg=%x) == %x\n", __func__,
                phy_id, reg, rc);
 
        return rc;
@@ -772,6 +782,8 @@ static int xemaclite_mdio_read(struct mii_bus *bus, int phy_id, int reg)
  *
  * This function waits till the device is ready to accept a new MDIO
  * request and then writes the val to the MDIO Write Data register.
+ *
+ * Return:      0 upon success or a negative error upon failure
  */
 static int xemaclite_mdio_write(struct mii_bus *bus, int phy_id, int reg,
                                u16 val)
@@ -780,7 +792,7 @@ static int xemaclite_mdio_write(struct mii_bus *bus, int phy_id, int reg,
        u32 ctrl_reg;
 
        dev_dbg(&lp->ndev->dev,
-               "xemaclite_mdio_write(phy_id=%i, reg=%x, val=%x)\n",
+               "%s(phy_id=%i, reg=%x, val=%x)\n", __func__,
                phy_id, reg, val);
 
        if (xemaclite_mdio_wait(lp))
@@ -805,7 +817,7 @@ static int xemaclite_mdio_write(struct mii_bus *bus, int phy_id, int reg,
 /**
  * xemaclite_mdio_setup - Register mii_bus for the Emaclite device
  * @lp:                Pointer to the Emaclite device private data
- * @ofdev:     Pointer to OF device structure
+ * @dev:       Pointer to OF device structure
  *
  * This function enables MDIO bus in the Emaclite device and registers a
  * mii_bus.
@@ -905,6 +917,9 @@ static void xemaclite_adjust_link(struct net_device *ndev)
  * This function sets the MAC address, requests an IRQ and enables interrupts
  * for the Emaclite device and starts the Tx queue.
  * It also connects to the phy device, if MDIO is included in Emaclite device.
+ *
+ * Return:     0 on success. -ENODEV, if PHY cannot be connected.
+ *             Non-zero error value on failure.
  */
 static int xemaclite_open(struct net_device *dev)
 {
@@ -975,6 +990,8 @@ static int xemaclite_open(struct net_device *dev)
  * This function stops the Tx queue, disables interrupts and frees the IRQ for
  * the Emaclite device.
  * It also disconnects the phy device associated with the Emaclite device.
+ *
+ * Return:     0, always.
  */
 static int xemaclite_close(struct net_device *dev)
 {
@@ -1017,10 +1034,11 @@ static int xemaclite_send(struct sk_buff *orig_skb, struct net_device *dev)
        new_skb = orig_skb;
 
        spin_lock_irqsave(&lp->reset_lock, flags);
-       if (xemaclite_send_data(lp, (u8 *) new_skb->data, len) != 0) {
+       if (xemaclite_send_data(lp, (u8 *)new_skb->data, len) != 0) {
                /* If the Emaclite Tx buffer is busy, stop the Tx queue and
                 * defer the skb for transmission during the ISR, after the
-                * current transmission is complete */
+                * current transmission is complete
+                */
                netif_stop_queue(dev);
                lp->deferred_skb = new_skb;
                /* Take the time stamp now, since we can't do this in an ISR. */
@@ -1052,13 +1070,12 @@ static bool get_bool(struct platform_device *ofdev, const char *s)
 {
        u32 *p = (u32 *)of_get_property(ofdev->dev.of_node, s, NULL);
 
-       if (p) {
-               return (bool)*p;
-       } else {
-               dev_warn(&ofdev->dev, "Parameter %s not found,"
-                       "defaulting to false\n", s);
+       if (!p) {
+               dev_warn(&ofdev->dev, "Parameter %s not found, defaulting to false\n", s);
                return false;
        }
+
+       return (bool)*p;
 }
 
 static const struct net_device_ops xemaclite_netdev_ops;
@@ -1066,7 +1083,6 @@ static const struct net_device_ops xemaclite_netdev_ops;
 /**
  * xemaclite_of_probe - Probe method for the Emaclite device.
  * @ofdev:     Pointer to OF device structure
- * @match:     Pointer to the structure used for matching a device
  *
  * This function probes for the Emaclite device in the device tree.
  * It initializes the driver data structure and the hardware, sets the MAC
index 750954be5a7403d0a6c752649866b43546354f88..d3eae123904575aba430883ab7a0ba4d28b8b07c 100644 (file)
@@ -1395,8 +1395,8 @@ static void fjes_watch_unshare_task(struct work_struct *work)
 
        while ((unshare_watch_bitmask || hw->txrx_stop_req_bit) &&
               (wait_time < 3000)) {
-               for (epidx = 0; epidx < hw->max_epid; epidx++) {
-                       if (epidx == hw->my_epid)
+               for (epidx = 0; epidx < max_epid; epidx++) {
+                       if (epidx == my_epid)
                                continue;
 
                        is_shared = fjes_hw_epid_is_shared(hw->hw_info.share,
@@ -1453,8 +1453,8 @@ static void fjes_watch_unshare_task(struct work_struct *work)
        }
 
        if (hw->hw_info.buffer_unshare_reserve_bit) {
-               for (epidx = 0; epidx < hw->max_epid; epidx++) {
-                       if (epidx == hw->my_epid)
+               for (epidx = 0; epidx < max_epid; epidx++) {
+                       if (epidx == my_epid)
                                continue;
 
                        if (test_bit(epidx,
index ada33c2d9ac20e01af4acec33727623204fda803..6acb6b5718b94c1050fdbef60a4a556e27c2275e 100644 (file)
@@ -236,7 +236,8 @@ static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs,
                }
                /* Update tunnel dst according to Geneve options. */
                ip_tunnel_info_opts_set(&tun_dst->u.tun_info,
-                                       gnvh->options, gnvh->opt_len * 4);
+                                       gnvh->options, gnvh->opt_len * 4,
+                                       TUNNEL_GENEVE_OPT);
        } else {
                /* Drop packets w/ critical options,
                 * since we don't support any...
@@ -418,11 +419,12 @@ static int geneve_hlen(struct genevehdr *gh)
        return sizeof(*gh) + gh->opt_len * 4;
 }
 
-static struct sk_buff **geneve_gro_receive(struct sock *sk,
-                                          struct sk_buff **head,
-                                          struct sk_buff *skb)
+static struct sk_buff *geneve_gro_receive(struct sock *sk,
+                                         struct list_head *head,
+                                         struct sk_buff *skb)
 {
-       struct sk_buff *p, **pp = NULL;
+       struct sk_buff *pp = NULL;
+       struct sk_buff *p;
        struct genevehdr *gh, *gh2;
        unsigned int hlen, gh_len, off_gnv;
        const struct packet_offload *ptype;
@@ -449,7 +451,7 @@ static struct sk_buff **geneve_gro_receive(struct sock *sk,
                        goto out;
        }
 
-       for (p = *head; p; p = p->next) {
+       list_for_each_entry(p, head, list) {
                if (!NAPI_GRO_CB(p)->same_flow)
                        continue;
 
@@ -674,7 +676,8 @@ static void geneve_build_header(struct genevehdr *geneveh,
        geneveh->proto_type = htons(ETH_P_TEB);
        geneveh->rsvd2 = 0;
 
-       ip_tunnel_info_opts_get(geneveh->options, info);
+       if (info->key.tun_flags & TUNNEL_GENEVE_OPT)
+               ip_tunnel_info_opts_get(geneveh->options, info);
 }
 
 static int geneve_build_skb(struct dst_entry *dst, struct sk_buff *skb,
index ec629a730005273924c40ace6977ad7fedecc315..7a145172d50385f56f54e4b114ab6194fd672ca5 100644 (file)
@@ -1255,7 +1255,7 @@ out:
        return skb->len;
 }
 
-static struct nla_policy gtp_genl_policy[GTPA_MAX + 1] = {
+static const struct nla_policy gtp_genl_policy[GTPA_MAX + 1] = {
        [GTPA_LINK]             = { .type = NLA_U32, },
        [GTPA_VERSION]          = { .type = NLA_U32, },
        [GTPA_TID]              = { .type = NLA_U64, },
index 32f49c4ce45783aa507b8473f4a5ab60e495fa8f..d79a69dd2146d347b3f44892e2534fe06290ca4a 100644 (file)
@@ -878,10 +878,8 @@ static void decode_data(struct sixpack *sp, unsigned char inbyte)
 
 static void decode_prio_command(struct sixpack *sp, unsigned char cmd)
 {
-       unsigned char channel;
        int actual;
 
-       channel = cmd & SIXP_CHN_MASK;
        if ((cmd & SIXP_PRIO_DATA_MASK) != 0) {     /* idle ? */
 
        /* RX and DCD flags can only be set in the same prio command,
@@ -933,10 +931,9 @@ static void decode_prio_command(struct sixpack *sp, unsigned char cmd)
 
 static void decode_std_command(struct sixpack *sp, unsigned char cmd)
 {
-       unsigned char checksum = 0, rest = 0, channel;
+       unsigned char checksum = 0, rest = 0;
        short i;
 
-       channel = cmd & SIXP_CHN_MASK;
        switch (cmd & SIXP_CMD_MASK) {     /* normal command */
        case SIXP_SEOF:
                if ((sp->rx_count == 0) && (sp->rx_count_cooked == 0)) {
index 4b6e308199d270cd455b7df0de20a8458f6b7941..a32ded5b4f416f662e2a820356f16c9bfbef00db 100644 (file)
@@ -873,6 +873,17 @@ struct netvsc_ethtool_stats {
        unsigned long wake_queue;
 };
 
+struct netvsc_ethtool_pcpu_stats {
+       u64     rx_packets;
+       u64     rx_bytes;
+       u64     tx_packets;
+       u64     tx_bytes;
+       u64     vf_rx_packets;
+       u64     vf_rx_bytes;
+       u64     vf_tx_packets;
+       u64     vf_tx_bytes;
+};
+
 struct netvsc_vf_pcpu_stats {
        u64     rx_packets;
        u64     rx_bytes;
index dd1d6e115145d4c14fb25d1883d1e42614e211a9..20275d1e6f9a969bed77561e516277bef5d3289b 100644 (file)
@@ -329,7 +329,7 @@ static u16 netvsc_pick_tx(struct net_device *ndev, struct sk_buff *skb)
 }
 
 static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
-                              void *accel_priv,
+                              struct net_device *sb_dev,
                               select_queue_fallback_t fallback)
 {
        struct net_device_context *ndc = netdev_priv(ndev);
@@ -343,9 +343,9 @@ static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
 
                if (vf_ops->ndo_select_queue)
                        txq = vf_ops->ndo_select_queue(vf_netdev, skb,
-                                                      accel_priv, fallback);
+                                                      sb_dev, fallback);
                else
-                       txq = fallback(vf_netdev, skb);
+                       txq = fallback(vf_netdev, skb, NULL);
 
                /* Record the queue selected by VF so that it can be
                 * used for common case where VF has more queues than
@@ -1118,6 +1118,64 @@ static void netvsc_get_vf_stats(struct net_device *net,
        }
 }
 
+static void netvsc_get_pcpu_stats(struct net_device *net,
+                                 struct netvsc_ethtool_pcpu_stats *pcpu_tot)
+{
+       struct net_device_context *ndev_ctx = netdev_priv(net);
+       struct netvsc_device *nvdev = rcu_dereference_rtnl(ndev_ctx->nvdev);
+       int i;
+
+       /* fetch percpu stats of vf */
+       for_each_possible_cpu(i) {
+               const struct netvsc_vf_pcpu_stats *stats =
+                       per_cpu_ptr(ndev_ctx->vf_stats, i);
+               struct netvsc_ethtool_pcpu_stats *this_tot = &pcpu_tot[i];
+               unsigned int start;
+
+               do {
+                       start = u64_stats_fetch_begin_irq(&stats->syncp);
+                       this_tot->vf_rx_packets = stats->rx_packets;
+                       this_tot->vf_tx_packets = stats->tx_packets;
+                       this_tot->vf_rx_bytes = stats->rx_bytes;
+                       this_tot->vf_tx_bytes = stats->tx_bytes;
+               } while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+               this_tot->rx_packets = this_tot->vf_rx_packets;
+               this_tot->tx_packets = this_tot->vf_tx_packets;
+               this_tot->rx_bytes   = this_tot->vf_rx_bytes;
+               this_tot->tx_bytes   = this_tot->vf_tx_bytes;
+       }
+
+       /* fetch percpu stats of netvsc */
+       for (i = 0; i < nvdev->num_chn; i++) {
+               const struct netvsc_channel *nvchan = &nvdev->chan_table[i];
+               const struct netvsc_stats *stats;
+               struct netvsc_ethtool_pcpu_stats *this_tot =
+                       &pcpu_tot[nvchan->channel->target_cpu];
+               u64 packets, bytes;
+               unsigned int start;
+
+               stats = &nvchan->tx_stats;
+               do {
+                       start = u64_stats_fetch_begin_irq(&stats->syncp);
+                       packets = stats->packets;
+                       bytes = stats->bytes;
+               } while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+
+               this_tot->tx_bytes      += bytes;
+               this_tot->tx_packets    += packets;
+
+               stats = &nvchan->rx_stats;
+               do {
+                       start = u64_stats_fetch_begin_irq(&stats->syncp);
+                       packets = stats->packets;
+                       bytes = stats->bytes;
+               } while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+
+               this_tot->rx_bytes      += bytes;
+               this_tot->rx_packets    += packets;
+       }
+}
+
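Editor's note: each per-cpu snapshot above uses the u64_stats_fetch_begin_irq()/u64_stats_fetch_retry_irq() pair, a seqcount-style read loop: the reader copies the counters and retries if a writer bumped the sequence in between. A simplified single-threaded sketch of the pattern (illustration only; the real primitives add the required memory barriers):

    #include <stdio.h>

    struct pcpu_stats {
        unsigned int seq;               /* odd while a writer is active */
        unsigned long long packets;
        unsigned long long bytes;
    };

    static void snapshot(const struct pcpu_stats *s,
                         unsigned long long *packets,
                         unsigned long long *bytes)
    {
        unsigned int start;

        do {
            start = s->seq;             /* fetch_begin_irq() */
            *packets = s->packets;
            *bytes = s->bytes;
        } while ((start & 1) || start != s->seq);  /* fetch_retry_irq() */
    }

    int main(void)
    {
        struct pcpu_stats s = { 0, 42, 4200 };
        unsigned long long p, b;

        snapshot(&s, &p, &b);
        printf("%llu packets, %llu bytes\n", p, b);
        return 0;
    }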
 static void netvsc_get_stats64(struct net_device *net,
                               struct rtnl_link_stats64 *t)
 {
@@ -1215,6 +1273,23 @@ static const struct {
        { "rx_no_memory", offsetof(struct netvsc_ethtool_stats, rx_no_memory) },
        { "stop_queue", offsetof(struct netvsc_ethtool_stats, stop_queue) },
        { "wake_queue", offsetof(struct netvsc_ethtool_stats, wake_queue) },
+}, pcpu_stats[] = {
+       { "cpu%u_rx_packets",
+               offsetof(struct netvsc_ethtool_pcpu_stats, rx_packets) },
+       { "cpu%u_rx_bytes",
+               offsetof(struct netvsc_ethtool_pcpu_stats, rx_bytes) },
+       { "cpu%u_tx_packets",
+               offsetof(struct netvsc_ethtool_pcpu_stats, tx_packets) },
+       { "cpu%u_tx_bytes",
+               offsetof(struct netvsc_ethtool_pcpu_stats, tx_bytes) },
+       { "cpu%u_vf_rx_packets",
+               offsetof(struct netvsc_ethtool_pcpu_stats, vf_rx_packets) },
+       { "cpu%u_vf_rx_bytes",
+               offsetof(struct netvsc_ethtool_pcpu_stats, vf_rx_bytes) },
+       { "cpu%u_vf_tx_packets",
+               offsetof(struct netvsc_ethtool_pcpu_stats, vf_tx_packets) },
+       { "cpu%u_vf_tx_bytes",
+               offsetof(struct netvsc_ethtool_pcpu_stats, vf_tx_bytes) },
 }, vf_stats[] = {
        { "vf_rx_packets", offsetof(struct netvsc_vf_pcpu_stats, rx_packets) },
        { "vf_rx_bytes",   offsetof(struct netvsc_vf_pcpu_stats, rx_bytes) },
@@ -1226,6 +1301,9 @@ static const struct {
 #define NETVSC_GLOBAL_STATS_LEN        ARRAY_SIZE(netvsc_stats)
 #define NETVSC_VF_STATS_LEN    ARRAY_SIZE(vf_stats)
 
+/* statistics per queue (rx/tx packets/bytes) */
+#define NETVSC_PCPU_STATS_LEN (num_present_cpus() * ARRAY_SIZE(pcpu_stats))
+
 /* 4 statistics per queue (rx/tx packets/bytes) */
 #define NETVSC_QUEUE_STATS_LEN(dev) ((dev)->num_chn * 4)
 
@@ -1241,7 +1319,8 @@ static int netvsc_get_sset_count(struct net_device *dev, int string_set)
        case ETH_SS_STATS:
                return NETVSC_GLOBAL_STATS_LEN
                        + NETVSC_VF_STATS_LEN
-                       + NETVSC_QUEUE_STATS_LEN(nvdev);
+                       + NETVSC_QUEUE_STATS_LEN(nvdev)
+                       + NETVSC_PCPU_STATS_LEN;
        default:
                return -EINVAL;
        }
@@ -1255,9 +1334,10 @@ static void netvsc_get_ethtool_stats(struct net_device *dev,
        const void *nds = &ndc->eth_stats;
        const struct netvsc_stats *qstats;
        struct netvsc_vf_pcpu_stats sum;
+       struct netvsc_ethtool_pcpu_stats *pcpu_sum;
        unsigned int start;
        u64 packets, bytes;
-       int i, j;
+       int i, j, cpu;
 
        if (!nvdev)
                return;
@@ -1289,6 +1369,19 @@ static void netvsc_get_ethtool_stats(struct net_device *dev,
                data[i++] = packets;
                data[i++] = bytes;
        }
+
+       pcpu_sum = kvmalloc_array(num_possible_cpus(),
+                                 sizeof(struct netvsc_ethtool_pcpu_stats),
+                                 GFP_KERNEL);
+       netvsc_get_pcpu_stats(dev, pcpu_sum);
+       for_each_present_cpu(cpu) {
+               struct netvsc_ethtool_pcpu_stats *this_sum = &pcpu_sum[cpu];
+
+               for (j = 0; j < ARRAY_SIZE(pcpu_stats); j++)
+                       data[i++] = *(u64 *)((void *)this_sum
+                                            + pcpu_stats[j].offset);
+       }
+       kvfree(pcpu_sum);
 }
 
 static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data)
@@ -1296,7 +1389,7 @@ static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data)
        struct net_device_context *ndc = netdev_priv(dev);
        struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev);
        u8 *p = data;
-       int i;
+       int i, cpu;
 
        if (!nvdev)
                return;
@@ -1324,6 +1417,13 @@ static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data)
                        p += ETH_GSTRING_LEN;
                }
 
+               for_each_present_cpu(cpu) {
+                       for (i = 0; i < ARRAY_SIZE(pcpu_stats); i++) {
+                               sprintf(p, pcpu_stats[i].name, cpu);
+                               p += ETH_GSTRING_LEN;
+                       }
+               }
+
                break;
        }
 }
index adde8fc45588ba12c82bd79cf830e3ed99b907e3..cfda146f3b3bbb799532a48e2705c2fd4b5f2661 100644 (file)
@@ -514,7 +514,6 @@ static int macvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev)
        const struct macvlan_dev *vlan = netdev_priv(dev);
        const struct macvlan_port *port = vlan->port;
        const struct macvlan_dev *dest;
-       void *accel_priv = NULL;
 
        if (vlan->mode == MACVLAN_MODE_BRIDGE) {
                const struct ethhdr *eth = (void *)skb->data;
@@ -533,15 +532,10 @@ static int macvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev)
                        return NET_XMIT_SUCCESS;
                }
        }
-
-       /* For packets that are non-multicast and not bridged we will pass
-        * the necessary information so that the lowerdev can distinguish
-        * the source of the packets via the accel_priv value.
-        */
-       accel_priv = vlan->accel_priv;
 xmit_world:
        skb->dev = vlan->lowerdev;
-       return dev_queue_xmit_accel(skb, accel_priv);
+       return dev_queue_xmit_accel(skb,
+                                   netdev_get_sb_channel(dev) ? dev : NULL);
 }
 
 static inline netdev_tx_t macvlan_netpoll_send_skb(struct macvlan_dev *vlan, struct sk_buff *skb)
@@ -1647,6 +1641,7 @@ static int macvlan_device_event(struct notifier_block *unused,
 
        switch (event) {
        case NETDEV_UP:
+       case NETDEV_DOWN:
        case NETDEV_CHANGE:
                list_for_each_entry(vlan, &port->vlans, list)
                        netif_stacked_transfer_operstate(vlan->lowerdev,
index 4f390fa557e4ba0c897b20faefaa85b03f4ec70a..7ae1856d1f185207222fdc22907a8056b68ca057 100644 (file)
@@ -115,7 +115,8 @@ static netdev_tx_t net_failover_start_xmit(struct sk_buff *skb,
 }
 
 static u16 net_failover_select_queue(struct net_device *dev,
-                                    struct sk_buff *skb, void *accel_priv,
+                                    struct sk_buff *skb,
+                                    struct net_device *sb_dev,
                                     select_queue_fallback_t fallback)
 {
        struct net_failover_info *nfo_info = netdev_priv(dev);
@@ -128,9 +129,9 @@ static u16 net_failover_select_queue(struct net_device *dev,
 
                if (ops->ndo_select_queue)
                        txq = ops->ndo_select_queue(primary_dev, skb,
-                                                   accel_priv, fallback);
+                                                   sb_dev, fallback);
                else
-                       txq = fallback(primary_dev, skb);
+                       txq = fallback(primary_dev, skb, NULL);
 
                qdisc_skb_cb(skb)->slave_dev_queue_mapping = skb->queue_mapping;
 
@@ -219,14 +220,14 @@ static int net_failover_change_mtu(struct net_device *dev, int new_mtu)
        struct net_device *primary_dev, *standby_dev;
        int ret = 0;
 
-       primary_dev = rcu_dereference(nfo_info->primary_dev);
+       primary_dev = rtnl_dereference(nfo_info->primary_dev);
        if (primary_dev) {
                ret = dev_set_mtu(primary_dev, new_mtu);
                if (ret)
                        return ret;
        }
 
-       standby_dev = rcu_dereference(nfo_info->standby_dev);
+       standby_dev = rtnl_dereference(nfo_info->standby_dev);
        if (standby_dev) {
                ret = dev_set_mtu(standby_dev, new_mtu);
                if (ret) {
index 449b2a1a18007e49c8d55993433cf865db893577..0fee1d06c0848f274baedb51ef30594cf4358332 100644 (file)
@@ -13,3 +13,7 @@ endif
 ifneq ($(CONFIG_NET_DEVLINK),)
 netdevsim-objs += devlink.o fib.o
 endif
+
+ifneq ($(CONFIG_XFRM_OFFLOAD),)
+netdevsim-objs += ipsec.o
+endif
index 75c25306d2347d34ee7f6847df1cd9e2dc866a40..81444208b2162126d515dc0948db09e936079a3f 100644 (file)
@@ -92,7 +92,7 @@ static const struct bpf_prog_offload_ops nsim_bpf_analyzer_ops = {
 
 static bool nsim_xdp_offload_active(struct netdevsim *ns)
 {
-       return ns->xdp_prog_mode == XDP_ATTACHED_HW;
+       return ns->xdp_hw.prog;
 }
 
 static void nsim_prog_set_loaded(struct bpf_prog *prog, bool loaded)
@@ -195,14 +195,14 @@ static int nsim_xdp_offload_prog(struct netdevsim *ns, struct netdev_bpf *bpf)
        return nsim_bpf_offload(ns, bpf->prog, nsim_xdp_offload_active(ns));
 }
 
-static int nsim_xdp_set_prog(struct netdevsim *ns, struct netdev_bpf *bpf)
+static int
+nsim_xdp_set_prog(struct netdevsim *ns, struct netdev_bpf *bpf,
+                 struct xdp_attachment_info *xdp)
 {
        int err;
 
-       if (ns->xdp_prog && (bpf->flags ^ ns->xdp_flags) & XDP_FLAGS_MODES) {
-               NSIM_EA(bpf->extack, "program loaded with different flags");
+       if (!xdp_attachment_flags_ok(xdp, bpf))
                return -EBUSY;
-       }
 
        if (bpf->command == XDP_SETUP_PROG && !ns->bpf_xdpdrv_accept) {
                NSIM_EA(bpf->extack, "driver XDP disabled in DebugFS");
@@ -219,18 +219,7 @@ static int nsim_xdp_set_prog(struct netdevsim *ns, struct netdev_bpf *bpf)
                        return err;
        }
 
-       if (ns->xdp_prog)
-               bpf_prog_put(ns->xdp_prog);
-
-       ns->xdp_prog = bpf->prog;
-       ns->xdp_flags = bpf->flags;
-
-       if (!bpf->prog)
-               ns->xdp_prog_mode = XDP_ATTACHED_NONE;
-       else if (bpf->command == XDP_SETUP_PROG)
-               ns->xdp_prog_mode = XDP_ATTACHED_DRV;
-       else
-               ns->xdp_prog_mode = XDP_ATTACHED_HW;
+       xdp_attachment_setup(xdp, bpf);
 
        return 0;
 }
@@ -249,8 +238,8 @@ static int nsim_bpf_create_prog(struct netdevsim *ns, struct bpf_prog *prog)
        state->state = "verify";
 
        /* Program id is not populated yet when we create the state. */
-       sprintf(name, "%u", ns->prog_id_gen++);
-       state->ddir = debugfs_create_dir(name, ns->ddir_bpf_bound_progs);
+       sprintf(name, "%u", ns->sdev->prog_id_gen++);
+       state->ddir = debugfs_create_dir(name, ns->sdev->ddir_bpf_bound_progs);
        if (IS_ERR_OR_NULL(state->ddir)) {
                kfree(state);
                return -ENOMEM;
@@ -261,7 +250,7 @@ static int nsim_bpf_create_prog(struct netdevsim *ns, struct bpf_prog *prog)
                            &state->state, &nsim_bpf_string_fops);
        debugfs_create_bool("loaded", 0400, state->ddir, &state->is_loaded);
 
-       list_add_tail(&state->l, &ns->bpf_bound_progs);
+       list_add_tail(&state->l, &ns->sdev->bpf_bound_progs);
 
        prog->aux->offload->dev_priv = state;
 
@@ -290,10 +279,6 @@ static int nsim_setup_prog_checks(struct netdevsim *ns, struct netdev_bpf *bpf)
                NSIM_EA(bpf->extack, "MTU too large w/ XDP enabled");
                return -EINVAL;
        }
-       if (nsim_xdp_offload_active(ns)) {
-               NSIM_EA(bpf->extack, "xdp offload active, can't load drv prog");
-               return -EBUSY;
-       }
        return 0;
 }
 
@@ -309,7 +294,7 @@ nsim_setup_prog_hw_checks(struct netdevsim *ns, struct netdev_bpf *bpf)
                NSIM_EA(bpf->extack, "xdpoffload of non-bound program");
                return -EINVAL;
        }
-       if (bpf->prog->aux->offload->netdev != ns->netdev) {
+       if (!bpf_offload_dev_match(bpf->prog, ns->netdev)) {
                NSIM_EA(bpf->extack, "program bound to different dev");
                return -EINVAL;
        }
@@ -512,7 +497,7 @@ nsim_bpf_map_alloc(struct netdevsim *ns, struct bpf_offloaded_map *offmap)
        }
 
        offmap->dev_ops = &nsim_bpf_map_ops;
-       list_add_tail(&nmap->l, &ns->bpf_bound_maps);
+       list_add_tail(&nmap->l, &ns->sdev->bpf_bound_maps);
 
        return 0;
 
@@ -567,22 +552,21 @@ int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf)
                nsim_bpf_destroy_prog(bpf->offload.prog);
                return 0;
        case XDP_QUERY_PROG:
-               bpf->prog_attached = ns->xdp_prog_mode;
-               bpf->prog_id = ns->xdp_prog ? ns->xdp_prog->aux->id : 0;
-               bpf->prog_flags = ns->xdp_prog ? ns->xdp_flags : 0;
-               return 0;
+               return xdp_attachment_query(&ns->xdp, bpf);
+       case XDP_QUERY_PROG_HW:
+               return xdp_attachment_query(&ns->xdp_hw, bpf);
        case XDP_SETUP_PROG:
                err = nsim_setup_prog_checks(ns, bpf);
                if (err)
                        return err;
 
-               return nsim_xdp_set_prog(ns, bpf);
+               return nsim_xdp_set_prog(ns, bpf, &ns->xdp);
        case XDP_SETUP_PROG_HW:
                err = nsim_setup_prog_hw_checks(ns, bpf);
                if (err)
                        return err;
 
-               return nsim_xdp_set_prog(ns, bpf);
+               return nsim_xdp_set_prog(ns, bpf, &ns->xdp_hw);
        case BPF_OFFLOAD_MAP_ALLOC:
                if (!ns->bpf_map_accept)
                        return -EOPNOTSUPP;
@@ -598,8 +582,26 @@ int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf)
 
 int nsim_bpf_init(struct netdevsim *ns)
 {
-       INIT_LIST_HEAD(&ns->bpf_bound_progs);
-       INIT_LIST_HEAD(&ns->bpf_bound_maps);
+       int err;
+
+       if (ns->sdev->refcnt == 1) {
+               INIT_LIST_HEAD(&ns->sdev->bpf_bound_progs);
+               INIT_LIST_HEAD(&ns->sdev->bpf_bound_maps);
+
+               ns->sdev->ddir_bpf_bound_progs =
+                       debugfs_create_dir("bpf_bound_progs", ns->sdev->ddir);
+               if (IS_ERR_OR_NULL(ns->sdev->ddir_bpf_bound_progs))
+                       return -ENOMEM;
+
+               ns->sdev->bpf_dev = bpf_offload_dev_create();
+               err = PTR_ERR_OR_ZERO(ns->sdev->bpf_dev);
+               if (err)
+                       return err;
+       }
+
+       err = bpf_offload_dev_netdev_register(ns->sdev->bpf_dev, ns->netdev);
+       if (err)
+               goto err_destroy_bdev;
 
        debugfs_create_u32("bpf_offloaded_id", 0400, ns->ddir,
                           &ns->bpf_offloaded_id);
@@ -609,10 +611,6 @@ int nsim_bpf_init(struct netdevsim *ns)
                            &ns->bpf_bind_accept);
        debugfs_create_u32("bpf_bind_verifier_delay", 0600, ns->ddir,
                           &ns->bpf_bind_verifier_delay);
-       ns->ddir_bpf_bound_progs =
-               debugfs_create_dir("bpf_bound_progs", ns->ddir);
-       if (IS_ERR_OR_NULL(ns->ddir_bpf_bound_progs))
-               return -ENOMEM;
 
        ns->bpf_tc_accept = true;
        debugfs_create_bool("bpf_tc_accept", 0600, ns->ddir,
@@ -631,12 +629,23 @@ int nsim_bpf_init(struct netdevsim *ns)
                            &ns->bpf_map_accept);
 
        return 0;
+
+err_destroy_bdev:
+       if (ns->sdev->refcnt == 1)
+               bpf_offload_dev_destroy(ns->sdev->bpf_dev);
+       return err;
 }
 
 void nsim_bpf_uninit(struct netdevsim *ns)
 {
-       WARN_ON(!list_empty(&ns->bpf_bound_progs));
-       WARN_ON(!list_empty(&ns->bpf_bound_maps));
-       WARN_ON(ns->xdp_prog);
+       WARN_ON(ns->xdp.prog);
+       WARN_ON(ns->xdp_hw.prog);
        WARN_ON(ns->bpf_offloaded);
+       bpf_offload_dev_netdev_unregister(ns->sdev->bpf_dev, ns->netdev);
+
+       if (ns->sdev->refcnt == 1) {
+               WARN_ON(!list_empty(&ns->sdev->bpf_bound_progs));
+               WARN_ON(!list_empty(&ns->sdev->bpf_bound_maps));
+               bpf_offload_dev_destroy(ns->sdev->bpf_dev);
+       }
 }
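The netdevsim bpf.c changes above follow a simple shared-device lifetime rule: the first port on a shared device (refcnt == 1) creates the bpf_offload_dev and the shared debugfs state, every port registers its netdev with it, and the last port tears it all down again in nsim_bpf_uninit(). Condensed into a sketch, with the debugfs details trimmed:

static int example_port_bpf_init(struct netdevsim *ns)
{
	int err;

	if (ns->sdev->refcnt == 1) {	/* first port on this "switch" */
		ns->sdev->bpf_dev = bpf_offload_dev_create();
		if (IS_ERR(ns->sdev->bpf_dev))
			return PTR_ERR(ns->sdev->bpf_dev);
	}

	err = bpf_offload_dev_netdev_register(ns->sdev->bpf_dev, ns->netdev);
	if (err && ns->sdev->refcnt == 1)
		bpf_offload_dev_destroy(ns->sdev->bpf_dev);
	return err;
}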
diff --git a/drivers/net/netdevsim/ipsec.c b/drivers/net/netdevsim/ipsec.c
new file mode 100644 (file)
index 0000000..2dcf6cc
--- /dev/null
@@ -0,0 +1,297 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2018 Oracle and/or its affiliates. All rights reserved. */
+
+#include <crypto/aead.h>
+#include <linux/debugfs.h>
+#include <net/xfrm.h>
+
+#include "netdevsim.h"
+
+#define NSIM_IPSEC_AUTH_BITS   128
+
+static ssize_t nsim_dbg_netdev_ops_read(struct file *filp,
+                                       char __user *buffer,
+                                       size_t count, loff_t *ppos)
+{
+       struct netdevsim *ns = filp->private_data;
+       struct nsim_ipsec *ipsec = &ns->ipsec;
+       size_t bufsize;
+       char *buf, *p;
+       int len;
+       int i;
+
+       /* each SA prints 3 lines of ~60 bytes; size the buffer for
+        * 4 lines per SA as slack, plus one more line for the summary
+        */
+       bufsize = (ipsec->count * 4 * 60) + 60;
+       buf = kzalloc(bufsize, GFP_KERNEL);
+       if (!buf)
+               return -ENOMEM;
+
+       p = buf;
+       p += snprintf(p, bufsize - (p - buf),
+                     "SA count=%u tx=%u\n",
+                     ipsec->count, ipsec->tx);
+
+       for (i = 0; i < NSIM_IPSEC_MAX_SA_COUNT; i++) {
+               struct nsim_sa *sap = &ipsec->sa[i];
+
+               if (!sap->used)
+                       continue;
+
+               p += snprintf(p, bufsize - (p - buf),
+                             "sa[%i] %cx ipaddr=0x%08x %08x %08x %08x\n",
+                             i, (sap->rx ? 'r' : 't'), sap->ipaddr[0],
+                             sap->ipaddr[1], sap->ipaddr[2], sap->ipaddr[3]);
+               p += snprintf(p, bufsize - (p - buf),
+                             "sa[%i]    spi=0x%08x proto=0x%x salt=0x%08x crypt=%d\n",
+                             i, be32_to_cpu(sap->xs->id.spi),
+                             sap->xs->id.proto, sap->salt, sap->crypt);
+               p += snprintf(p, bufsize - (p - buf),
+                             "sa[%i]    key=0x%08x %08x %08x %08x\n",
+                             i, sap->key[0], sap->key[1],
+                             sap->key[2], sap->key[3]);
+       }
+
+       len = simple_read_from_buffer(buffer, count, ppos, buf, p - buf);
+
+       kfree(buf);
+       return len;
+}
+
+static const struct file_operations ipsec_dbg_fops = {
+       .owner = THIS_MODULE,
+       .open = simple_open,
+       .read = nsim_dbg_netdev_ops_read,
+};
+
+static int nsim_ipsec_find_empty_idx(struct nsim_ipsec *ipsec)
+{
+       u32 i;
+
+       if (ipsec->count == NSIM_IPSEC_MAX_SA_COUNT)
+               return -ENOSPC;
+
+       /* search sa table */
+       for (i = 0; i < NSIM_IPSEC_MAX_SA_COUNT; i++) {
+               if (!ipsec->sa[i].used)
+                       return i;
+       }
+
+       return -ENOSPC;
+}
+
+static int nsim_ipsec_parse_proto_keys(struct xfrm_state *xs,
+                                      u32 *mykey, u32 *mysalt)
+{
+       const char aes_gcm_name[] = "rfc4106(gcm(aes))";
+       struct net_device *dev = xs->xso.dev;
+       unsigned char *key_data;
+       char *alg_name = NULL;
+       int key_len;
+
+       if (!xs->aead) {
+               netdev_err(dev, "Unsupported IPsec algorithm\n");
+               return -EINVAL;
+       }
+
+       if (xs->aead->alg_icv_len != NSIM_IPSEC_AUTH_BITS) {
+               netdev_err(dev, "IPsec offload requires %d bit authentication\n",
+                          NSIM_IPSEC_AUTH_BITS);
+               return -EINVAL;
+       }
+
+       key_data = &xs->aead->alg_key[0];
+       key_len = xs->aead->alg_key_len;
+       alg_name = xs->aead->alg_name;
+
+       if (strcmp(alg_name, aes_gcm_name)) {
+               netdev_err(dev, "Unsupported IPsec algorithm - please use %s\n",
+                          aes_gcm_name);
+               return -EINVAL;
+       }
+
+       /* a key_len of 160 bits is a 16 byte key plus a 4 byte salt */
+       if (key_len > NSIM_IPSEC_AUTH_BITS) {
+               *mysalt = ((u32 *)key_data)[4];
+       } else if (key_len == NSIM_IPSEC_AUTH_BITS) {
+               *mysalt = 0;
+       } else {
+               netdev_err(dev, "IPsec hw offload only supports 128 bit keys with optional 32 bit salt\n");
+               return -EINVAL;
+       }
+       memcpy(mykey, key_data, 16);
+
+       return 0;
+}
+
+static int nsim_ipsec_add_sa(struct xfrm_state *xs)
+{
+       struct nsim_ipsec *ipsec;
+       struct net_device *dev;
+       struct netdevsim *ns;
+       struct nsim_sa sa;
+       u16 sa_idx;
+       int ret;
+
+       dev = xs->xso.dev;
+       ns = netdev_priv(dev);
+       ipsec = &ns->ipsec;
+
+       if (xs->id.proto != IPPROTO_ESP && xs->id.proto != IPPROTO_AH) {
+               netdev_err(dev, "Unsupported protocol 0x%04x for ipsec offload\n",
+                          xs->id.proto);
+               return -EINVAL;
+       }
+
+       if (xs->calg) {
+               netdev_err(dev, "Compression offload not supported\n");
+               return -EINVAL;
+       }
+
+       /* find the first unused index */
+       ret = nsim_ipsec_find_empty_idx(ipsec);
+       if (ret < 0) {
+               netdev_err(dev, "No space for SA in Rx table!\n");
+               return ret;
+       }
+       sa_idx = (u16)ret;
+
+       memset(&sa, 0, sizeof(sa));
+       sa.used = true;
+       sa.xs = xs;
+
+       if (sa.xs->id.proto == IPPROTO_ESP)
+               sa.crypt = xs->ealg || xs->aead;
+
+       /* get the key and salt */
+       ret = nsim_ipsec_parse_proto_keys(xs, sa.key, &sa.salt);
+       if (ret) {
+               netdev_err(dev, "Failed to get key data for SA table\n");
+               return ret;
+       }
+
+       if (xs->xso.flags & XFRM_OFFLOAD_INBOUND) {
+               sa.rx = true;
+
+               if (xs->props.family == AF_INET6)
+                       memcpy(sa.ipaddr, &xs->id.daddr.a6, 16);
+               else
+                       memcpy(&sa.ipaddr[3], &xs->id.daddr.a4, 4);
+       }
+
+       /* the preparations worked, so save the info */
+       memcpy(&ipsec->sa[sa_idx], &sa, sizeof(sa));
+
+       /* the XFRM stack doesn't like offload_handle == 0,
+        * so add a bitflag in case our array index is 0
+        */
+       xs->xso.offload_handle = sa_idx | NSIM_IPSEC_VALID;
+       ipsec->count++;
+
+       return 0;
+}
+
+static void nsim_ipsec_del_sa(struct xfrm_state *xs)
+{
+       struct netdevsim *ns = netdev_priv(xs->xso.dev);
+       struct nsim_ipsec *ipsec = &ns->ipsec;
+       u16 sa_idx;
+
+       sa_idx = xs->xso.offload_handle & ~NSIM_IPSEC_VALID;
+       if (!ipsec->sa[sa_idx].used) {
+               netdev_err(ns->netdev, "Invalid SA for delete sa_idx=%d\n",
+                          sa_idx);
+               return;
+       }
+
+       memset(&ipsec->sa[sa_idx], 0, sizeof(struct nsim_sa));
+       ipsec->count--;
+}
+
+static bool nsim_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *xs)
+{
+       struct netdevsim *ns = netdev_priv(xs->xso.dev);
+       struct nsim_ipsec *ipsec = &ns->ipsec;
+
+       ipsec->ok++;
+
+       return true;
+}
+
+static const struct xfrmdev_ops nsim_xfrmdev_ops = {
+       .xdo_dev_state_add      = nsim_ipsec_add_sa,
+       .xdo_dev_state_delete   = nsim_ipsec_del_sa,
+       .xdo_dev_offload_ok     = nsim_ipsec_offload_ok,
+};
+
+bool nsim_ipsec_tx(struct netdevsim *ns, struct sk_buff *skb)
+{
+       struct nsim_ipsec *ipsec = &ns->ipsec;
+       struct xfrm_state *xs;
+       struct nsim_sa *tsa;
+       u32 sa_idx;
+
+       /* do we even need to check this packet? */
+       if (!skb->sp)
+               return true;
+
+       if (unlikely(!skb->sp->len)) {
+               netdev_err(ns->netdev, "no xfrm state len = %d\n",
+                          skb->sp->len);
+               return false;
+       }
+
+       xs = xfrm_input_state(skb);
+       if (unlikely(!xs)) {
+               netdev_err(ns->netdev, "no xfrm_input_state() xs = %p\n", xs);
+               return false;
+       }
+
+       sa_idx = xs->xso.offload_handle & ~NSIM_IPSEC_VALID;
+       if (unlikely(sa_idx >= NSIM_IPSEC_MAX_SA_COUNT)) {
+               netdev_err(ns->netdev, "bad sa_idx=%d max=%d\n",
+                          sa_idx, NSIM_IPSEC_MAX_SA_COUNT);
+               return false;
+       }
+
+       tsa = &ipsec->sa[sa_idx];
+       if (unlikely(!tsa->used)) {
+               netdev_err(ns->netdev, "unused sa_idx=%d\n", sa_idx);
+               return false;
+       }
+
+       if (xs->id.proto != IPPROTO_ESP && xs->id.proto != IPPROTO_AH) {
+               netdev_err(ns->netdev, "unexpected proto=%d\n", xs->id.proto);
+               return false;
+       }
+
+       ipsec->tx++;
+
+       return true;
+}
+
+void nsim_ipsec_init(struct netdevsim *ns)
+{
+       ns->netdev->xfrmdev_ops = &nsim_xfrmdev_ops;
+
+#define NSIM_ESP_FEATURES      (NETIF_F_HW_ESP | \
+                                NETIF_F_HW_ESP_TX_CSUM | \
+                                NETIF_F_GSO_ESP)
+
+       ns->netdev->features |= NSIM_ESP_FEATURES;
+       ns->netdev->hw_enc_features |= NSIM_ESP_FEATURES;
+
+       ns->ipsec.pfile = debugfs_create_file("ipsec", 0400, ns->ddir, ns,
+                                             &ipsec_dbg_fops);
+}
+
+void nsim_ipsec_teardown(struct netdevsim *ns)
+{
+       struct nsim_ipsec *ipsec = &ns->ipsec;
+
+       if (ipsec->count)
+               netdev_err(ns->netdev, "tearing down IPsec offload with %d SAs left\n",
+                          ipsec->count);
+       debugfs_remove_recursive(ipsec->pfile);
+}
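One detail of the new ipsec.c worth calling out: the XFRM stack treats xso.offload_handle == 0 as "not offloaded", so the driver cannot store a bare array index there. It tags every valid index with a high bit and masks it off again on lookup, as restated in this sketch:

#define EXAMPLE_VALID	BIT(31)

static void example_store_handle(struct xfrm_state *xs, u16 sa_idx)
{
	/* index 0 would otherwise look like "no offload" */
	xs->xso.offload_handle = sa_idx | EXAMPLE_VALID;
}

static u16 example_handle_to_index(const struct xfrm_state *xs)
{
	return xs->xso.offload_handle & ~EXAMPLE_VALID;
}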
index ec68f38213d9c31e4049893020e1dc2bb97eea3b..8d8e2b3f263e6127d9af53a7a0eda62e44b5b42b 100644 (file)
@@ -22,6 +22,7 @@
 #include <net/netlink.h>
 #include <net/pkt_cls.h>
 #include <net/rtnetlink.h>
+#include <net/switchdev.h>
 
 #include "netdevsim.h"
 
@@ -40,6 +41,9 @@ struct nsim_vf_config {
 
 static u32 nsim_dev_id;
 
+static struct dentry *nsim_ddir;
+static struct dentry *nsim_sdev_ddir;
+
 static int nsim_num_vf(struct device *dev)
 {
        struct netdevsim *ns = to_nsim(dev);
@@ -144,8 +148,29 @@ static struct device_type nsim_dev_type = {
        .release = nsim_dev_release,
 };
 
+static int
+nsim_port_attr_get(struct net_device *dev, struct switchdev_attr *attr)
+{
+       struct netdevsim *ns = netdev_priv(dev);
+
+       switch (attr->id) {
+       case SWITCHDEV_ATTR_ID_PORT_PARENT_ID:
+               attr->u.ppid.id_len = sizeof(ns->sdev->switch_id);
+               memcpy(&attr->u.ppid.id, &ns->sdev->switch_id,
+                      attr->u.ppid.id_len);
+               return 0;
+       default:
+               return -EOPNOTSUPP;
+       }
+}
+
+static const struct switchdev_ops nsim_switchdev_ops = {
+       .switchdev_port_attr_get        = nsim_port_attr_get,
+};
+
 static int nsim_init(struct net_device *dev)
 {
+       char sdev_ddir_name[11], sdev_link_name[32];
        struct netdevsim *ns = netdev_priv(dev);
        int err;
 
@@ -154,9 +179,32 @@ static int nsim_init(struct net_device *dev)
        if (IS_ERR_OR_NULL(ns->ddir))
                return -ENOMEM;
 
+       if (!ns->sdev) {
+               ns->sdev = kzalloc(sizeof(*ns->sdev), GFP_KERNEL);
+               if (!ns->sdev) {
+                       err = -ENOMEM;
+                       goto err_debugfs_destroy;
+               }
+               ns->sdev->refcnt = 1;
+               ns->sdev->switch_id = nsim_dev_id;
+               sprintf(sdev_ddir_name, "%u", ns->sdev->switch_id);
+               ns->sdev->ddir = debugfs_create_dir(sdev_ddir_name,
+                                                   nsim_sdev_ddir);
+               if (IS_ERR_OR_NULL(ns->sdev->ddir)) {
+                       err = PTR_ERR_OR_ZERO(ns->sdev->ddir) ?: -EINVAL;
+                       goto err_sdev_free;
+               }
+       } else {
+               sprintf(sdev_ddir_name, "%u", ns->sdev->switch_id);
+               ns->sdev->refcnt++;
+       }
+
+       sprintf(sdev_link_name, "../../" DRV_NAME "_sdev/%s", sdev_ddir_name);
+       debugfs_create_symlink("sdev", ns->ddir, sdev_link_name);
+
        err = nsim_bpf_init(ns);
        if (err)
-               goto err_debugfs_destroy;
+               goto err_sdev_destroy;
 
        ns->dev.id = nsim_dev_id++;
        ns->dev.bus = &nsim_bus;
@@ -166,17 +214,26 @@ static int nsim_init(struct net_device *dev)
                goto err_bpf_uninit;
 
        SET_NETDEV_DEV(dev, &ns->dev);
+       SWITCHDEV_SET_OPS(dev, &nsim_switchdev_ops);
 
        err = nsim_devlink_setup(ns);
        if (err)
                goto err_unreg_dev;
 
+       nsim_ipsec_init(ns);
+
        return 0;
 
 err_unreg_dev:
        device_unregister(&ns->dev);
 err_bpf_uninit:
        nsim_bpf_uninit(ns);
+err_sdev_destroy:
+       if (!--ns->sdev->refcnt) {
+               debugfs_remove_recursive(ns->sdev->ddir);
+err_sdev_free:
+               kfree(ns->sdev);
+       }
 err_debugfs_destroy:
        debugfs_remove_recursive(ns->ddir);
        return err;
@@ -186,9 +243,14 @@ static void nsim_uninit(struct net_device *dev)
 {
        struct netdevsim *ns = netdev_priv(dev);
 
+       nsim_ipsec_teardown(ns);
        nsim_devlink_teardown(ns);
        debugfs_remove_recursive(ns->ddir);
        nsim_bpf_uninit(ns);
+       if (!--ns->sdev->refcnt) {
+               debugfs_remove_recursive(ns->sdev->ddir);
+               kfree(ns->sdev);
+       }
 }
 
 static void nsim_free(struct net_device *dev)
@@ -203,11 +265,15 @@ static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
        struct netdevsim *ns = netdev_priv(dev);
 
+       if (!nsim_ipsec_tx(ns, skb))
+               goto out;
+
        u64_stats_update_begin(&ns->syncp);
        ns->tx_packets++;
        ns->tx_bytes += skb->len;
        u64_stats_update_end(&ns->syncp);
 
+out:
        dev_kfree_skb(skb);
 
        return NETDEV_TX_OK;
@@ -221,8 +287,7 @@ static int nsim_change_mtu(struct net_device *dev, int new_mtu)
 {
        struct netdevsim *ns = netdev_priv(dev);
 
-       if (ns->xdp_prog_mode == XDP_ATTACHED_DRV &&
-           new_mtu > NSIM_XDP_MAX_MTU)
+       if (ns->xdp.prog && new_mtu > NSIM_XDP_MAX_MTU)
                return -EBUSY;
 
        dev->mtu = new_mtu;
@@ -260,7 +325,7 @@ nsim_setup_tc_block(struct net_device *dev, struct tc_block_offload *f)
        switch (f->command) {
        case TC_BLOCK_BIND:
                return tcf_block_cb_register(f->block, nsim_setup_tc_block_cb,
-                                            ns, ns);
+                                            ns, ns, f->extack);
        case TC_BLOCK_UNBIND:
                tcf_block_cb_unregister(f->block, nsim_setup_tc_block_cb, ns);
                return 0;
@@ -464,15 +529,46 @@ static int nsim_validate(struct nlattr *tb[], struct nlattr *data[],
        return 0;
 }
 
+static int nsim_newlink(struct net *src_net, struct net_device *dev,
+                       struct nlattr *tb[], struct nlattr *data[],
+                       struct netlink_ext_ack *extack)
+{
+       struct netdevsim *ns = netdev_priv(dev);
+
+       if (tb[IFLA_LINK]) {
+               struct net_device *joindev;
+               struct netdevsim *joinns;
+
+               joindev = __dev_get_by_index(src_net,
+                                            nla_get_u32(tb[IFLA_LINK]));
+               if (!joindev)
+                       return -ENODEV;
+               if (joindev->netdev_ops != &nsim_netdev_ops)
+                       return -EINVAL;
+
+               joinns = netdev_priv(joindev);
+               if (!joinns->sdev || !joinns->sdev->refcnt)
+                       return -EINVAL;
+               ns->sdev = joinns->sdev;
+       }
+
+       return register_netdevice(dev);
+}
+
+static void nsim_dellink(struct net_device *dev, struct list_head *head)
+{
+       unregister_netdevice_queue(dev, head);
+}
+
 static struct rtnl_link_ops nsim_link_ops __read_mostly = {
        .kind           = DRV_NAME,
        .priv_size      = sizeof(struct netdevsim),
        .setup          = nsim_setup,
        .validate       = nsim_validate,
+       .newlink        = nsim_newlink,
+       .dellink        = nsim_dellink,
 };
 
-struct dentry *nsim_ddir;
-
 static int __init nsim_module_init(void)
 {
        int err;
@@ -481,9 +577,15 @@ static int __init nsim_module_init(void)
        if (IS_ERR_OR_NULL(nsim_ddir))
                return -ENOMEM;
 
+       nsim_sdev_ddir = debugfs_create_dir(DRV_NAME "_sdev", NULL);
+       if (IS_ERR_OR_NULL(nsim_sdev_ddir)) {
+               err = -ENOMEM;
+               goto err_debugfs_destroy;
+       }
+
        err = bus_register(&nsim_bus);
        if (err)
-               goto err_debugfs_destroy;
+               goto err_sdir_destroy;
 
        err = nsim_devlink_init();
        if (err)
@@ -499,6 +601,8 @@ err_dl_fini:
        nsim_devlink_exit();
 err_unreg_bus:
        bus_unregister(&nsim_bus);
+err_sdir_destroy:
+       debugfs_remove_recursive(nsim_sdev_ddir);
 err_debugfs_destroy:
        debugfs_remove_recursive(nsim_ddir);
        return err;
@@ -509,6 +613,7 @@ static void __exit nsim_module_exit(void)
        rtnl_link_unregister(&nsim_link_ops);
        nsim_devlink_exit();
        bus_unregister(&nsim_bus);
+       debugfs_remove_recursive(nsim_sdev_ddir);
        debugfs_remove_recursive(nsim_ddir);
 }
 
index 8ca50b72c3287f41130beb61da42cf1bec360177..384c254fafc5c0513c54851f3e88c030d3e0847d 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/list.h>
 #include <linux/netdevice.h>
 #include <linux/u64_stats_sync.h>
+#include <net/xdp.h>
 
 #define DRV_NAME       "netdevsim"
 
 #define NSIM_EA(extack, msg)   NL_SET_ERR_MSG_MOD((extack), msg)
 
 struct bpf_prog;
+struct bpf_offload_dev;
 struct dentry;
 struct nsim_vf_config;
 
+struct netdevsim_shared_dev {
+       unsigned int refcnt;
+       u32 switch_id;
+
+       struct dentry *ddir;
+
+       struct bpf_offload_dev *bpf_dev;
+
+       struct dentry *ddir_bpf_bound_progs;
+       u32 prog_id_gen;
+
+       struct list_head bpf_bound_progs;
+       struct list_head bpf_bound_maps;
+};
+
+#define NSIM_IPSEC_MAX_SA_COUNT                33
+#define NSIM_IPSEC_VALID               BIT(31)
+
+struct nsim_sa {
+       struct xfrm_state *xs;
+       __be32 ipaddr[4];
+       u32 key[4];
+       u32 salt;
+       bool used;
+       bool crypt;
+       bool rx;
+};
+
+struct nsim_ipsec {
+       struct nsim_sa sa[NSIM_IPSEC_MAX_SA_COUNT];
+       struct dentry *pfile;
+       u32 count;
+       u32 tx;
+       u32 ok;
+};
+
 struct netdevsim {
        struct net_device *netdev;
 
@@ -37,6 +75,7 @@ struct netdevsim {
        struct u64_stats_sync syncp;
 
        struct device dev;
+       struct netdevsim_shared_dev *sdev;
 
        struct dentry *ddir;
 
@@ -46,16 +85,11 @@ struct netdevsim {
        struct bpf_prog *bpf_offloaded;
        u32 bpf_offloaded_id;
 
-       u32 xdp_flags;
-       int xdp_prog_mode;
-       struct bpf_prog *xdp_prog;
-
-       u32 prog_id_gen;
+       struct xdp_attachment_info xdp;
+       struct xdp_attachment_info xdp_hw;
 
        bool bpf_bind_accept;
        u32 bpf_bind_verifier_delay;
-       struct dentry *ddir_bpf_bound_progs;
-       struct list_head bpf_bound_progs;
 
        bool bpf_tc_accept;
        bool bpf_tc_non_bound_accept;
@@ -63,14 +97,12 @@ struct netdevsim {
        bool bpf_xdpoffload_accept;
 
        bool bpf_map_accept;
-       struct list_head bpf_bound_maps;
 #if IS_ENABLED(CONFIG_NET_DEVLINK)
        struct devlink *devlink;
 #endif
+       struct nsim_ipsec ipsec;
 };
 
-extern struct dentry *nsim_ddir;
-
 #ifdef CONFIG_BPF_SYSCALL
 int nsim_bpf_init(struct netdevsim *ns);
 void nsim_bpf_uninit(struct netdevsim *ns);
@@ -148,6 +180,25 @@ static inline void nsim_devlink_exit(void)
 }
 #endif
 
+#if IS_ENABLED(CONFIG_XFRM_OFFLOAD)
+void nsim_ipsec_init(struct netdevsim *ns);
+void nsim_ipsec_teardown(struct netdevsim *ns);
+bool nsim_ipsec_tx(struct netdevsim *ns, struct sk_buff *skb);
+#else
+static inline void nsim_ipsec_init(struct netdevsim *ns)
+{
+}
+
+static inline void nsim_ipsec_teardown(struct netdevsim *ns)
+{
+}
+
+static inline bool nsim_ipsec_tx(struct netdevsim *ns, struct sk_buff *skb)
+{
+       return true;
+}
+#endif
+
 static inline struct netdevsim *to_nsim(struct device *ptr)
 {
        return container_of(ptr, struct netdevsim, dev);
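The #if IS_ENABLED(CONFIG_XFRM_OFFLOAD) block above uses the kernel's standard stub pattern for optional features: the real prototypes are visible only when the option is enabled, and empty static inlines take their place otherwise, so callers like nsim_start_xmit() never need an #ifdef. The same shape, shown with a hypothetical CONFIG_FOO option:

#if IS_ENABLED(CONFIG_FOO)
void foo_init(struct netdevsim *ns);
#else
static inline void foo_init(struct netdevsim *ns)
{
}
#endif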
index 9f6f7ccd44f775686dd4807d52b409be407026b9..b12023bc2cab5feb15ceedbe2fc357dfcf37627e 100644 (file)
@@ -430,7 +430,7 @@ static int ntb_netdev_probe(struct device *client_dev)
        ndev->hw_features = ndev->features;
        ndev->watchdog_timeo = msecs_to_jiffies(NTB_TX_TIMEOUT_MS);
 
-       random_ether_addr(ndev->perm_addr);
+       eth_random_addr(ndev->perm_addr);
        memcpy(ndev->dev_addr, ndev->perm_addr, ndev->addr_len);
 
        ndev->netdev_ops = &ntb_netdev_ops;
index 343989f9f9d981e201bedf66520f4be97567d6af..82070792edbb08c6af08fd79a256c53e59d01da4 100644 (file)
@@ -28,7 +28,7 @@ config MDIO_BCM_IPROC
 
 config MDIO_BCM_UNIMAC
        tristate "Broadcom UniMAC MDIO bus controller"
-       depends on HAS_IOMEM && OF_MDIO
+       depends on HAS_IOMEM
        help
          This module provides a driver for the Broadcom UniMAC MDIO busses.
          This hardware can be found in the Broadcom GENET Ethernet MAC
@@ -92,7 +92,8 @@ config MDIO_CAVIUM
 
 config MDIO_GPIO
        tristate "GPIO lib-based bitbanged MDIO buses"
-       depends on MDIO_BITBANG && GPIOLIB
+       depends on MDIO_BITBANG
+       depends on GPIOLIB || COMPILE_TEST
        ---help---
          Supports GPIO lib-based MDIO busses.
 
@@ -213,6 +214,7 @@ comment "MII PHY device drivers"
 config SFP
        tristate "SFP cage support"
        depends on I2C && PHYLINK
+       depends on HWMON || HWMON=n
        select MDIO_I2C
 
 config AMD_PHY
index 49ac678eb2dc7ca6539794b9ace40ba86aaa8d6a..78cad134a79ea3cff76683ff3d2e951d8792ea7d 100644 (file)
@@ -21,6 +21,7 @@
 #define MII_DP83811_SGMII_CTRL 0x09
 #define MII_DP83811_INT_STAT1  0x12
 #define MII_DP83811_INT_STAT2  0x13
+#define MII_DP83811_INT_STAT3  0x18
 #define MII_DP83811_RESET_CTRL 0x1f
 
 #define DP83811_HW_RESET       BIT(15)
 #define DP83811_OVERVOLTAGE_INT_EN     BIT(6)
 #define DP83811_UNDERVOLTAGE_INT_EN    BIT(7)
 
+/* INT_STAT3 bits */
+#define DP83811_LPS_INT_EN     BIT(0)
+#define DP83811_NO_FRAME_INT_EN        BIT(3)
+#define DP83811_POR_DONE_INT_EN        BIT(4)
+
 #define MII_DP83811_RXSOP1     0x04a5
 #define MII_DP83811_RXSOP2     0x04a6
 #define MII_DP83811_RXSOP3     0x04a7
@@ -81,6 +87,10 @@ static int dp83811_ack_interrupt(struct phy_device *phydev)
        if (err < 0)
                return err;
 
+       err = phy_read(phydev, MII_DP83811_INT_STAT3);
+       if (err < 0)
+               return err;
+
        return 0;
 }
 
@@ -216,6 +226,18 @@ static int dp83811_config_intr(struct phy_device *phydev)
                                DP83811_UNDERVOLTAGE_INT_EN);
 
                err = phy_write(phydev, MII_DP83811_INT_STAT2, misr_status);
+               if (err < 0)
+                       return err;
+
+               misr_status = phy_read(phydev, MII_DP83811_INT_STAT3);
+               if (misr_status < 0)
+                       return misr_status;
+
+               misr_status |= (DP83811_LPS_INT_EN |
+                               DP83811_NO_FRAME_INT_EN |
+                               DP83811_POR_DONE_INT_EN);
+
+               err = phy_write(phydev, MII_DP83811_INT_STAT3, misr_status);
 
        } else {
                err = phy_write(phydev, MII_DP83811_INT_STAT1, 0);
@@ -223,6 +245,10 @@ static int dp83811_config_intr(struct phy_device *phydev)
                        return err;
 
                err = phy_write(phydev, MII_DP83811_INT_STAT2, 0);
+               if (err < 0)
+                       return err;
+
+               err = phy_write(phydev, MII_DP83811_INT_STAT3, 0);
        }
 
        return err;
@@ -258,21 +284,19 @@ static int dp83811_config_init(struct phy_device *phydev)
        if (err < 0)
                return err;
 
+       value = phy_read(phydev, MII_DP83811_SGMII_CTRL);
        if (phydev->interface == PHY_INTERFACE_MODE_SGMII) {
-               value = phy_read(phydev, MII_DP83811_SGMII_CTRL);
-               if (!(value & DP83811_SGMII_EN)) {
-                       err = phy_write(phydev, MII_DP83811_SGMII_CTRL,
+               err = phy_write(phydev, MII_DP83811_SGMII_CTRL,
                                (DP83811_SGMII_EN | value));
-                       if (err < 0)
-                               return err;
-               } else {
-                       err = phy_write(phydev, MII_DP83811_SGMII_CTRL,
-                                       (~DP83811_SGMII_EN & value));
-                       if (err < 0)
-                               return err;
-               }
+       } else {
+               err = phy_write(phydev, MII_DP83811_SGMII_CTRL,
+                               (~DP83811_SGMII_EN & value));
        }
 
+       if (err < 0)
+               return err;
+
        value = DP83811_WOL_MAGIC_EN | DP83811_WOL_SECURE_ON | DP83811_WOL_EN;
 
        return phy_write_mmd(phydev, DP83811_DEVADDR, MII_DP83811_WOL_CFG,
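The dp83811 interrupt registers are read-to-clear, which is why dp83811_ack_interrupt() grows a read of the new INT_STAT3 register whose value is immediately discarded: the read itself acknowledges any latched events. The essence of the pattern:

static int example_ack_stat3(struct phy_device *phydev)
{
	/* reading the latched status register clears it; only the
	 * error case matters
	 */
	int err = phy_read(phydev, MII_DP83811_INT_STAT3);

	return err < 0 ? err : 0;
}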
index 001fe1df75572687e527c34eb608370d446bc670..67b260877f305a33c8a8b4aa37d5862478fe7835 100644 (file)
@@ -259,10 +259,8 @@ static int __init fixed_mdio_bus_init(void)
        int ret;
 
        pdev = platform_device_register_simple("Fixed MDIO bus", 0, NULL, 0);
-       if (IS_ERR(pdev)) {
-               ret = PTR_ERR(pdev);
-               goto err_pdev;
-       }
+       if (IS_ERR(pdev))
+               return PTR_ERR(pdev);
 
        fmb->mii_bus = mdiobus_alloc();
        if (fmb->mii_bus == NULL) {
@@ -287,7 +285,6 @@ err_mdiobus_alloc:
        mdiobus_free(fmb->mii_bus);
 err_mdiobus_reg:
        platform_device_unregister(pdev);
-err_pdev:
        return ret;
 }
 module_init(fixed_mdio_bus_init);
index 1cd439bdf6087af2913f589b499cd5c5abe5a3bb..f7c69ca34056e0e9846301b2637e6547688a91de 100644 (file)
@@ -679,7 +679,7 @@ static int m88e1116r_config_init(struct phy_device *phydev)
        if (err < 0)
                return err;
 
-       mdelay(500);
+       msleep(500);
 
        err = marvell_set_page(phydev, MII_MARVELL_COPPER_PAGE);
        if (err < 0)
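The marvell change swaps a 500 ms mdelay() for msleep(): mdelay() busy-waits and occupies the CPU for the whole interval, while msleep() schedules away, which is the right tool in process context such as config_init. Side by side:

	mdelay(500);	/* spins for 500 ms, CPU unavailable to others */
	msleep(500);	/* sleeps for at least 500 ms, CPU is freed */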
index 0c5b68e7da51aa8d0c7c73e7c6b3cc632b1e3510..c017486e9b86802dfc9834c52e2823b9b83ea248 100644 (file)
@@ -13,7 +13,7 @@
  * You should have received a copy of the GNU General Public License
  * version 2 (GPLv2) along with this source code.
  */
-
+#include <linux/clk.h>
 #include <linux/platform_device.h>
 #include <linux/device.h>
 #include <linux/of_mdio.h>
 #include <linux/mdio-mux.h>
 #include <linux/delay.h>
 
-#define MDIO_PARAM_OFFSET              0x00
+#define MDIO_RATE_ADJ_EXT_OFFSET       0x000
+#define MDIO_RATE_ADJ_INT_OFFSET       0x004
+#define MDIO_RATE_ADJ_DIVIDENT_SHIFT   16
+
+#define MDIO_SCAN_CTRL_OFFSET          0x008
+#define MDIO_SCAN_CTRL_OVRIDE_EXT_MSTR 28
+
+#define MDIO_PARAM_OFFSET              0x23c
 #define MDIO_PARAM_MIIM_CYCLE          29
 #define MDIO_PARAM_INTERNAL_SEL                25
 #define MDIO_PARAM_BUS_ID              22
 #define MDIO_PARAM_PHY_ID              16
 #define MDIO_PARAM_PHY_DATA            0
 
-#define MDIO_READ_OFFSET               0x04
+#define MDIO_READ_OFFSET               0x240
 #define MDIO_READ_DATA_MASK            0xffff
-#define MDIO_ADDR_OFFSET               0x08
+#define MDIO_ADDR_OFFSET               0x244
 
-#define MDIO_CTRL_OFFSET               0x0C
+#define MDIO_CTRL_OFFSET               0x248
 #define MDIO_CTRL_WRITE_OP             0x1
 #define MDIO_CTRL_READ_OP              0x2
 
-#define MDIO_STAT_OFFSET               0x10
+#define MDIO_STAT_OFFSET               0x24c
 #define MDIO_STAT_DONE                 1
 
 #define BUS_MAX_ADDR                   32
 #define EXT_BUS_START_ADDR             16
 
+#define MDIO_REG_ADDR_SPACE_SIZE       0x250
+
+#define MDIO_OPERATING_FREQUENCY       11000000
+#define MDIO_RATE_ADJ_DIVIDENT         1
+
 struct iproc_mdiomux_desc {
        void *mux_handle;
        void __iomem *base;
        struct device *dev;
        struct mii_bus *mii_bus;
+       struct clk *core_clk;
 };
 
+static void mdio_mux_iproc_config(struct iproc_mdiomux_desc *md)
+{
+       u32 divisor;
+       u32 val;
+
+       /* Disable external mdio master access */
+       val = readl(md->base + MDIO_SCAN_CTRL_OFFSET);
+       val |= BIT(MDIO_SCAN_CTRL_OVRIDE_EXT_MSTR);
+       writel(val, md->base + MDIO_SCAN_CTRL_OFFSET);
+
+       if (md->core_clk) {
+               /* use rate adjust regs to derive the mdio's operating
+                * frequency from the specified core clock
+                */
+               divisor = clk_get_rate(md->core_clk) / MDIO_OPERATING_FREQUENCY;
+               divisor = divisor / (MDIO_RATE_ADJ_DIVIDENT + 1);
+               val = divisor;
+               val |= MDIO_RATE_ADJ_DIVIDENT << MDIO_RATE_ADJ_DIVIDENT_SHIFT;
+               writel(val, md->base + MDIO_RATE_ADJ_EXT_OFFSET);
+               writel(val, md->base + MDIO_RATE_ADJ_INT_OFFSET);
+       }
+}
+
 static int iproc_mdio_wait_for_idle(void __iomem *base, bool result)
 {
        unsigned int timeout = 1000; /* loop for 1s */
@@ -169,18 +205,39 @@ static int mdio_mux_iproc_probe(struct platform_device *pdev)
        md->dev = &pdev->dev;
 
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       if (res->start & 0xfff) {
+               /* For backward compatibility in case the
+                * base address is specified with an offset.
+                */
+               dev_info(&pdev->dev, "fix base address in dt-blob\n");
+               res->start &= ~0xfff;
+               res->end = res->start + MDIO_REG_ADDR_SPACE_SIZE - 1;
+       }
        md->base = devm_ioremap_resource(&pdev->dev, res);
        if (IS_ERR(md->base)) {
                dev_err(&pdev->dev, "failed to ioremap register\n");
                return PTR_ERR(md->base);
        }
 
-       md->mii_bus = mdiobus_alloc();
+       md->mii_bus = devm_mdiobus_alloc(&pdev->dev);
        if (!md->mii_bus) {
                dev_err(&pdev->dev, "mdiomux bus alloc failed\n");
                return -ENOMEM;
        }
 
+       md->core_clk = devm_clk_get(&pdev->dev, NULL);
+       if (md->core_clk == ERR_PTR(-ENOENT) ||
+           md->core_clk == ERR_PTR(-EINVAL))
+               md->core_clk = NULL;
+       else if (IS_ERR(md->core_clk))
+               return PTR_ERR(md->core_clk);
+
+       rc = clk_prepare_enable(md->core_clk);
+       if (rc) {
+               dev_err(&pdev->dev, "failed to enable core clk\n");
+               return rc;
+       }
+
        bus = md->mii_bus;
        bus->priv = md;
        bus->name = "iProc MDIO mux bus";
@@ -194,7 +251,7 @@ static int mdio_mux_iproc_probe(struct platform_device *pdev)
        rc = mdiobus_register(bus);
        if (rc) {
                dev_err(&pdev->dev, "mdiomux registration failed\n");
-               goto out;
+               goto out_clk;
        }
 
        platform_set_drvdata(pdev, md);
@@ -206,13 +263,15 @@ static int mdio_mux_iproc_probe(struct platform_device *pdev)
                goto out_register;
        }
 
+       mdio_mux_iproc_config(md);
+
        dev_info(md->dev, "iProc mdiomux registered\n");
        return 0;
 
 out_register:
        mdiobus_unregister(bus);
-out:
-       mdiobus_free(bus);
+out_clk:
+       clk_disable_unprepare(md->core_clk);
        return rc;
 }
 
@@ -222,11 +281,37 @@ static int mdio_mux_iproc_remove(struct platform_device *pdev)
 
        mdio_mux_uninit(md->mux_handle);
        mdiobus_unregister(md->mii_bus);
-       mdiobus_free(md->mii_bus);
+       clk_disable_unprepare(md->core_clk);
+
+       return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int mdio_mux_iproc_suspend(struct device *dev)
+{
+       struct platform_device *pdev = to_platform_device(dev);
+       struct iproc_mdiomux_desc *md = platform_get_drvdata(pdev);
+
+       clk_disable_unprepare(md->core_clk);
 
        return 0;
 }
 
+static int mdio_mux_iproc_resume(struct device *dev)
+{
+       struct platform_device *pdev = to_platform_device(dev);
+       struct iproc_mdiomux_desc *md = platform_get_drvdata(pdev);
+
+       clk_prepare_enable(md->core_clk);
+       mdio_mux_iproc_config(md);
+
+       return 0;
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(mdio_mux_iproc_pm_ops,
+                        mdio_mux_iproc_suspend, mdio_mux_iproc_resume);
+
 static const struct of_device_id mdio_mux_iproc_match[] = {
        {
                .compatible = "brcm,mdio-mux-iproc",
@@ -239,6 +324,7 @@ static struct platform_driver mdiomux_iproc_driver = {
        .driver = {
                .name           = "mdio-mux-iproc",
                .of_match_table = mdio_mux_iproc_match,
+               .pm             = &mdio_mux_iproc_pm_ops,
        },
        .probe          = mdio_mux_iproc_probe,
        .remove         = mdio_mux_iproc_remove,
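The rate-adjust programming in mdio_mux_iproc_config() derives the MDIO clock from the core clock as rate / MDIO_OPERATING_FREQUENCY, split across a divisor field and a dividend field. A worked example with a hypothetical 250 MHz core clock:

	/* divisor = 250000000 / 11000000             = 22 (integer division)
	 * divisor = 22 / (MDIO_RATE_ADJ_DIVIDENT + 1) = 11
	 * val     = 11 | (1 << MDIO_RATE_ADJ_DIVIDENT_SHIFT) = 0x0001000b
	 */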
index 082ffef0dec4e31e6f92f7b11ad020a7b9a16376..bc90764a8b8dcd7dd7140c20f39e8538d47de23e 100644 (file)
 struct mdio_mux_gpio_state {
        struct gpio_descs *gpios;
        void *mux_handle;
+       int values[];
 };
 
 static int mdio_mux_gpio_switch_fn(int current_child, int desired_child,
                                   void *data)
 {
        struct mdio_mux_gpio_state *s = data;
-       int values[s->gpios->ndescs];
        unsigned int n;
 
        if (current_child == desired_child)
                return 0;
 
        for (n = 0; n < s->gpios->ndescs; n++)
-               values[n] = (desired_child >> n) & 1;
+               s->values[n] = (desired_child >> n) & 1;
 
        gpiod_set_array_value_cansleep(s->gpios->ndescs, s->gpios->desc,
-                                      values);
+                                      s->values);
 
        return 0;
 }
@@ -44,15 +44,21 @@ static int mdio_mux_gpio_switch_fn(int current_child, int desired_child,
 static int mdio_mux_gpio_probe(struct platform_device *pdev)
 {
        struct mdio_mux_gpio_state *s;
+       struct gpio_descs *gpios;
        int r;
 
-       s = devm_kzalloc(&pdev->dev, sizeof(*s), GFP_KERNEL);
-       if (!s)
+       gpios = gpiod_get_array(&pdev->dev, NULL, GPIOD_OUT_LOW);
+       if (IS_ERR(gpios))
+               return PTR_ERR(gpios);
+
+       s = devm_kzalloc(&pdev->dev, struct_size(s, values, gpios->ndescs),
+                        GFP_KERNEL);
+       if (!s) {
+               gpiod_put_array(gpios);
                return -ENOMEM;
+       }
 
-       s->gpios = gpiod_get_array(&pdev->dev, NULL, GPIOD_OUT_LOW);
-       if (IS_ERR(s->gpios))
-               return PTR_ERR(s->gpios);
+       s->gpios = gpios;
 
        r = mdio_mux_init(&pdev->dev, pdev->dev.of_node,
                          mdio_mux_gpio_switch_fn, &s->mux_handle, s, NULL);
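The mdio-mux-gpio change removes a variable-length array from the kernel stack by folding it into the state structure as a C99 flexible array member, sized at allocation time with struct_size(). The general shape of the pattern:

struct example_state {
	size_t n;
	int values[];	/* flexible array member, sized at alloc time */
};

static struct example_state *example_alloc(struct device *dev, size_t n)
{
	struct example_state *s;

	s = devm_kzalloc(dev, struct_size(s, values, n), GFP_KERNEL);
	if (s)
		s->n = n;
	return s;
}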
index 650c2667d523d26ee0e9c122b9d8910e5ba1c0d8..84ca9ff40ae0b0bedb3758f4618e6652db845eb4 100644 (file)
@@ -123,7 +123,7 @@ static const struct vsc8531_edge_rate_table edge_table[] = {
 };
 #endif /* CONFIG_OF_MDIO */
 
-static int vsc85xx_phy_page_set(struct phy_device *phydev, u8 page)
+static int vsc85xx_phy_page_set(struct phy_device *phydev, u16 page)
 {
        int rc;
 
index 6c9b24fe31488b03499a9866f13a065449b29cce..1ee25877c4d163a15fb2d8c5e9f1c7f0d25389d2 100644 (file)
@@ -467,6 +467,20 @@ int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd)
 }
 EXPORT_SYMBOL(phy_mii_ioctl);
 
+static int phy_config_aneg(struct phy_device *phydev)
+{
+       if (phydev->drv->config_aneg)
+               return phydev->drv->config_aneg(phydev);
+
+       /* Clause 45 PHYs that don't implement Clause 22 registers are not
+        * allowed to call genphy_config_aneg()
+        */
+       if (phydev->is_c45 && !(phydev->c45_ids.devices_in_package & BIT(0)))
+               return -EOPNOTSUPP;
+
+       return genphy_config_aneg(phydev);
+}
+
 /**
  * phy_start_aneg_priv - start auto-negotiation for this PHY device
  * @phydev: the phy_device struct
@@ -493,10 +507,7 @@ static int phy_start_aneg_priv(struct phy_device *phydev, bool sync)
        /* Invalidate LP advertising flags */
        phydev->lp_advertising = 0;
 
-       if (phydev->drv->config_aneg)
-               err = phydev->drv->config_aneg(phydev);
-       else
-               err = genphy_config_aneg(phydev);
+       err = phy_config_aneg(phydev);
        if (err < 0)
                goto out_unlock;
 
@@ -514,7 +525,7 @@ static int phy_start_aneg_priv(struct phy_device *phydev, bool sync)
         * negotiation may already be done and aneg interrupt may not be
         * generated.
         */
-       if (phydev->irq != PHY_POLL && phydev->state == PHY_AN) {
+       if (!phy_polling_mode(phydev) && phydev->state == PHY_AN) {
                err = phy_aneg_done(phydev);
                if (err > 0) {
                        trigger = true;
@@ -546,6 +557,84 @@ int phy_start_aneg(struct phy_device *phydev)
 }
 EXPORT_SYMBOL(phy_start_aneg);
 
+static int phy_poll_aneg_done(struct phy_device *phydev)
+{
+       unsigned int retries = 100;
+       int ret;
+
+       do {
+               msleep(100);
+               ret = phy_aneg_done(phydev);
+       } while (!ret && --retries);
+
+       if (!ret)
+               return -ETIMEDOUT;
+
+       return ret < 0 ? ret : 0;
+}
+
+/**
+ * phy_speed_down - set speed to lowest speed supported by both link partners
+ * @phydev: the phy_device struct
+ * @sync: perform action synchronously
+ *
+ * Description: Typically used to save energy when waiting for a WoL packet
+ *
+ * WARNING: Setting sync to false may leave the system unable to suspend if
+ * the PHY generates an interrupt when finishing the autonegotiation. This
+ * interrupt may wake up the system immediately after suspend. Therefore use
+ * sync = false only if you're sure it's safe with the respective network
+ * chip.
+ */
+int phy_speed_down(struct phy_device *phydev, bool sync)
+{
+       u32 adv = phydev->lp_advertising & phydev->supported;
+       u32 adv_old = phydev->advertising;
+       int ret;
+
+       if (phydev->autoneg != AUTONEG_ENABLE)
+               return 0;
+
+       if (adv & PHY_10BT_FEATURES)
+               phydev->advertising &= ~(PHY_100BT_FEATURES |
+                                        PHY_1000BT_FEATURES);
+       else if (adv & PHY_100BT_FEATURES)
+               phydev->advertising &= ~PHY_1000BT_FEATURES;
+
+       if (phydev->advertising == adv_old)
+               return 0;
+
+       ret = phy_config_aneg(phydev);
+       if (ret)
+               return ret;
+
+       return sync ? phy_poll_aneg_done(phydev) : 0;
+}
+EXPORT_SYMBOL_GPL(phy_speed_down);
+
+/**
+ * phy_speed_up - (re)set advertised speeds to all supported speeds
+ * @phydev: the phy_device struct
+ *
+ * Description: Used to revert the effect of phy_speed_down
+ */
+int phy_speed_up(struct phy_device *phydev)
+{
+       u32 mask = PHY_10BT_FEATURES | PHY_100BT_FEATURES | PHY_1000BT_FEATURES;
+       u32 adv_old = phydev->advertising;
+
+       if (phydev->autoneg != AUTONEG_ENABLE)
+               return 0;
+
+       phydev->advertising = (adv_old & ~mask) | (phydev->supported & mask);
+
+       if (phydev->advertising == adv_old)
+               return 0;
+
+       return phy_config_aneg(phydev);
+}
+EXPORT_SYMBOL_GPL(phy_speed_up);
+
 /**
  * phy_start_machine - start PHY state machine tracking
  * @phydev: the phy_device struct
@@ -894,7 +983,7 @@ void phy_state_machine(struct work_struct *work)
                        needs_aneg = true;
                break;
        case PHY_NOLINK:
-               if (phydev->irq != PHY_POLL)
+               if (!phy_polling_mode(phydev))
                        break;
 
                err = phy_read_status(phydev);
@@ -935,7 +1024,7 @@ void phy_state_machine(struct work_struct *work)
                /* Only register a CHANGE if we are polling and link changed
                 * since latest checking.
                 */
-               if (phydev->irq == PHY_POLL) {
+               if (phy_polling_mode(phydev)) {
                        old_link = phydev->link;
                        err = phy_read_status(phydev);
                        if (err)
@@ -1034,7 +1123,7 @@ void phy_state_machine(struct work_struct *work)
         * PHY, if PHY_IGNORE_INTERRUPT is set, then we will be moving
         * between states from phy_mac_interrupt()
         */
-       if (phydev->irq == PHY_POLL)
+       if (phy_polling_mode(phydev))
                queue_delayed_work(system_power_efficient_wq, &phydev->state_queue,
                                   PHY_STATE_TIME * HZ);
 }
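phy_speed_down() and phy_speed_up() are meant to bracket a low-power period, typically suspend with Wake-on-LAN armed: drop to the lowest speed both link partners support, then restore the full advertisement on resume. A sketch of the intended use in a MAC driver's PM hooks (the function names are illustrative):

static int example_suspend(struct net_device *ndev)
{
	/* sync = true: wait until the renegotiation has finished */
	return phy_speed_down(ndev->phydev, true);
}

static int example_resume(struct net_device *ndev)
{
	/* re-advertise everything the PHY supports */
	return phy_speed_up(ndev->phydev);
}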
index b9f5f40a7ac1e6640a653e8207cdd8885100e09f..db1172db1e7cb7df0fed8a21b0a7757ae6c068b5 100644 (file)
@@ -1555,6 +1555,14 @@ int genphy_read_status(struct phy_device *phydev)
                        if (adv < 0)
                                return adv;
 
+                       if (lpagb & LPA_1000MSFAIL) {
+                               if (adv & CTL1000_ENABLE_MASTER)
+                                       phydev_err(phydev, "Master/Slave resolution failed, maybe conflicting manual settings?\n");
+                               else
+                                       phydev_err(phydev, "Master/Slave resolution failed\n");
+                               return -ENOLINK;
+                       }
+
                        phydev->lp_advertising =
                                mii_stat1000_to_ethtool_lpa_t(lpagb);
                        common_adv_gb = lpagb & adv << 2;
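For context on the new LPA_1000MSFAIL check: 1000BASE-T autonegotiation includes a master/slave handshake, and the failure bit in MII_STAT1000 is latched when that handshake cannot resolve, most commonly because both link partners were manually forced to the same role. A configuration that would now trip the -ENOLINK path:

	/* force the local PHY to master; if the peer is forced to
	 * master as well, resolution fails and genphy_read_status()
	 * reports it
	 */
	phy_set_bits(phydev, MII_CTRL1000,
		     CTL1000_ENABLE_MASTER | CTL1000_AS_MASTER);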
index 082fb40c656dc8b9da48cda1d65b58802ca0259f..7fc8508b5231d94beab4c45bf7666d15d4ef786f 100644 (file)
@@ -37,6 +37,9 @@
 #define RTL8201F_ISR                           0x1e
 #define RTL8201F_IER                           0x13
 
+#define RTL8366RB_POWER_SAVE                   0x15
+#define RTL8366RB_POWER_SAVE_ON                        BIT(12)
+
 MODULE_DESCRIPTION("Realtek PHY driver");
 MODULE_AUTHOR("Johnson Leung");
 MODULE_LICENSE("GPL");
@@ -128,6 +131,37 @@ static int rtl8211f_config_intr(struct phy_device *phydev)
        return phy_write_paged(phydev, 0xa42, RTL821x_INER, val);
 }
 
+static int rtl8211_config_aneg(struct phy_device *phydev)
+{
+       int ret;
+
+       ret = genphy_config_aneg(phydev);
+       if (ret < 0)
+               return ret;
+
+       /* This quirk was copied from the vendor driver. Unfortunately it
+        * includes no description of the magic numbers.
+        */
+       if (phydev->speed == SPEED_100 && phydev->autoneg == AUTONEG_DISABLE) {
+               phy_write(phydev, 0x17, 0x2138);
+               phy_write(phydev, 0x0e, 0x0260);
+       } else {
+               phy_write(phydev, 0x17, 0x2108);
+               phy_write(phydev, 0x0e, 0x0000);
+       }
+
+       return 0;
+}
+
+static int rtl8211c_config_init(struct phy_device *phydev)
+{
+       /* RTL8211C has an issue when operating in Gigabit slave mode */
+       phy_set_bits(phydev, MII_CTRL1000,
+                    CTL1000_ENABLE_MASTER | CTL1000_AS_MASTER);
+
+       return genphy_config_init(phydev);
+}
+
 static int rtl8211f_config_init(struct phy_device *phydev)
 {
        int ret;
@@ -159,6 +193,24 @@ static int rtl8211b_resume(struct phy_device *phydev)
        return genphy_resume(phydev);
 }
 
+static int rtl8366rb_config_init(struct phy_device *phydev)
+{
+       int ret;
+
+       ret = genphy_config_init(phydev);
+       if (ret < 0)
+               return ret;
+
+       ret = phy_set_bits(phydev, RTL8366RB_POWER_SAVE,
+                          RTL8366RB_POWER_SAVE_ON);
+       if (ret) {
+               dev_err(&phydev->mdio.dev,
+                       "error enabling power management\n");
+       }
+
+       return ret;
+}
+
 static struct phy_driver realtek_drvs[] = {
        {
                .phy_id         = 0x00008201,
@@ -178,6 +230,14 @@ static struct phy_driver realtek_drvs[] = {
                .resume         = genphy_resume,
                .read_page      = rtl821x_read_page,
                .write_page     = rtl821x_write_page,
+       }, {
+               .phy_id         = 0x001cc910,
+               .name           = "RTL8211 Gigabit Ethernet",
+               .phy_id_mask    = 0x001fffff,
+               .features       = PHY_GBIT_FEATURES,
+               .config_aneg    = rtl8211_config_aneg,
+               .read_mmd       = &genphy_read_mmd_unsupported,
+               .write_mmd      = &genphy_write_mmd_unsupported,
        }, {
                .phy_id         = 0x001cc912,
                .name           = "RTL8211B Gigabit Ethernet",
@@ -190,6 +250,14 @@ static struct phy_driver realtek_drvs[] = {
                .write_mmd      = &genphy_write_mmd_unsupported,
                .suspend        = rtl8211b_suspend,
                .resume         = rtl8211b_resume,
+       }, {
+               .phy_id         = 0x001cc913,
+               .name           = "RTL8211C Gigabit Ethernet",
+               .phy_id_mask    = 0x001fffff,
+               .features       = PHY_GBIT_FEATURES,
+               .config_init    = rtl8211c_config_init,
+               .read_mmd       = &genphy_read_mmd_unsupported,
+               .write_mmd      = &genphy_write_mmd_unsupported,
        }, {
                .phy_id         = 0x001cc914,
                .name           = "RTL8211DN Gigabit Ethernet",
@@ -223,6 +291,15 @@ static struct phy_driver realtek_drvs[] = {
                .resume         = genphy_resume,
                .read_page      = rtl821x_read_page,
                .write_page     = rtl821x_write_page,
+       }, {
+               .phy_id         = 0x001cc961,
+               .name           = "RTL8366RB Gigabit Ethernet",
+               .phy_id_mask    = 0x001fffff,
+               .features       = PHY_GBIT_FEATURES,
+               .flags          = PHY_HAS_INTERRUPT,
+               .config_init    = &rtl8366rb_config_init,
+               .suspend        = genphy_suspend,
+               .resume         = genphy_resume,
        },
 };
 
@@ -230,10 +307,13 @@ module_phy_driver(realtek_drvs);
 
 static struct mdio_device_id __maybe_unused realtek_tbl[] = {
        { 0x001cc816, 0x001fffff },
+       { 0x001cc910, 0x001fffff },
        { 0x001cc912, 0x001fffff },
+       { 0x001cc913, 0x001fffff },
        { 0x001cc914, 0x001fffff },
        { 0x001cc915, 0x001fffff },
        { 0x001cc916, 0x001fffff },
+       { 0x001cc961, 0x001fffff },
        { }
 };
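The new Realtek init paths lean on phy_set_bits(), a read-modify-write helper. Ignoring the locking the real helper provides, it behaves roughly like:

static int example_set_bits(struct phy_device *phydev, u32 regnum, u16 bits)
{
	int val = phy_read(phydev, regnum);

	if (val < 0)
		return val;
	return phy_write(phydev, regnum, val | bits);
}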
 
index c4c92db86dfa8449e4416cdf8b1e0cc70f2c2d39..5661226cf75b129b15877747ced78be732240779 100644 (file)
@@ -1,5 +1,7 @@
+#include <linux/ctype.h>
 #include <linux/delay.h>
 #include <linux/gpio/consumer.h>
+#include <linux/hwmon.h>
 #include <linux/i2c.h>
 #include <linux/interrupt.h>
 #include <linux/jiffies.h>
@@ -131,6 +133,12 @@ struct sfp {
        unsigned int sm_retries;
 
        struct sfp_eeprom_id id;
+#if IS_ENABLED(CONFIG_HWMON)
+       struct sfp_diag diag;
+       struct device *hwmon_dev;
+       char *hwmon_name;
+#endif
+
 };
 
 static bool sff_module_supported(const struct sfp_eeprom_id *id)
@@ -316,6 +324,719 @@ static unsigned int sfp_check(void *buf, size_t len)
        return check;
 }
 
+/* hwmon */
+#if IS_ENABLED(CONFIG_HWMON)
+static umode_t sfp_hwmon_is_visible(const void *data,
+                                   enum hwmon_sensor_types type,
+                                   u32 attr, int channel)
+{
+       const struct sfp *sfp = data;
+
+       switch (type) {
+       case hwmon_temp:
+               switch (attr) {
+               case hwmon_temp_input:
+               case hwmon_temp_min_alarm:
+               case hwmon_temp_max_alarm:
+               case hwmon_temp_lcrit_alarm:
+               case hwmon_temp_crit_alarm:
+               case hwmon_temp_min:
+               case hwmon_temp_max:
+               case hwmon_temp_lcrit:
+               case hwmon_temp_crit:
+                       return 0444;
+               default:
+                       return 0;
+               }
+       case hwmon_in:
+               switch (attr) {
+               case hwmon_in_input:
+               case hwmon_in_min_alarm:
+               case hwmon_in_max_alarm:
+               case hwmon_in_lcrit_alarm:
+               case hwmon_in_crit_alarm:
+               case hwmon_in_min:
+               case hwmon_in_max:
+               case hwmon_in_lcrit:
+               case hwmon_in_crit:
+                       return 0444;
+               default:
+                       return 0;
+               }
+       case hwmon_curr:
+               switch (attr) {
+               case hwmon_curr_input:
+               case hwmon_curr_min_alarm:
+               case hwmon_curr_max_alarm:
+               case hwmon_curr_lcrit_alarm:
+               case hwmon_curr_crit_alarm:
+               case hwmon_curr_min:
+               case hwmon_curr_max:
+               case hwmon_curr_lcrit:
+               case hwmon_curr_crit:
+                       return 0444;
+               default:
+                       return 0;
+               }
+       case hwmon_power:
+               /* External calibration of receive power requires
+                * floating point arithmetic. Doing that in the kernel
+                * is not easy, so just skip it. If the module does
+                * not require external calibration, however, we can
+                * show receive power, since FP is then not needed.
+                */
+               if (sfp->id.ext.diagmon & SFP_DIAGMON_EXT_CAL &&
+                   channel == 1)
+                       return 0;
+               switch (attr) {
+               case hwmon_power_input:
+               case hwmon_power_min_alarm:
+               case hwmon_power_max_alarm:
+               case hwmon_power_lcrit_alarm:
+               case hwmon_power_crit_alarm:
+               case hwmon_power_min:
+               case hwmon_power_max:
+               case hwmon_power_lcrit:
+               case hwmon_power_crit:
+                       return 0444;
+               default:
+                       return 0;
+               }
+       default:
+               return 0;
+       }
+}
+
+static int sfp_hwmon_read_sensor(struct sfp *sfp, int reg, long *value)
+{
+       __be16 val;
+       int err;
+
+       err = sfp_read(sfp, true, reg, &val, sizeof(val));
+       if (err < 0)
+               return err;
+
+       *value = be16_to_cpu(val);
+
+       return 0;
+}
+
+static void sfp_hwmon_to_rx_power(long *value)
+{
+       *value = DIV_ROUND_CLOSEST(*value, 100);
+}
+
+static void sfp_hwmon_calibrate(struct sfp *sfp, unsigned int slope, int offset,
+                               long *value)
+{
+       if (sfp->id.ext.diagmon & SFP_DIAGMON_EXT_CAL)
+               *value = DIV_ROUND_CLOSEST(*value * slope, 256) + offset;
+}
+
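For reference, sfp_hwmon_calibrate() above implements the SFF-8472
external-calibration formula result = raw * slope / 256 + offset, where
slope is an unsigned 8.8 fixed-point factor (256 == 1.0). A standalone
sketch with illustrative numbers, not taken from any real module:

#include <stdio.h>

/* The +128 mirrors the kernel's DIV_ROUND_CLOSEST(x, 256). */
static long calibrate(long raw, unsigned int slope, int offset)
{
        return (raw * slope + 128) / 256 + offset;
}

int main(void)
{
        /* slope 384 == 1.5 in 8.8 fixed point */
        printf("%ld\n", calibrate(6400, 384, -10)); /* 6400 * 1.5 - 10 = 9590 */
        return 0;
}
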
+static void sfp_hwmon_calibrate_temp(struct sfp *sfp, long *value)
+{
+       sfp_hwmon_calibrate(sfp, be16_to_cpu(sfp->diag.cal_t_slope),
+                           be16_to_cpu(sfp->diag.cal_t_offset), value);
+
+       if (*value >= 0x8000)
+               *value -= 0x10000;
+
+       *value = DIV_ROUND_CLOSEST(*value * 1000, 256);
+}
+
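A worked example of the temperature conversion above: the raw value is a
signed 8.8 fixed-point count of degrees Celsius, and hwmon reports
millidegrees. Standalone sketch:

#include <stdio.h>

int main(void)
{
        long value = 0xFB00;            /* -5.0 degC in signed 8.8 */

        if (value >= 0x8000)
                value -= 0x10000;       /* sign-extend: -1280 */

        value = value * 1000 / 256;     /* to millidegrees: -5000 */
        printf("%ld\n", value);
        return 0;
}
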
+static void sfp_hwmon_calibrate_vcc(struct sfp *sfp, long *value)
+{
+       sfp_hwmon_calibrate(sfp, be16_to_cpu(sfp->diag.cal_v_slope),
+                           be16_to_cpu(sfp->diag.cal_v_offset), value);
+
+       *value = DIV_ROUND_CLOSEST(*value, 10);
+}
+
+static void sfp_hwmon_calibrate_bias(struct sfp *sfp, long *value)
+{
+       sfp_hwmon_calibrate(sfp, be16_to_cpu(sfp->diag.cal_txi_slope),
+                           be16_to_cpu(sfp->diag.cal_txi_offset), value);
+
+       *value = DIV_ROUND_CLOSEST(*value, 500);
+}
+
+static void sfp_hwmon_calibrate_tx_power(struct sfp *sfp, long *value)
+{
+       sfp_hwmon_calibrate(sfp, be16_to_cpu(sfp->diag.cal_txpwr_slope),
+                           be16_to_cpu(sfp->diag.cal_txpwr_offset), value);
+
+       *value = DIV_ROUND_CLOSEST(*value, 10);
+}
+
+static int sfp_hwmon_read_temp(struct sfp *sfp, int reg, long *value)
+{
+       int err;
+
+       err = sfp_hwmon_read_sensor(sfp, reg, value);
+       if (err < 0)
+               return err;
+
+       sfp_hwmon_calibrate_temp(sfp, value);
+
+       return 0;
+}
+
+static int sfp_hwmon_read_vcc(struct sfp *sfp, int reg, long *value)
+{
+       int err;
+
+       err = sfp_hwmon_read_sensor(sfp, reg, value);
+       if (err < 0)
+               return err;
+
+       sfp_hwmon_calibrate_vcc(sfp, value);
+
+       return 0;
+}
+
+static int sfp_hwmon_read_bias(struct sfp *sfp, int reg, long *value)
+{
+       int err;
+
+       err = sfp_hwmon_read_sensor(sfp, reg, value);
+       if (err < 0)
+               return err;
+
+       sfp_hwmon_calibrate_bias(sfp, value);
+
+       return 0;
+}
+
+static int sfp_hwmon_read_tx_power(struct sfp *sfp, int reg, long *value)
+{
+       int err;
+
+       err = sfp_hwmon_read_sensor(sfp, reg, value);
+       if (err < 0)
+               return err;
+
+       sfp_hwmon_calibrate_tx_power(sfp, value);
+
+       return 0;
+}
+
+static int sfp_hwmon_read_rx_power(struct sfp *sfp, int reg, long *value)
+{
+       int err;
+
+       err = sfp_hwmon_read_sensor(sfp, reg, value);
+       if (err < 0)
+               return err;
+
+       sfp_hwmon_to_rx_power(value);
+
+       return 0;
+}
+
+static int sfp_hwmon_temp(struct sfp *sfp, u32 attr, long *value)
+{
+       u8 status;
+       int err;
+
+       switch (attr) {
+       case hwmon_temp_input:
+               return sfp_hwmon_read_temp(sfp, SFP_TEMP, value);
+
+       case hwmon_temp_lcrit:
+               *value = be16_to_cpu(sfp->diag.temp_low_alarm);
+               sfp_hwmon_calibrate_temp(sfp, value);
+               return 0;
+
+       case hwmon_temp_min:
+               *value = be16_to_cpu(sfp->diag.temp_low_warn);
+               sfp_hwmon_calibrate_temp(sfp, value);
+               return 0;
+       case hwmon_temp_max:
+               *value = be16_to_cpu(sfp->diag.temp_high_warn);
+               sfp_hwmon_calibrate_temp(sfp, value);
+               return 0;
+
+       case hwmon_temp_crit:
+               *value = be16_to_cpu(sfp->diag.temp_high_alarm);
+               sfp_hwmon_calibrate_temp(sfp, value);
+               return 0;
+
+       case hwmon_temp_lcrit_alarm:
+               err = sfp_read(sfp, true, SFP_ALARM0, &status, sizeof(status));
+               if (err < 0)
+                       return err;
+
+               *value = !!(status & SFP_ALARM0_TEMP_LOW);
+               return 0;
+
+       case hwmon_temp_min_alarm:
+               err = sfp_read(sfp, true, SFP_WARN0, &status, sizeof(status));
+               if (err < 0)
+                       return err;
+
+               *value = !!(status & SFP_WARN0_TEMP_LOW);
+               return 0;
+
+       case hwmon_temp_max_alarm:
+               err = sfp_read(sfp, true, SFP_WARN0, &status, sizeof(status));
+               if (err < 0)
+                       return err;
+
+               *value = !!(status & SFP_WARN0_TEMP_HIGH);
+               return 0;
+
+       case hwmon_temp_crit_alarm:
+               err = sfp_read(sfp, true, SFP_ALARM0, &status, sizeof(status));
+               if (err < 0)
+                       return err;
+
+               *value = !!(status & SFP_ALARM0_TEMP_HIGH);
+               return 0;
+       default:
+               return -EOPNOTSUPP;
+       }
+
+       return -EOPNOTSUPP;
+}
+
+static int sfp_hwmon_vcc(struct sfp *sfp, u32 attr, long *value)
+{
+       u8 status;
+       int err;
+
+       switch (attr) {
+       case hwmon_in_input:
+               return sfp_hwmon_read_vcc(sfp, SFP_VCC, value);
+
+       case hwmon_in_lcrit:
+               *value = be16_to_cpu(sfp->diag.volt_low_alarm);
+               sfp_hwmon_calibrate_vcc(sfp, value);
+               return 0;
+
+       case hwmon_in_min:
+               *value = be16_to_cpu(sfp->diag.volt_low_warn);
+               sfp_hwmon_calibrate_vcc(sfp, value);
+               return 0;
+
+       case hwmon_in_max:
+               *value = be16_to_cpu(sfp->diag.volt_high_warn);
+               sfp_hwmon_calibrate_vcc(sfp, value);
+               return 0;
+
+       case hwmon_in_crit:
+               *value = be16_to_cpu(sfp->diag.volt_high_alarm);
+               sfp_hwmon_calibrate_vcc(sfp, value);
+               return 0;
+
+       case hwmon_in_lcrit_alarm:
+               err = sfp_read(sfp, true, SFP_ALARM0, &status, sizeof(status));
+               if (err < 0)
+                       return err;
+
+               *value = !!(status & SFP_ALARM0_VCC_LOW);
+               return 0;
+
+       case hwmon_in_min_alarm:
+               err = sfp_read(sfp, true, SFP_WARN0, &status, sizeof(status));
+               if (err < 0)
+                       return err;
+
+               *value = !!(status & SFP_WARN0_VCC_LOW);
+               return 0;
+
+       case hwmon_in_max_alarm:
+               err = sfp_read(sfp, true, SFP_WARN0, &status, sizeof(status));
+               if (err < 0)
+                       return err;
+
+               *value = !!(status & SFP_WARN0_VCC_HIGH);
+               return 0;
+
+       case hwmon_in_crit_alarm:
+               err = sfp_read(sfp, true, SFP_ALARM0, &status, sizeof(status));
+               if (err < 0)
+                       return err;
+
+               *value = !!(status & SFP_ALARM0_VCC_HIGH);
+               return 0;
+       default:
+               return -EOPNOTSUPP;
+       }
+
+       return -EOPNOTSUPP;
+}
+
+static int sfp_hwmon_bias(struct sfp *sfp, u32 attr, long *value)
+{
+       u8 status;
+       int err;
+
+       switch (attr) {
+       case hwmon_curr_input:
+               return sfp_hwmon_read_bias(sfp, SFP_TX_BIAS, value);
+
+       case hwmon_curr_lcrit:
+               *value = be16_to_cpu(sfp->diag.bias_low_alarm);
+               sfp_hwmon_calibrate_bias(sfp, value);
+               return 0;
+
+       case hwmon_curr_min:
+               *value = be16_to_cpu(sfp->diag.bias_low_warn);
+               sfp_hwmon_calibrate_bias(sfp, value);
+               return 0;
+
+       case hwmon_curr_max:
+               *value = be16_to_cpu(sfp->diag.bias_high_warn);
+               sfp_hwmon_calibrate_bias(sfp, value);
+               return 0;
+
+       case hwmon_curr_crit:
+               *value = be16_to_cpu(sfp->diag.bias_high_alarm);
+               sfp_hwmon_calibrate_bias(sfp, value);
+               return 0;
+
+       case hwmon_curr_lcrit_alarm:
+               err = sfp_read(sfp, true, SFP_ALARM0, &status, sizeof(status));
+               if (err < 0)
+                       return err;
+
+               *value = !!(status & SFP_ALARM0_TX_BIAS_LOW);
+               return 0;
+
+       case hwmon_curr_min_alarm:
+               err = sfp_read(sfp, true, SFP_WARN0, &status, sizeof(status));
+               if (err < 0)
+                       return err;
+
+               *value = !!(status & SFP_WARN0_TX_BIAS_LOW);
+               return 0;
+
+       case hwmon_curr_max_alarm:
+               err = sfp_read(sfp, true, SFP_WARN0, &status, sizeof(status));
+               if (err < 0)
+                       return err;
+
+               *value = !!(status & SFP_WARN0_TX_BIAS_HIGH);
+               return 0;
+
+       case hwmon_curr_crit_alarm:
+               err = sfp_read(sfp, true, SFP_ALARM0, &status, sizeof(status));
+               if (err < 0)
+                       return err;
+
+               *value = !!(status & SFP_ALARM0_TX_BIAS_HIGH);
+               return 0;
+       default:
+               return -EOPNOTSUPP;
+       }
+
+       return -EOPNOTSUPP;
+}
+
+static int sfp_hwmon_tx_power(struct sfp *sfp, u32 attr, long *value)
+{
+       u8 status;
+       int err;
+
+       switch (attr) {
+       case hwmon_power_input:
+               return sfp_hwmon_read_tx_power(sfp, SFP_TX_POWER, value);
+
+       case hwmon_power_lcrit:
+               *value = be16_to_cpu(sfp->diag.txpwr_low_alarm);
+               sfp_hwmon_calibrate_tx_power(sfp, value);
+               return 0;
+
+       case hwmon_power_min:
+               *value = be16_to_cpu(sfp->diag.txpwr_low_warn);
+               sfp_hwmon_calibrate_tx_power(sfp, value);
+               return 0;
+
+       case hwmon_power_max:
+               *value = be16_to_cpu(sfp->diag.txpwr_high_warn);
+               sfp_hwmon_calibrate_tx_power(sfp, value);
+               return 0;
+
+       case hwmon_power_crit:
+               *value = be16_to_cpu(sfp->diag.txpwr_high_alarm);
+               sfp_hwmon_calibrate_tx_power(sfp, value);
+               return 0;
+
+       case hwmon_power_lcrit_alarm:
+               err = sfp_read(sfp, true, SFP_ALARM0, &status, sizeof(status));
+               if (err < 0)
+                       return err;
+
+               *value = !!(status & SFP_ALARM0_TXPWR_LOW);
+               return 0;
+
+       case hwmon_power_min_alarm:
+               err = sfp_read(sfp, true, SFP_WARN0, &status, sizeof(status));
+               if (err < 0)
+                       return err;
+
+               *value = !!(status & SFP_WARN0_TXPWR_LOW);
+               return 0;
+
+       case hwmon_power_max_alarm:
+               err = sfp_read(sfp, true, SFP_WARN0, &status, sizeof(status));
+               if (err < 0)
+                       return err;
+
+               *value = !!(status & SFP_WARN0_TXPWR_HIGH);
+               return 0;
+
+       case hwmon_power_crit_alarm:
+               err = sfp_read(sfp, true, SFP_ALARM0, &status, sizeof(status));
+               if (err < 0)
+                       return err;
+
+               *value = !!(status & SFP_ALARM0_TXPWR_HIGH);
+               return 0;
+       default:
+               return -EOPNOTSUPP;
+       }
+
+       return -EOPNOTSUPP;
+}
+
+static int sfp_hwmon_rx_power(struct sfp *sfp, u32 attr, long *value)
+{
+       u8 status;
+       int err;
+
+       switch (attr) {
+       case hwmon_power_input:
+               return sfp_hwmon_read_rx_power(sfp, SFP_RX_POWER, value);
+
+       case hwmon_power_lcrit:
+               *value = be16_to_cpu(sfp->diag.rxpwr_low_alarm);
+               sfp_hwmon_to_rx_power(value);
+               return 0;
+
+       case hwmon_power_min:
+               *value = be16_to_cpu(sfp->diag.rxpwr_low_warn);
+               sfp_hwmon_to_rx_power(value);
+               return 0;
+
+       case hwmon_power_max:
+               *value = be16_to_cpu(sfp->diag.rxpwr_high_warn);
+               sfp_hwmon_to_rx_power(value);
+               return 0;
+
+       case hwmon_power_crit:
+               *value = be16_to_cpu(sfp->diag.rxpwr_high_alarm);
+               sfp_hwmon_to_rx_power(value);
+               return 0;
+
+       case hwmon_power_lcrit_alarm:
+               err = sfp_read(sfp, true, SFP_ALARM1, &status, sizeof(status));
+               if (err < 0)
+                       return err;
+
+               *value = !!(status & SFP_ALARM1_RXPWR_LOW);
+               return 0;
+
+       case hwmon_power_min_alarm:
+               err = sfp_read(sfp, true, SFP_WARN1, &status, sizeof(status));
+               if (err < 0)
+                       return err;
+
+               *value = !!(status & SFP_WARN1_RXPWR_LOW);
+               return 0;
+
+       case hwmon_power_max_alarm:
+               err = sfp_read(sfp, true, SFP_WARN1, &status, sizeof(status));
+               if (err < 0)
+                       return err;
+
+               *value = !!(status & SFP_WARN1_RXPWR_HIGH);
+               return 0;
+
+       case hwmon_power_crit_alarm:
+               err = sfp_read(sfp, true, SFP_ALARM1, &status, sizeof(status));
+               if (err < 0)
+                       return err;
+
+               *value = !!(status & SFP_ALARM1_RXPWR_HIGH);
+               return 0;
+       default:
+               return -EOPNOTSUPP;
+       }
+
+       return -EOPNOTSUPP;
+}
+
+static int sfp_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
+                         u32 attr, int channel, long *value)
+{
+       struct sfp *sfp = dev_get_drvdata(dev);
+
+       switch (type) {
+       case hwmon_temp:
+               return sfp_hwmon_temp(sfp, attr, value);
+       case hwmon_in:
+               return sfp_hwmon_vcc(sfp, attr, value);
+       case hwmon_curr:
+               return sfp_hwmon_bias(sfp, attr, value);
+       case hwmon_power:
+               switch (channel) {
+               case 0:
+                       return sfp_hwmon_tx_power(sfp, attr, value);
+               case 1:
+                       return sfp_hwmon_rx_power(sfp, attr, value);
+               default:
+                       return -EOPNOTSUPP;
+               }
+       default:
+               return -EOPNOTSUPP;
+       }
+}
+
+static const struct hwmon_ops sfp_hwmon_ops = {
+       .is_visible = sfp_hwmon_is_visible,
+       .read = sfp_hwmon_read,
+};
+
+static u32 sfp_hwmon_chip_config[] = {
+       HWMON_C_REGISTER_TZ,
+       0,
+};
+
+static const struct hwmon_channel_info sfp_hwmon_chip = {
+       .type = hwmon_chip,
+       .config = sfp_hwmon_chip_config,
+};
+
+static u32 sfp_hwmon_temp_config[] = {
+       HWMON_T_INPUT |
+       HWMON_T_MAX | HWMON_T_MIN |
+       HWMON_T_MAX_ALARM | HWMON_T_MIN_ALARM |
+       HWMON_T_CRIT | HWMON_T_LCRIT |
+       HWMON_T_CRIT_ALARM | HWMON_T_LCRIT_ALARM,
+       0,
+};
+
+static const struct hwmon_channel_info sfp_hwmon_temp_channel_info = {
+       .type = hwmon_temp,
+       .config = sfp_hwmon_temp_config,
+};
+
+static u32 sfp_hwmon_vcc_config[] = {
+       HWMON_I_INPUT |
+       HWMON_I_MAX | HWMON_I_MIN |
+       HWMON_I_MAX_ALARM | HWMON_I_MIN_ALARM |
+       HWMON_I_CRIT | HWMON_I_LCRIT |
+       HWMON_I_CRIT_ALARM | HWMON_I_LCRIT_ALARM,
+       0,
+};
+
+static const struct hwmon_channel_info sfp_hwmon_vcc_channel_info = {
+       .type = hwmon_in,
+       .config = sfp_hwmon_vcc_config,
+};
+
+static u32 sfp_hwmon_bias_config[] = {
+       HWMON_C_INPUT |
+       HWMON_C_MAX | HWMON_C_MIN |
+       HWMON_C_MAX_ALARM | HWMON_C_MIN_ALARM |
+       HWMON_C_CRIT | HWMON_C_LCRIT |
+       HWMON_C_CRIT_ALARM | HWMON_C_LCRIT_ALARM,
+       0,
+};
+
+static const struct hwmon_channel_info sfp_hwmon_bias_channel_info = {
+       .type = hwmon_curr,
+       .config = sfp_hwmon_bias_config,
+};
+
+static u32 sfp_hwmon_power_config[] = {
+       /* Transmit power */
+       HWMON_P_INPUT |
+       HWMON_P_MAX | HWMON_P_MIN |
+       HWMON_P_MAX_ALARM | HWMON_P_MIN_ALARM |
+       HWMON_P_CRIT | HWMON_P_LCRIT |
+       HWMON_P_CRIT_ALARM | HWMON_P_LCRIT_ALARM,
+       /* Receive power */
+       HWMON_P_INPUT |
+       HWMON_P_MAX | HWMON_P_MIN |
+       HWMON_P_MAX_ALARM | HWMON_P_MIN_ALARM |
+       HWMON_P_CRIT | HWMON_P_LCRIT |
+       HWMON_P_CRIT_ALARM | HWMON_P_LCRIT_ALARM,
+       0,
+};
+
+static const struct hwmon_channel_info sfp_hwmon_power_channel_info = {
+       .type = hwmon_power,
+       .config = sfp_hwmon_power_config,
+};
+
+static const struct hwmon_channel_info *sfp_hwmon_info[] = {
+       &sfp_hwmon_chip,
+       &sfp_hwmon_vcc_channel_info,
+       &sfp_hwmon_temp_channel_info,
+       &sfp_hwmon_bias_channel_info,
+       &sfp_hwmon_power_channel_info,
+       NULL,
+};
+
+static const struct hwmon_chip_info sfp_hwmon_chip_info = {
+       .ops = &sfp_hwmon_ops,
+       .info = sfp_hwmon_info,
+};
+
+static int sfp_hwmon_insert(struct sfp *sfp)
+{
+       int err, i;
+
+       if (sfp->id.ext.sff8472_compliance == SFP_SFF8472_COMPLIANCE_NONE)
+               return 0;
+
+       if (!(sfp->id.ext.diagmon & SFP_DIAGMON_DDM))
+               return 0;
+
+       if (sfp->id.ext.diagmon & SFP_DIAGMON_ADDRMODE)
+               /* This driver in general does not support address
+                * change.
+                */
+               return 0;
+
+       err = sfp_read(sfp, true, 0, &sfp->diag, sizeof(sfp->diag));
+       if (err < 0)
+               return err;
+
+       sfp->hwmon_name = kstrdup(dev_name(sfp->dev), GFP_KERNEL);
+       if (!sfp->hwmon_name)
+               return -ENOMEM;
+
+       for (i = 0; sfp->hwmon_name[i]; i++)
+               if (hwmon_is_bad_char(sfp->hwmon_name[i]))
+                       sfp->hwmon_name[i] = '_';
+
+       sfp->hwmon_dev = hwmon_device_register_with_info(sfp->dev,
+                                                        sfp->hwmon_name, sfp,
+                                                        &sfp_hwmon_chip_info,
+                                                        NULL);
+
+       return PTR_ERR_OR_ZERO(sfp->hwmon_dev);
+}
+
+static void sfp_hwmon_remove(struct sfp *sfp)
+{
+       hwmon_device_unregister(sfp->hwmon_dev);
+       kfree(sfp->hwmon_name);
+}
+#else
+static int sfp_hwmon_insert(struct sfp *sfp)
+{
+       return 0;
+}
+
+static void sfp_hwmon_remove(struct sfp *sfp)
+{
+}
+#endif
+
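The #else stubs above follow the usual kernel pattern of compiling a
feature out while keeping the call sites free of #ifdefs. A minimal
sketch of the same pattern ("foo" is a hypothetical feature, not kernel
API):

#if IS_ENABLED(CONFIG_FOO)
int foo_attach(struct device *dev);
void foo_detach(struct device *dev);
#else
static inline int foo_attach(struct device *dev) { return 0; }
static inline void foo_detach(struct device *dev) { }
#endif

With this in place, sfp_sm_mod_probe() and sfp_sm_mod_remove() further
down can call the hwmon hooks unconditionally.
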
 /* Helpers */
 static void sfp_module_tx_disable(struct sfp *sfp)
 {
@@ -636,6 +1357,10 @@ static int sfp_sm_mod_probe(struct sfp *sfp)
                dev_warn(sfp->dev,
                         "module address swap to access page 0xA2 is not supported.\n");
 
+       ret = sfp_hwmon_insert(sfp);
+       if (ret < 0)
+               return ret;
+
        ret = sfp_module_insert(sfp->sfp_bus, &sfp->id);
        if (ret < 0)
                return ret;
@@ -647,6 +1372,8 @@ static void sfp_sm_mod_remove(struct sfp *sfp)
 {
        sfp_module_remove(sfp->sfp_bus);
 
+       sfp_hwmon_remove(sfp);
+
        if (sfp->mod_phy)
                sfp_sm_phy_detach(sfp);
 
index d9dd8fbfffc795d6e1023f542aa63e59a0b37b4c..fbf9ad429593ce6ab08a1d17ce7be726eecce814 100644 (file)
 #define PHY_ID_VSC8572                 0x000704d0
 #define PHY_ID_VSC8574                 0x000704a0
 #define PHY_ID_VSC8601                 0x00070420
+#define PHY_ID_VSC7385                 0x00070450
+#define PHY_ID_VSC7388                 0x00070480
+#define PHY_ID_VSC7395                 0x00070550
+#define PHY_ID_VSC7398                 0x00070580
 #define PHY_ID_VSC8662                 0x00070660
 #define PHY_ID_VSC8221                 0x000fc550
 #define PHY_ID_VSC8211                 0x000fc4b0
@@ -116,6 +120,137 @@ static int vsc824x_config_init(struct phy_device *phydev)
        return err;
 }
 
+#define VSC73XX_EXT_PAGE_ACCESS 0x1f
+
+static int vsc73xx_read_page(struct phy_device *phydev)
+{
+       return __phy_read(phydev, VSC73XX_EXT_PAGE_ACCESS);
+}
+
+static int vsc73xx_write_page(struct phy_device *phydev, int page)
+{
+       return __phy_write(phydev, VSC73XX_EXT_PAGE_ACCESS, page);
+}
+
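Register 0x1f (VSC73XX_EXT_PAGE_ACCESS) is a page-select register, and
the two callbacks above plug into phylib's paged-access helpers. A
simplified sketch of how such a helper composes them; the real
phy_read_paged() also takes the MDIO bus lock and restores the page on
error:

static int vsc73xx_paged_read_sketch(struct phy_device *phydev,
                                     int page, u32 reg)
{
        int oldpage, ret;

        oldpage = vsc73xx_read_page(phydev);    /* remember current page */
        if (oldpage < 0)
                return oldpage;

        ret = vsc73xx_write_page(phydev, page); /* select target page */
        if (ret < 0)
                return ret;

        ret = __phy_read(phydev, reg);          /* access within the page */

        vsc73xx_write_page(phydev, oldpage);    /* restore */
        return ret;
}
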
+static void vsc73xx_config_init(struct phy_device *phydev)
+{
+       /* Receiver init */
+       phy_write(phydev, 0x1f, 0x2a30);
+       phy_modify(phydev, 0x0c, 0x0300, 0x0200);
+       phy_write(phydev, 0x1f, 0x0000);
+
+       /* Config LEDs 0x61 */
+       phy_modify(phydev, MII_TPISTATUS, 0xff00, 0x0061);
+}
+
+static int vsc738x_config_init(struct phy_device *phydev)
+{
+       u16 rev;
+       /* This magic sequence appears in the application note
+        * "VSC7385/7388 PHY Configuration".
+        *
+        * Maybe one day we will get to know what it all means.
+        */
+       phy_write(phydev, 0x1f, 0x2a30);
+       phy_modify(phydev, 0x08, 0x0200, 0x0200);
+       phy_write(phydev, 0x1f, 0x52b5);
+       phy_write(phydev, 0x10, 0xb68a);
+       phy_modify(phydev, 0x12, 0xff07, 0x0003);
+       phy_modify(phydev, 0x11, 0x00ff, 0x00a2);
+       phy_write(phydev, 0x10, 0x968a);
+       phy_write(phydev, 0x1f, 0x2a30);
+       phy_modify(phydev, 0x08, 0x0200, 0x0000);
+       phy_write(phydev, 0x1f, 0x0000);
+
+       /* Read revision */
+       rev = phy_read(phydev, MII_PHYSID2);
+       rev &= 0x0f;
+
+       /* Special quirk for revision 0 */
+       if (rev == 0) {
+               phy_write(phydev, 0x1f, 0x2a30);
+               phy_modify(phydev, 0x08, 0x0200, 0x0200);
+               phy_write(phydev, 0x1f, 0x52b5);
+               phy_write(phydev, 0x12, 0x0000);
+               phy_write(phydev, 0x11, 0x0689);
+               phy_write(phydev, 0x10, 0x8f92);
+               phy_write(phydev, 0x1f, 0x52b5);
+               phy_write(phydev, 0x12, 0x0000);
+               phy_write(phydev, 0x11, 0x0e35);
+               phy_write(phydev, 0x10, 0x9786);
+               phy_write(phydev, 0x1f, 0x2a30);
+               phy_modify(phydev, 0x08, 0x0200, 0x0000);
+               phy_write(phydev, 0x17, 0xff80);
+               phy_write(phydev, 0x17, 0x0000);
+       }
+
+       phy_write(phydev, 0x1f, 0x0000);
+       phy_write(phydev, 0x12, 0x0048);
+
+       if (rev == 0) {
+               phy_write(phydev, 0x1f, 0x2a30);
+               phy_write(phydev, 0x14, 0x6600);
+               phy_write(phydev, 0x1f, 0x0000);
+               phy_write(phydev, 0x18, 0xa24e);
+       } else {
+               phy_write(phydev, 0x1f, 0x2a30);
+               phy_modify(phydev, 0x16, 0x0fc0, 0x0240);
+               phy_modify(phydev, 0x14, 0x6000, 0x4000);
+               /* bits 14-15 in extended register 0x14 control DACG amplitude
+                * 6 = -8%, 2 is hardware default
+                */
+               phy_write(phydev, 0x1f, 0x0001);
+               phy_modify(phydev, 0x14, 0xe000, 0x6000);
+               phy_write(phydev, 0x1f, 0x0000);
+       }
+
+       vsc73xx_config_init(phydev);
+
+       return genphy_config_init(phydev);
+}
+
+static int vsc739x_config_init(struct phy_device *phydev)
+{
+       /* This magic sequence appears in the VSC7395 SparX-G5e application
+        * note "VSC7395/VSC7398 PHY Configuration"
+        *
+        * Maybe one day we will get to know what it all means.
+        */
+       phy_write(phydev, 0x1f, 0x2a30);
+       phy_modify(phydev, 0x08, 0x0200, 0x0200);
+       phy_write(phydev, 0x1f, 0x52b5);
+       phy_write(phydev, 0x10, 0xb68a);
+       phy_modify(phydev, 0x12, 0xff07, 0x0003);
+       phy_modify(phydev, 0x11, 0x00ff, 0x00a2);
+       phy_write(phydev, 0x10, 0x968a);
+       phy_write(phydev, 0x1f, 0x2a30);
+       phy_modify(phydev, 0x08, 0x0200, 0x0000);
+       phy_write(phydev, 0x1f, 0x0000);
+
+       phy_write(phydev, 0x1f, 0x0000);
+       phy_write(phydev, 0x12, 0x0048);
+       phy_write(phydev, 0x1f, 0x2a30);
+       phy_modify(phydev, 0x16, 0x0fc0, 0x0240);
+       phy_modify(phydev, 0x14, 0x6000, 0x4000);
+       phy_write(phydev, 0x1f, 0x0001);
+       phy_modify(phydev, 0x14, 0xe000, 0x6000);
+       phy_write(phydev, 0x1f, 0x0000);
+
+       vsc73xx_config_init(phydev);
+
+       return genphy_config_init(phydev);
+}
+
+static int vsc73xx_config_aneg(struct phy_device *phydev)
+{
+       /* The VSC73xx switches do not like to be instructed to
+        * do autonegotiation in any way; they prefer that you just go
+        * with the power-on/reset defaults. Writing some registers will
+        * just make autonegotiation permanently fail.
+        */
+       return 0;
+}
+
 /* This adds a skew for both TX and RX clocks, so the skew should only be
  * applied to "rgmii-id" interfaces. It may not work as expected
  * on "rgmii-txid", "rgmii-rxid" or "rgmii" interfaces. */
@@ -318,6 +453,42 @@ static struct phy_driver vsc82xx_driver[] = {
        .config_init    = &vsc8601_config_init,
        .ack_interrupt  = &vsc824x_ack_interrupt,
        .config_intr    = &vsc82xx_config_intr,
+}, {
+       .phy_id         = PHY_ID_VSC7385,
+       .name           = "Vitesse VSC7385",
+       .phy_id_mask    = 0x000ffff0,
+       .features       = PHY_GBIT_FEATURES,
+       .config_init    = vsc738x_config_init,
+       .config_aneg    = vsc73xx_config_aneg,
+       .read_page      = vsc73xx_read_page,
+       .write_page     = vsc73xx_write_page,
+}, {
+       .phy_id         = PHY_ID_VSC7388,
+       .name           = "Vitesse VSC7388",
+       .phy_id_mask    = 0x000ffff0,
+       .features       = PHY_GBIT_FEATURES,
+       .config_init    = vsc738x_config_init,
+       .config_aneg    = vsc73xx_config_aneg,
+       .read_page      = vsc73xx_read_page,
+       .write_page     = vsc73xx_write_page,
+}, {
+       .phy_id         = PHY_ID_VSC7395,
+       .name           = "Vitesse VSC7395",
+       .phy_id_mask    = 0x000ffff0,
+       .features       = PHY_GBIT_FEATURES,
+       .config_init    = vsc739x_config_init,
+       .config_aneg    = vsc73xx_config_aneg,
+       .read_page      = vsc73xx_read_page,
+       .write_page     = vsc73xx_write_page,
+}, {
+       .phy_id         = PHY_ID_VSC7398,
+       .name           = "Vitesse VSC7398",
+       .phy_id_mask    = 0x000ffff0,
+       .features       = PHY_GBIT_FEATURES,
+       .config_init    = vsc739x_config_init,
+       .config_aneg    = vsc73xx_config_aneg,
+       .read_page      = vsc73xx_read_page,
+       .write_page     = vsc73xx_write_page,
 }, {
        .phy_id         = PHY_ID_VSC8662,
        .name           = "Vitesse VSC8662",
@@ -358,6 +529,10 @@ static struct mdio_device_id __maybe_unused vitesse_tbl[] = {
        { PHY_ID_VSC8514, 0x000ffff0 },
        { PHY_ID_VSC8572, 0x000ffff0 },
        { PHY_ID_VSC8574, 0x000ffff0 },
+       { PHY_ID_VSC7385, 0x000ffff0 },
+       { PHY_ID_VSC7388, 0x000ffff0 },
+       { PHY_ID_VSC7395, 0x000ffff0 },
+       { PHY_ID_VSC7398, 0x000ffff0 },
        { PHY_ID_VSC8662, 0x000ffff0 },
        { PHY_ID_VSC8221, 0x000ffff0 },
        { PHY_ID_VSC8211, 0x000ffff0 },
index 2e5150b0b8d52c5dd784a3df1818962d64972898..74a8782313cf5b0319a7f3bad936926c4f4481da 100644 (file)
@@ -33,17 +33,22 @@ struct gmii2rgmii {
        struct phy_device *phy_dev;
        struct phy_driver *phy_drv;
        struct phy_driver conv_phy_drv;
-       int addr;
+       struct mdio_device *mdio;
 };
 
 static int xgmiitorgmii_read_status(struct phy_device *phydev)
 {
        struct gmii2rgmii *priv = phydev->priv;
+       struct mii_bus *bus = priv->mdio->bus;
+       int addr = priv->mdio->addr;
        u16 val = 0;
+       int err;
 
-       priv->phy_drv->read_status(phydev);
+       err = priv->phy_drv->read_status(phydev);
+       if (err < 0)
+               return err;
 
-       val = mdiobus_read(phydev->mdio.bus, priv->addr, XILINX_GMII2RGMII_REG);
+       val = mdiobus_read(bus, addr, XILINX_GMII2RGMII_REG);
        val &= ~XILINX_GMII2RGMII_SPEED_MASK;
 
        if (phydev->speed == SPEED_1000)
@@ -53,7 +58,7 @@ static int xgmiitorgmii_read_status(struct phy_device *phydev)
        else
                val |= BMCR_SPEED10;
 
-       mdiobus_write(phydev->mdio.bus, priv->addr, XILINX_GMII2RGMII_REG, val);
+       mdiobus_write(bus, addr, XILINX_GMII2RGMII_REG, val);
 
        return 0;
 }
@@ -81,7 +86,12 @@ static int xgmiitorgmii_probe(struct mdio_device *mdiodev)
                return -EPROBE_DEFER;
        }
 
-       priv->addr = mdiodev->addr;
+       if (!priv->phy_dev->drv) {
+               dev_info(dev, "Attached phy not ready\n");
+               return -EPROBE_DEFER;
+       }
+
+       priv->mdio = mdiodev;
        priv->phy_drv = priv->phy_dev->drv;
        memcpy(&priv->conv_phy_drv, priv->phy_dev->drv,
               sizeof(struct phy_driver));
index 6c7fd98cb00a12afe89a04af55ae40cb16c825f0..a205750b431ba5565df39a3d7d94ac409513fd9e 100644 (file)
@@ -96,7 +96,7 @@ static inline void sha_pad_init(struct sha_pad *shapad)
  */
 struct ppp_mppe_state {
        struct crypto_skcipher *arc4;
-       struct crypto_ahash *sha1;
+       struct shash_desc *sha1;
        unsigned char *sha1_digest;
        unsigned char master_key[MPPE_MAX_KEY_LEN];
        unsigned char session_key[MPPE_MAX_KEY_LEN];
@@ -136,25 +136,16 @@ struct ppp_mppe_state {
  */
 static void get_new_key_from_sha(struct ppp_mppe_state * state)
 {
-       AHASH_REQUEST_ON_STACK(req, state->sha1);
-       struct scatterlist sg[4];
-       unsigned int nbytes;
-
-       sg_init_table(sg, 4);
-
-       nbytes = setup_sg(&sg[0], state->master_key, state->keylen);
-       nbytes += setup_sg(&sg[1], sha_pad->sha_pad1,
-                          sizeof(sha_pad->sha_pad1));
-       nbytes += setup_sg(&sg[2], state->session_key, state->keylen);
-       nbytes += setup_sg(&sg[3], sha_pad->sha_pad2,
-                          sizeof(sha_pad->sha_pad2));
-
-       ahash_request_set_tfm(req, state->sha1);
-       ahash_request_set_callback(req, 0, NULL, NULL);
-       ahash_request_set_crypt(req, sg, state->sha1_digest, nbytes);
-
-       crypto_ahash_digest(req);
-       ahash_request_zero(req);
+       crypto_shash_init(state->sha1);
+       crypto_shash_update(state->sha1, state->master_key,
+                           state->keylen);
+       crypto_shash_update(state->sha1, sha_pad->sha_pad1,
+                           sizeof(sha_pad->sha_pad1));
+       crypto_shash_update(state->sha1, state->session_key,
+                           state->keylen);
+       crypto_shash_update(state->sha1, sha_pad->sha_pad2,
+                           sizeof(sha_pad->sha_pad2));
+       crypto_shash_final(state->sha1, state->sha1_digest);
 }
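
The conversion above swaps the scatterlist-based ahash API for the
synchronous shash API, which hashes linear buffers directly. The general
pattern, as a self-contained sketch (error handling abbreviated; data,
len and digest are illustrative parameters):

static int sha1_digest_sketch(const u8 *data, unsigned int len, u8 *digest)
{
        struct crypto_shash *tfm;
        struct shash_desc *desc;
        int err;

        tfm = crypto_alloc_shash("sha1", 0, 0);
        if (IS_ERR(tfm))
                return PTR_ERR(tfm);

        /* descriptor is sized for the algorithm's internal state */
        desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(tfm), GFP_KERNEL);
        if (!desc) {
                crypto_free_shash(tfm);
                return -ENOMEM;
        }
        desc->tfm = tfm;
        desc->flags = 0;

        err = crypto_shash_init(desc);
        if (!err)
                err = crypto_shash_update(desc, data, len);
        if (!err)
                err = crypto_shash_final(desc, digest);

        kzfree(desc);
        crypto_free_shash(tfm);
        return err;
}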
 
 /*
@@ -200,6 +191,7 @@ static void mppe_rekey(struct ppp_mppe_state * state, int initial_key)
 static void *mppe_alloc(unsigned char *options, int optlen)
 {
        struct ppp_mppe_state *state;
+       struct crypto_shash *shash;
        unsigned int digestsize;
 
        if (optlen != CILEN_MPPE + sizeof(state->master_key) ||
@@ -217,13 +209,21 @@ static void *mppe_alloc(unsigned char *options, int optlen)
                goto out_free;
        }
 
-       state->sha1 = crypto_alloc_ahash("sha1", 0, CRYPTO_ALG_ASYNC);
-       if (IS_ERR(state->sha1)) {
-               state->sha1 = NULL;
+       shash = crypto_alloc_shash("sha1", 0, 0);
+       if (IS_ERR(shash))
+               goto out_free;
+
+       state->sha1 = kmalloc(sizeof(*state->sha1) +
+                                    crypto_shash_descsize(shash),
+                             GFP_KERNEL);
+       if (!state->sha1) {
+               crypto_free_shash(shash);
                goto out_free;
        }
+       state->sha1->tfm = shash;
+       state->sha1->flags = 0;
 
-       digestsize = crypto_ahash_digestsize(state->sha1);
+       digestsize = crypto_shash_digestsize(shash);
        if (digestsize < MPPE_MAX_KEY_LEN)
                goto out_free;
 
@@ -246,7 +246,10 @@ static void *mppe_alloc(unsigned char *options, int optlen)
 
 out_free:
        kfree(state->sha1_digest);
-       crypto_free_ahash(state->sha1);
+       if (state->sha1) {
+               crypto_free_shash(state->sha1->tfm);
+               kzfree(state->sha1);
+       }
        crypto_free_skcipher(state->arc4);
        kfree(state);
 out:
@@ -261,7 +264,8 @@ static void mppe_free(void *arg)
        struct ppp_mppe_state *state = (struct ppp_mppe_state *) arg;
        if (state) {
                kfree(state->sha1_digest);
-               crypto_free_ahash(state->sha1);
+               crypto_free_shash(state->sha1->tfm);
+               kzfree(state->sha1);
                crypto_free_skcipher(state->arc4);
                kfree(state);
        }
index b070959737ffe744f08683926a486c66ee08bb4a..6a047d30e8c69f81cfb234113d66d03d216878ac 100644 (file)
 
 #define team_port_exists(dev) (dev->priv_flags & IFF_TEAM_PORT)
 
-static struct team_port *team_port_get_rcu(const struct net_device *dev)
-{
-       return rcu_dereference(dev->rx_handler_data);
-}
-
 static struct team_port *team_port_get_rtnl(const struct net_device *dev)
 {
        struct team_port *port = rtnl_dereference(dev->rx_handler_data);
@@ -1707,7 +1702,8 @@ static netdev_tx_t team_xmit(struct sk_buff *skb, struct net_device *dev)
 }
 
 static u16 team_select_queue(struct net_device *dev, struct sk_buff *skb,
-                            void *accel_priv, select_queue_fallback_t fallback)
+                            struct net_device *sb_dev,
+                            select_queue_fallback_t fallback)
 {
        /*
         * This helper function exists to help dev_pick_tx get the correct
index f5727baac84a5d10fd70837a75fcfa8194992f9a..2bbefe828670135d85ae8b4cdb9e537e27301be8 100644 (file)
@@ -200,6 +200,7 @@ struct tun_flow_entry {
 };
 
 #define TUN_NUM_FLOW_ENTRIES 1024
+#define TUN_MASK_FLOW_ENTRIES (TUN_NUM_FLOW_ENTRIES - 1)
 
 struct tun_prog {
        struct rcu_head rcu;
@@ -406,7 +407,7 @@ static inline __virtio16 cpu_to_tun16(struct tun_struct *tun, u16 val)
 
 static inline u32 tun_hashfn(u32 rxhash)
 {
-       return rxhash & 0x3ff;
+       return rxhash & TUN_MASK_FLOW_ENTRIES;
 }
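
The new define works because TUN_NUM_FLOW_ENTRIES is a power of two, so
masking with (size - 1) is equivalent to a modulo: 1024 - 1 == 0x3ff,
the magic constant this hunk removes. A compile-time guard for that
assumption could look like this (illustrative, not part of the patch):

BUILD_BUG_ON(TUN_NUM_FLOW_ENTRIES & (TUN_NUM_FLOW_ENTRIES - 1));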
 
 static struct tun_flow_entry *tun_flow_find(struct hlist_head *head, u32 rxhash)
@@ -607,7 +608,8 @@ static u16 tun_ebpf_select_queue(struct tun_struct *tun, struct sk_buff *skb)
 }
 
 static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb,
-                           void *accel_priv, select_queue_fallback_t fallback)
+                           struct net_device *sb_dev,
+                           select_queue_fallback_t fallback)
 {
        struct tun_struct *tun = netdev_priv(dev);
        u16 ret;
@@ -1268,7 +1270,6 @@ static int tun_xdp(struct net_device *dev, struct netdev_bpf *xdp)
                return tun_xdp_set(dev, xdp->prog, xdp->extack);
        case XDP_QUERY_PROG:
                xdp->prog_id = tun_xdp_query(dev);
-               xdp->prog_attached = !!xdp->prog_id;
                return 0;
        default:
                return -EINVAL;
index b1b3d8f7e67dd052eae618e33698c633751df60a..b654f05b2ccd0b85c88cd42d52dc7c4fea44b868 100644 (file)
@@ -693,24 +693,32 @@ static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf)
        u32 phyid;
        struct asix_common_private *priv;
 
-       usbnet_get_endpoints(dev,intf);
+       usbnet_get_endpoints(dev, intf);
 
-       /* Get the MAC address */
-       if (dev->driver_info->data & FLAG_EEPROM_MAC) {
-               for (i = 0; i < (ETH_ALEN >> 1); i++) {
-                       ret = asix_read_cmd(dev, AX_CMD_READ_EEPROM, 0x04 + i,
-                                           0, 2, buf + i * 2, 0);
-                       if (ret < 0)
-                               break;
-               }
+       /* Maybe the boot loader passed the MAC address via device tree */
+       if (!eth_platform_get_mac_address(&dev->udev->dev, buf)) {
+               netif_dbg(dev, ifup, dev->net,
+                         "MAC address read from device tree");
        } else {
-               ret = asix_read_cmd(dev, AX_CMD_READ_NODE_ID,
-                               0, 0, ETH_ALEN, buf, 0);
-       }
+               /* Try getting the MAC address from EEPROM */
+               if (dev->driver_info->data & FLAG_EEPROM_MAC) {
+                       for (i = 0; i < (ETH_ALEN >> 1); i++) {
+                               ret = asix_read_cmd(dev, AX_CMD_READ_EEPROM,
+                                                   0x04 + i, 0, 2, buf + i * 2,
+                                                   0);
+                               if (ret < 0)
+                                       break;
+                       }
+               } else {
+                       ret = asix_read_cmd(dev, AX_CMD_READ_NODE_ID,
+                                           0, 0, ETH_ALEN, buf, 0);
+               }
 
-       if (ret < 0) {
-               netdev_dbg(dev->net, "Failed to read MAC address: %d\n", ret);
-               return ret;
+               if (ret < 0) {
+                       netdev_dbg(dev->net, "Failed to read MAC address: %d\n",
+                                  ret);
+                       return ret;
+               }
        }
 
        asix_set_netdev_dev_addr(dev, buf);
index 18d36dff97ea64d623547ebd0ceb3222bfa70163..424053bd8b21bdd000b1432d7f3d9425fbc2be09 100644 (file)
@@ -869,6 +869,7 @@ static int catc_probe(struct usb_interface *intf, const struct usb_device_id *id
                default:
                        dev_warn(&intf->dev,
                                 "Couldn't detect memory size, assuming 32k\n");
+                       /* fall through */
                case 0x87654321:
                        catc_set_reg(catc, TxBufCount, 4);
                        catc_set_reg(catc, RxBufCount, 16);
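
The /* fall through */ annotations added throughout this merge mark
intentional switch fall-throughs for GCC's -Wimplicit-fallthrough, which
accepts such a comment as proof of intent. A minimal sketch (the helper
names are hypothetical):

static void setup_for_size_sketch(int size)
{
        switch (size) {
        case 1:
                setup_small();          /* hypothetical helper */
                /* fall through */
        case 2:
                setup_common();         /* hypothetical helper */
                break;
        }
}
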
index 288ecd9991713dcc0fb93372d39eefe3195db87e..78b16eb9e58c8ddb93e35ed965f00b1f173f3804 100644 (file)
@@ -99,6 +99,7 @@ static void tx_complete(struct urb *req)
        struct net_device *dev = skb->dev;
        struct usbpn_dev *pnd = netdev_priv(dev);
        int status = req->status;
+       unsigned long flags;
 
        switch (status) {
        case 0:
@@ -109,16 +110,17 @@ static void tx_complete(struct urb *req)
        case -ECONNRESET:
        case -ESHUTDOWN:
                dev->stats.tx_aborted_errors++;
+               /* fall through */
        default:
                dev->stats.tx_errors++;
                dev_dbg(&dev->dev, "TX error (%d)\n", status);
        }
        dev->stats.tx_packets++;
 
-       spin_lock(&pnd->tx_lock);
+       spin_lock_irqsave(&pnd->tx_lock, flags);
        pnd->tx_queue--;
        netif_wake_queue(dev);
-       spin_unlock(&pnd->tx_lock);
+       spin_unlock_irqrestore(&pnd->tx_lock, flags);
 
        dev_kfree_skb_any(skb);
        usb_free_urb(req);
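
This hunk, and the similar ones in hso, kaweth, r8152 and rtl8150 below,
all apply the same fix: a URB completion handler may run in hard-IRQ
context, so any spinlock it shares with other contexts must disable
interrupts to avoid self-deadlock. The pattern, sketched with
illustrative foo_* names:

static void foo_complete(struct urb *urb)
{
        struct foo_dev *dev = urb->context;
        unsigned long flags;

        spin_lock_irqsave(&dev->lock, flags);
        /* ... update state shared with process context ... */
        spin_unlock_irqrestore(&dev->lock, flags);
}
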
index e53883ad61073d428c431e795de1331db87c5070..184c24baca1527333d92ec927c48e958ad6c95f0 100644 (file)
@@ -999,6 +999,7 @@ static void read_bulk_callback(struct urb *urb)
        struct hso_net *odev = urb->context;
        struct net_device *net;
        int result;
+       unsigned long flags;
        int status = urb->status;
 
        /* is al ok?  (Filip: Who's Al ?) */
@@ -1028,11 +1029,11 @@ static void read_bulk_callback(struct urb *urb)
        if (urb->actual_length) {
                /* Handle the IP stream, add header and push it onto network
                 * stack if the packet is complete. */
-               spin_lock(&odev->net_lock);
+               spin_lock_irqsave(&odev->net_lock, flags);
                packetizeRx(odev, urb->transfer_buffer, urb->actual_length,
                            (urb->transfer_buffer_length >
                             urb->actual_length) ? 1 : 0);
-               spin_unlock(&odev->net_lock);
+               spin_unlock_irqrestore(&odev->net_lock, flags);
        }
 
        /* We are done with this URB, resubmit it. Prep the USB to wait for
@@ -1193,6 +1194,7 @@ static void hso_std_serial_read_bulk_callback(struct urb *urb)
 {
        struct hso_serial *serial = urb->context;
        int status = urb->status;
+       unsigned long flags;
 
        hso_dbg(0x8, "--- Got serial_read_bulk callback %02x ---\n", status);
 
@@ -1216,10 +1218,10 @@ static void hso_std_serial_read_bulk_callback(struct urb *urb)
        if (serial->parent->port_spec & HSO_INFO_CRC_BUG)
                fix_crc_bug(urb, serial->in_endp->wMaxPacketSize);
        /* Valid data, handle RX data */
-       spin_lock(&serial->serial_lock);
+       spin_lock_irqsave(&serial->serial_lock, flags);
        serial->rx_urb_filled[hso_urb_to_index(serial, urb)] = 1;
        put_rxbuf_data_and_resubmit_bulk_urb(serial);
-       spin_unlock(&serial->serial_lock);
+       spin_unlock_irqrestore(&serial->serial_lock, flags);
 }
 
 /*
@@ -1502,12 +1504,13 @@ static void tiocmget_intr_callback(struct urb *urb)
                DUMP(serial_state_notification,
                     sizeof(struct hso_serial_state_notification));
        } else {
+               unsigned long flags;
 
                UART_state_bitmap = le16_to_cpu(serial_state_notification->
                                                UART_state_bitmap);
                prev_UART_state_bitmap = tiocmget->prev_UART_state_bitmap;
                icount = &tiocmget->icount;
-               spin_lock(&serial->serial_lock);
+               spin_lock_irqsave(&serial->serial_lock, flags);
                if ((UART_state_bitmap & B_OVERRUN) !=
                   (prev_UART_state_bitmap & B_OVERRUN))
                        icount->parity++;
@@ -1530,7 +1533,7 @@ static void tiocmget_intr_callback(struct urb *urb)
                   (prev_UART_state_bitmap & B_RX_CARRIER))
                        icount->dcd++;
                tiocmget->prev_UART_state_bitmap = UART_state_bitmap;
-               spin_unlock(&serial->serial_lock);
+               spin_unlock_irqrestore(&serial->serial_lock, flags);
                tiocmget->intr_completed = 1;
                wake_up_interruptible(&tiocmget->waitq);
        }
@@ -1729,7 +1732,6 @@ static int hso_serial_ioctl(struct tty_struct *tty,
 /* starts a transmit */
 static void hso_kick_transmit(struct hso_serial *serial)
 {
-       u8 *temp;
        unsigned long flags;
        int res;
 
@@ -1745,14 +1747,12 @@ static void hso_kick_transmit(struct hso_serial *serial)
                goto out;
 
        /* Switch pointers around to avoid memcpy */
-       temp = serial->tx_buffer;
-       serial->tx_buffer = serial->tx_data;
-       serial->tx_data = temp;
+       swap(serial->tx_buffer, serial->tx_data);
        serial->tx_data_count = serial->tx_buffer_count;
        serial->tx_buffer_count = 0;
 
-       /* If temp is set, it means we switched buffers */
-       if (temp && serial->write_data) {
+       /* If serial->tx_data is set, it means we switched buffers */
+       if (serial->tx_data && serial->write_data) {
                res = serial->write_data(serial);
                if (res >= 0)
                        serial->tx_urb_used = 1;
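
swap() from linux/kernel.h exchanges any two lvalues of the same type,
which is what lets the hunk above drop the temporary pointer. A trivial
illustration:

int a = 1, b = 2;

swap(a, b);     /* now a == 2 and b == 1 */
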
@@ -1852,6 +1852,7 @@ static void intr_callback(struct urb *urb)
        struct hso_serial *serial;
        unsigned char *port_req;
        int status = urb->status;
+       unsigned long flags;
        int i;
 
        usb_mark_last_busy(urb->dev);
@@ -1879,7 +1880,7 @@ static void intr_callback(struct urb *urb)
                        if (serial != NULL) {
                                hso_dbg(0x1, "Pending read interrupt on port %d\n",
                                        i);
-                               spin_lock(&serial->serial_lock);
+                               spin_lock_irqsave(&serial->serial_lock, flags);
                                if (serial->rx_state == RX_IDLE &&
                                        serial->port.count > 0) {
                                        /* Setup and send a ctrl req read on
@@ -1893,7 +1894,8 @@ static void intr_callback(struct urb *urb)
                                        hso_dbg(0x1, "Already a read pending on port %d or port not open\n",
                                                i);
                                }
-                               spin_unlock(&serial->serial_lock);
+                               spin_unlock_irqrestore(&serial->serial_lock,
+                                                      flags);
                        }
                }
        }
@@ -1920,6 +1922,7 @@ static void hso_std_serial_write_bulk_callback(struct urb *urb)
 {
        struct hso_serial *serial = urb->context;
        int status = urb->status;
+       unsigned long flags;
 
        /* sanity check */
        if (!serial) {
@@ -1927,9 +1930,9 @@ static void hso_std_serial_write_bulk_callback(struct urb *urb)
                return;
        }
 
-       spin_lock(&serial->serial_lock);
+       spin_lock_irqsave(&serial->serial_lock, flags);
        serial->tx_urb_used = 0;
-       spin_unlock(&serial->serial_lock);
+       spin_unlock_irqrestore(&serial->serial_lock, flags);
        if (status) {
                handle_usb_error(status, __func__, serial->parent);
                return;
@@ -1971,14 +1974,15 @@ static void ctrl_callback(struct urb *urb)
        struct hso_serial *serial = urb->context;
        struct usb_ctrlrequest *req;
        int status = urb->status;
+       unsigned long flags;
 
        /* sanity check */
        if (!serial)
                return;
 
-       spin_lock(&serial->serial_lock);
+       spin_lock_irqsave(&serial->serial_lock, flags);
        serial->tx_urb_used = 0;
-       spin_unlock(&serial->serial_lock);
+       spin_unlock_irqrestore(&serial->serial_lock, flags);
        if (status) {
                handle_usb_error(status, __func__, serial->parent);
                return;
@@ -1994,9 +1998,9 @@ static void ctrl_callback(struct urb *urb)
            (USB_DIR_IN | USB_TYPE_OPTION_VENDOR | USB_RECIP_INTERFACE)) {
                /* response to a read command */
                serial->rx_urb_filled[0] = 1;
-               spin_lock(&serial->serial_lock);
+               spin_lock_irqsave(&serial->serial_lock, flags);
                put_rxbuf_data_and_resubmit_ctrl_urb(serial);
-               spin_unlock(&serial->serial_lock);
+               spin_unlock_irqrestore(&serial->serial_lock, flags);
        } else {
                hso_put_activity(serial->parent);
                tty_port_tty_wakeup(&serial->port);
index f1605833c5cf1d7eebe5f7189f0c4e43ba32e5ad..913e50bab0a2f6c039e30f8defe434284f5a487f 100644 (file)
@@ -587,7 +587,7 @@ static void kaweth_usb_receive(struct urb *urb)
        struct kaweth_device *kaweth = urb->context;
        struct net_device *net = kaweth->net;
        int status = urb->status;
-
+       unsigned long flags;
        int count = urb->actual_length;
        int count2 = urb->transfer_buffer_length;
 
@@ -619,12 +619,12 @@ static void kaweth_usb_receive(struct urb *urb)
                net->stats.rx_errors++;
                dev_dbg(dev, "Status was -EOVERFLOW.\n");
        }
-       spin_lock(&kaweth->device_lock);
+       spin_lock_irqsave(&kaweth->device_lock, flags);
        if (IS_BLOCKED(kaweth->status)) {
-               spin_unlock(&kaweth->device_lock);
+               spin_unlock_irqrestore(&kaweth->device_lock, flags);
                return;
        }
-       spin_unlock(&kaweth->device_lock);
+       spin_unlock_irqrestore(&kaweth->device_lock, flags);
 
        if(status && status != -EREMOTEIO && count != 1) {
                dev_err(&kaweth->intf->dev,
index aeca484a75b89742458e4b76dca220c54f8c208d..a9991c5f4736b6dd1e395527fbfeeecc3d0ab303 100644 (file)
@@ -1649,7 +1649,7 @@ lan78xx_get_regs(struct net_device *netdev, struct ethtool_regs *regs,
        struct lan78xx_net *dev = netdev_priv(netdev);
 
        /* Read Device/MAC registers */
-       for (i = 0; i < (sizeof(lan78xx_regs) / sizeof(u32)); i++)
+       for (i = 0; i < ARRAY_SIZE(lan78xx_regs); i++)
                lan78xx_read_reg(dev, lan78xx_regs[i], &data[i]);
 
        if (!netdev->phydev)
@@ -1723,7 +1723,7 @@ static void lan78xx_init_mac_address(struct lan78xx_net *dev)
                                  "MAC address read from EEPROM");
                } else {
                        /* generate random MAC */
-                       random_ether_addr(addr);
+                       eth_random_addr(addr);
                        netif_dbg(dev, ifup, dev->net,
                                  "MAC address set to random addr");
                }
index 6514c86f043eeb8777354094c4d2fd9f2ffd9c9d..f4247b275e0901a54ebf0b36d67adaf708bf6950 100644 (file)
@@ -1067,7 +1067,7 @@ static inline void setup_pegasus_II(pegasus_t *pegasus)
 
        set_register(pegasus, Reg1d, 0);
        set_register(pegasus, Reg7b, 1);
-       mdelay(100);
+       msleep(100);
        if ((pegasus->features & HAS_HOME_PNA) && mii_mode)
                set_register(pegasus, Reg7b, 0);
        else
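
mdelay() busy-waits while msleep() actually sleeps; the switch is safe
because setup_pegasus_II() runs in process context. The usual selection,
per Documentation/timers/timers-howto.txt (values illustrative):

mdelay(2);                      /* atomic context: busy-wait */
usleep_range(2000, 3000);       /* process context, short waits */
msleep(100);                    /* process context, 20 ms and up */
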
index 2a58607a6aea809b14e0aa03955cfa099118e607..124211afb023fc8d729a8208aa43099b2b171690 100644 (file)
@@ -1252,6 +1252,7 @@ static void read_bulk_callback(struct urb *urb)
        int status = urb->status;
        struct rx_agg *agg;
        struct r8152 *tp;
+       unsigned long flags;
 
        agg = urb->context;
        if (!agg)
@@ -1281,9 +1282,9 @@ static void read_bulk_callback(struct urb *urb)
                if (urb->actual_length < ETH_ZLEN)
                        break;
 
-               spin_lock(&tp->rx_lock);
+               spin_lock_irqsave(&tp->rx_lock, flags);
                list_add_tail(&agg->list, &tp->rx_done);
-               spin_unlock(&tp->rx_lock);
+               spin_unlock_irqrestore(&tp->rx_lock, flags);
                napi_schedule(&tp->napi);
                return;
        case -ESHUTDOWN:
@@ -1311,6 +1312,7 @@ static void write_bulk_callback(struct urb *urb)
        struct net_device *netdev;
        struct tx_agg *agg;
        struct r8152 *tp;
+       unsigned long flags;
        int status = urb->status;
 
        agg = urb->context;
@@ -1332,9 +1334,9 @@ static void write_bulk_callback(struct urb *urb)
                stats->tx_bytes += agg->skb_len;
        }
 
-       spin_lock(&tp->tx_lock);
+       spin_lock_irqsave(&tp->tx_lock, flags);
        list_add_tail(&agg->list, &tp->tx_free);
-       spin_unlock(&tp->tx_lock);
+       spin_unlock_irqrestore(&tp->tx_lock, flags);
 
        usb_autopm_put_interface_async(tp->intf);
 
@@ -1374,6 +1376,7 @@ static void intr_callback(struct urb *urb)
        case -ECONNRESET:       /* unlink */
        case -ESHUTDOWN:
                netif_device_detach(tp->netdev);
+               /* fall through */
        case -ENOENT:
        case -EPROTO:
                netif_info(tp, intr, tp->netdev,
@@ -2739,6 +2742,7 @@ static void r8153b_ups_en(struct r8152 *tp, bool enable)
                        r8152_mdio_write(tp, MII_BMCR, data);
 
                        data = r8153_phy_status(tp, PHY_STAT_LAN_ON);
+                       /* fall through */
 
                default:
                        if (data != PHY_STAT_LAN_ON)
index 48ba80a8ca5ce8e566931979edcff4bcfe47bc2e..80373a9171dd2d0e06a19cb21aeb37934bea4b37 100644 (file)
@@ -391,6 +391,7 @@ static void read_bulk_callback(struct urb *urb)
        u16 rx_stat;
        int status = urb->status;
        int result;
+       unsigned long flags;
 
        dev = urb->context;
        if (!dev)
@@ -432,9 +433,9 @@ static void read_bulk_callback(struct urb *urb)
        netdev->stats.rx_packets++;
        netdev->stats.rx_bytes += pkt_len;
 
-       spin_lock(&dev->rx_pool_lock);
+       spin_lock_irqsave(&dev->rx_pool_lock, flags);
        skb = pull_skb(dev);
-       spin_unlock(&dev->rx_pool_lock);
+       spin_unlock_irqrestore(&dev->rx_pool_lock, flags);
        if (!skb)
                goto resched;
 
index 2d316c1b851b2aceb19695e0035c94251f6ec475..6ac232e52bf7c683832aee40dc1d0fabcd9a6202 100644 (file)
@@ -358,7 +358,7 @@ static int sr9700_bind(struct usbnet *dev, struct usb_interface *intf)
        /* power up and reset phy */
        sr_write_reg(dev, SR_PRR, PRR_PHY_RST);
        /* at least 10ms, here 20ms for safe */
-       mdelay(20);
+       msleep(20);
        sr_write_reg(dev, SR_PRR, 0);
        /* at least 1ms, here 2ms for reading right register */
        udelay(2 * 1000);
index 2b6ec927809e95b5036ff46c2bd5565a9d112eee..14f661cbae185436deb8ae573ca27d8f6d0d2d76 100644 (file)
@@ -82,25 +82,43 @@ struct virtnet_sq_stats {
        struct u64_stats_sync syncp;
        u64 packets;
        u64 bytes;
+       u64 xdp_tx;
+       u64 xdp_tx_drops;
+       u64 kicks;
 };
 
 struct virtnet_rq_stats {
        struct u64_stats_sync syncp;
        u64 packets;
        u64 bytes;
+       u64 drops;
+       u64 xdp_packets;
+       u64 xdp_tx;
+       u64 xdp_redirects;
+       u64 xdp_drops;
+       u64 kicks;
 };
 
 #define VIRTNET_SQ_STAT(m)     offsetof(struct virtnet_sq_stats, m)
 #define VIRTNET_RQ_STAT(m)     offsetof(struct virtnet_rq_stats, m)
 
 static const struct virtnet_stat_desc virtnet_sq_stats_desc[] = {
-       { "packets",    VIRTNET_SQ_STAT(packets) },
-       { "bytes",      VIRTNET_SQ_STAT(bytes) },
+       { "packets",            VIRTNET_SQ_STAT(packets) },
+       { "bytes",              VIRTNET_SQ_STAT(bytes) },
+       { "xdp_tx",             VIRTNET_SQ_STAT(xdp_tx) },
+       { "xdp_tx_drops",       VIRTNET_SQ_STAT(xdp_tx_drops) },
+       { "kicks",              VIRTNET_SQ_STAT(kicks) },
 };
 
 static const struct virtnet_stat_desc virtnet_rq_stats_desc[] = {
-       { "packets",    VIRTNET_RQ_STAT(packets) },
-       { "bytes",      VIRTNET_RQ_STAT(bytes) },
+       { "packets",            VIRTNET_RQ_STAT(packets) },
+       { "bytes",              VIRTNET_RQ_STAT(bytes) },
+       { "drops",              VIRTNET_RQ_STAT(drops) },
+       { "xdp_packets",        VIRTNET_RQ_STAT(xdp_packets) },
+       { "xdp_tx",             VIRTNET_RQ_STAT(xdp_tx) },
+       { "xdp_redirects",      VIRTNET_RQ_STAT(xdp_redirects) },
+       { "xdp_drops",          VIRTNET_RQ_STAT(xdp_drops) },
+       { "kicks",              VIRTNET_RQ_STAT(kicks) },
 };
 
 #define VIRTNET_SQ_STATS_LEN   ARRAY_SIZE(virtnet_sq_stats_desc)
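
Each descriptor above pairs an ethtool string with an offsetof() into
the per-queue stats struct, so the values can be copied out generically.
A simplified sketch of the consuming loop, modeled on the driver's
ethtool fill path; the "offset" member is the field the
VIRTNET_*_STAT() macros initialize:

static void fill_sq_stats_sketch(const struct virtnet_sq_stats *stats,
                                 u64 *data)
{
        const u8 *base = (const u8 *)stats;
        unsigned int i;

        for (i = 0; i < VIRTNET_SQ_STATS_LEN; i++)
                data[i] = *(const u64 *)(base +
                                         virtnet_sq_stats_desc[i].offset);
}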
@@ -447,22 +465,12 @@ static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
        return 0;
 }
 
-static int __virtnet_xdp_tx_xmit(struct virtnet_info *vi,
-                                  struct xdp_frame *xdpf)
+static struct send_queue *virtnet_xdp_sq(struct virtnet_info *vi)
 {
-       struct xdp_frame *xdpf_sent;
-       struct send_queue *sq;
-       unsigned int len;
        unsigned int qp;
 
        qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
-       sq = &vi->sq[qp];
-
-       /* Free up any pending old buffers before queueing new ones. */
-       while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL)
-               xdp_return_frame(xdpf_sent);
-
-       return __virtnet_xdp_xmit_one(vi, sq, xdpf);
+       return &vi->sq[qp];
 }
 
 static int virtnet_xdp_xmit(struct net_device *dev,
@@ -474,23 +482,28 @@ static int virtnet_xdp_xmit(struct net_device *dev,
        struct bpf_prog *xdp_prog;
        struct send_queue *sq;
        unsigned int len;
-       unsigned int qp;
        int drops = 0;
-       int err;
+       int kicks = 0;
+       int ret, err;
        int i;
 
-       if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
-               return -EINVAL;
+       sq = virtnet_xdp_sq(vi);
 
-       qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
-       sq = &vi->sq[qp];
+       if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) {
+               ret = -EINVAL;
+               drops = n;
+               goto out;
+       }
 
        /* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
         * indicate XDP resources have been successfully allocated.
         */
        xdp_prog = rcu_dereference(rq->xdp_prog);
-       if (!xdp_prog)
-               return -ENXIO;
+       if (!xdp_prog) {
+               ret = -ENXIO;
+               drops = n;
+               goto out;
+       }
 
        /* Free up any pending old buffers before queueing new ones. */
        while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL)
@@ -505,11 +518,20 @@ static int virtnet_xdp_xmit(struct net_device *dev,
                        drops++;
                }
        }
+       ret = n - drops;
 
-       if (flags & XDP_XMIT_FLUSH)
-               virtqueue_kick(sq->vq);
+       if (flags & XDP_XMIT_FLUSH) {
+               if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq))
+                       kicks = 1;
+       }
+out:
+       u64_stats_update_begin(&sq->stats.syncp);
+       sq->stats.xdp_tx += n;
+       sq->stats.xdp_tx_drops += drops;
+       sq->stats.kicks += kicks;
+       u64_stats_update_end(&sq->stats.syncp);
 
-       return n - drops;
+       return ret;
 }
 
 static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
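
Open-coding the kick as virtqueue_kick_prepare() plus virtqueue_notify() lets the driver count only real notifications: kick_prepare returns false when the device has suppressed notifications, in which case no notify is sent and no kick is recorded. virtqueue_kick() itself is essentially the same prepare/notify pair. A sketch of the pattern as a helper; vq_kick_counted() is hypothetical, not a kernel API:

    #include <linux/virtio.h>

    /*
     * Hypothetical helper mirroring what the patch open-codes above:
     * virtqueue_kick_prepare() returns false when the device suppressed
     * notifications, so only genuine notifications bump the counter.
     */
    static bool vq_kick_counted(struct virtqueue *vq, u64 *kicks)
    {
            if (virtqueue_kick_prepare(vq) && virtqueue_notify(vq)) {
                    (*kicks)++;
                    return true;
            }
            return false;
    }
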
@@ -587,7 +609,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
                                     void *buf, void *ctx,
                                     unsigned int len,
                                     unsigned int *xdp_xmit,
-                                    unsigned int *rbytes)
+                                    struct virtnet_rq_stats *stats)
 {
        struct sk_buff *skb;
        struct bpf_prog *xdp_prog;
@@ -602,7 +624,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
        int err;
 
        len -= vi->hdr_len;
-       *rbytes += len;
+       stats->bytes += len;
 
        rcu_read_lock();
        xdp_prog = rcu_dereference(rq->xdp_prog);
@@ -644,6 +666,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
                xdp.rxq = &rq->xdp_rxq;
                orig_data = xdp.data;
                act = bpf_prog_run_xdp(xdp_prog, &xdp);
+               stats->xdp_packets++;
 
                switch (act) {
                case XDP_PASS:
@@ -652,11 +675,12 @@ static struct sk_buff *receive_small(struct net_device *dev,
                        len = xdp.data_end - xdp.data;
                        break;
                case XDP_TX:
+                       stats->xdp_tx++;
                        xdpf = convert_to_xdp_frame(&xdp);
                        if (unlikely(!xdpf))
                                goto err_xdp;
-                       err = __virtnet_xdp_tx_xmit(vi, xdpf);
-                       if (unlikely(err)) {
+                       err = virtnet_xdp_xmit(dev, 1, &xdpf, 0);
+                       if (unlikely(err < 0)) {
                                trace_xdp_exception(vi->dev, xdp_prog, act);
                                goto err_xdp;
                        }
@@ -664,6 +688,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
                        rcu_read_unlock();
                        goto xdp_xmit;
                case XDP_REDIRECT:
+                       stats->xdp_redirects++;
                        err = xdp_do_redirect(dev, &xdp, xdp_prog);
                        if (err)
                                goto err_xdp;
@@ -697,7 +722,8 @@ err:
 
 err_xdp:
        rcu_read_unlock();
-       dev->stats.rx_dropped++;
+       stats->xdp_drops++;
+       stats->drops++;
        put_page(page);
 xdp_xmit:
        return NULL;
@@ -708,19 +734,19 @@ static struct sk_buff *receive_big(struct net_device *dev,
                                   struct receive_queue *rq,
                                   void *buf,
                                   unsigned int len,
-                                  unsigned int *rbytes)
+                                  struct virtnet_rq_stats *stats)
 {
        struct page *page = buf;
        struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE);
 
-       *rbytes += len - vi->hdr_len;
+       stats->bytes += len - vi->hdr_len;
        if (unlikely(!skb))
                goto err;
 
        return skb;
 
 err:
-       dev->stats.rx_dropped++;
+       stats->drops++;
        give_pages(rq, page);
        return NULL;
 }
@@ -732,7 +758,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
                                         void *ctx,
                                         unsigned int len,
                                         unsigned int *xdp_xmit,
-                                        unsigned int *rbytes)
+                                        struct virtnet_rq_stats *stats)
 {
        struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
        u16 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
@@ -745,7 +771,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
        int err;
 
        head_skb = NULL;
-       *rbytes += len - vi->hdr_len;
+       stats->bytes += len - vi->hdr_len;
 
        rcu_read_lock();
        xdp_prog = rcu_dereference(rq->xdp_prog);
@@ -794,6 +820,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
                xdp.rxq = &rq->xdp_rxq;
 
                act = bpf_prog_run_xdp(xdp_prog, &xdp);
+               stats->xdp_packets++;
 
                switch (act) {
                case XDP_PASS:
@@ -818,11 +845,12 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
                        }
                        break;
                case XDP_TX:
+                       stats->xdp_tx++;
                        xdpf = convert_to_xdp_frame(&xdp);
                        if (unlikely(!xdpf))
                                goto err_xdp;
-                       err = __virtnet_xdp_tx_xmit(vi, xdpf);
-                       if (unlikely(err)) {
+                       err = virtnet_xdp_xmit(dev, 1, &xdpf, 0);
+                       if (unlikely(err < 0)) {
                                trace_xdp_exception(vi->dev, xdp_prog, act);
                                if (unlikely(xdp_page != page))
                                        put_page(xdp_page);
@@ -834,6 +862,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
                        rcu_read_unlock();
                        goto xdp_xmit;
                case XDP_REDIRECT:
+                       stats->xdp_redirects++;
                        err = xdp_do_redirect(dev, &xdp, xdp_prog);
                        if (err) {
                                if (unlikely(xdp_page != page))
@@ -883,7 +912,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
                        goto err_buf;
                }
 
-               *rbytes += len;
+               stats->bytes += len;
                page = virt_to_head_page(buf);
 
                truesize = mergeable_ctx_to_truesize(ctx);
@@ -929,6 +958,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 
 err_xdp:
        rcu_read_unlock();
+       stats->xdp_drops++;
 err_skb:
        put_page(page);
        while (num_buf-- > 1) {
@@ -939,12 +969,12 @@ err_skb:
                        dev->stats.rx_length_errors++;
                        break;
                }
-               *rbytes += len;
+               stats->bytes += len;
                page = virt_to_head_page(buf);
                put_page(page);
        }
 err_buf:
-       dev->stats.rx_dropped++;
+       stats->drops++;
        dev_kfree_skb(head_skb);
 xdp_xmit:
        return NULL;
@@ -952,7 +982,8 @@ xdp_xmit:
 
 static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
                        void *buf, unsigned int len, void **ctx,
-                       unsigned int *xdp_xmit, unsigned int *rbytes)
+                       unsigned int *xdp_xmit,
+                       struct virtnet_rq_stats *stats)
 {
        struct net_device *dev = vi->dev;
        struct sk_buff *skb;
@@ -973,11 +1004,11 @@ static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
 
        if (vi->mergeable_rx_bufs)
                skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit,
-                                       rbytes);
+                                       stats);
        else if (vi->big_packets)
-               skb = receive_big(dev, vi, rq, buf, len, rbytes);
+               skb = receive_big(dev, vi, rq, buf, len, stats);
        else
-               skb = receive_small(dev, vi, rq, buf, ctx, len, xdp_xmit, rbytes);
+               skb = receive_small(dev, vi, rq, buf, ctx, len, xdp_xmit, stats);
 
        if (unlikely(!skb))
                return;
@@ -1171,7 +1202,12 @@ static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
                if (err)
                        break;
        } while (rq->vq->num_free);
-       virtqueue_kick(rq->vq);
+       if (virtqueue_kick_prepare(rq->vq) && virtqueue_notify(rq->vq)) {
+               u64_stats_update_begin(&rq->stats.syncp);
+               rq->stats.kicks++;
+               u64_stats_update_end(&rq->stats.syncp);
+       }
+
        return !oom;
 }
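
The kick accounting sits inside u64_stats_update_begin()/u64_stats_update_end() because 64-bit counters on 32-bit SMP kernels need the seqcount so a concurrent reader never sees a torn half-updated value. A minimal sketch of the writer side (rq_count_kick() and the struct name are illustrative):

    #include <linux/u64_stats_sync.h>

    struct rq_stats_sketch {
            struct u64_stats_sync syncp;
            u64 kicks;
    };

    /* Writer side: bracket the update so a 32-bit reader never sees a
     * torn half of the 64-bit counter. */
    static void rq_count_kick(struct rq_stats_sketch *stats)
    {
            u64_stats_update_begin(&stats->syncp);
            stats->kicks++;
            u64_stats_update_end(&stats->syncp);
    }
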
 
@@ -1246,22 +1282,24 @@ static int virtnet_receive(struct receive_queue *rq, int budget,
                           unsigned int *xdp_xmit)
 {
        struct virtnet_info *vi = rq->vq->vdev->priv;
-       unsigned int len, received = 0, bytes = 0;
+       struct virtnet_rq_stats stats = {};
+       unsigned int len;
        void *buf;
+       int i;
 
        if (!vi->big_packets || vi->mergeable_rx_bufs) {
                void *ctx;
 
-               while (received < budget &&
+               while (stats.packets < budget &&
                       (buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx))) {
-                       receive_buf(vi, rq, buf, len, ctx, xdp_xmit, &bytes);
-                       received++;
+                       receive_buf(vi, rq, buf, len, ctx, xdp_xmit, &stats);
+                       stats.packets++;
                }
        } else {
-               while (received < budget &&
+               while (stats.packets < budget &&
                       (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) {
-                       receive_buf(vi, rq, buf, len, NULL, xdp_xmit, &bytes);
-                       received++;
+                       receive_buf(vi, rq, buf, len, NULL, xdp_xmit, &stats);
+                       stats.packets++;
                }
        }
 
@@ -1271,11 +1309,16 @@ static int virtnet_receive(struct receive_queue *rq, int budget,
        }
 
        u64_stats_update_begin(&rq->stats.syncp);
-       rq->stats.bytes += bytes;
-       rq->stats.packets += received;
+       for (i = 0; i < VIRTNET_RQ_STATS_LEN; i++) {
+               size_t offset = virtnet_rq_stats_desc[i].offset;
+               u64 *item;
+
+               item = (u64 *)((u8 *)&rq->stats + offset);
+               *item += *(u64 *)((u8 *)&stats + offset);
+       }
        u64_stats_update_end(&rq->stats.syncp);
 
-       return received;
+       return stats.packets;
 }
 
 static void free_old_xmit_skbs(struct send_queue *sq)
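
virtnet_receive() now accumulates into a stack-local virtnet_rq_stats and folds it into the per-queue struct in one locked pass, reusing the descriptor offsets that back the ethtool strings. A sketch of that fold as a standalone helper (rq_fold_stats() is an illustrative name over the driver's own types):

    /* Illustrative restatement of the fold in virtnet_receive() above:
     * accumulate a stack-local stats copy into the long-lived per-queue
     * counters in a single locked pass, via the descriptor offsets. */
    static void rq_fold_stats(struct receive_queue *rq,
                              const struct virtnet_rq_stats *local)
    {
            int i;

            u64_stats_update_begin(&rq->stats.syncp);
            for (i = 0; i < VIRTNET_RQ_STATS_LEN; i++) {
                    size_t off = virtnet_rq_stats_desc[i].offset;

                    *(u64 *)((u8 *)&rq->stats + off) +=
                            *(const u64 *)((const u8 *)local + off);
            }
            u64_stats_update_end(&rq->stats.syncp);
    }
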
@@ -1331,7 +1374,7 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
                container_of(napi, struct receive_queue, napi);
        struct virtnet_info *vi = rq->vq->vdev->priv;
        struct send_queue *sq;
-       unsigned int received, qp;
+       unsigned int received;
        unsigned int xdp_xmit = 0;
 
        virtnet_poll_cleantx(rq);
@@ -1346,10 +1389,12 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
                xdp_do_flush_map();
 
        if (xdp_xmit & VIRTIO_XDP_TX) {
-               qp = vi->curr_queue_pairs - vi->xdp_queue_pairs +
-                    smp_processor_id();
-               sq = &vi->sq[qp];
-               virtqueue_kick(sq->vq);
+               sq = virtnet_xdp_sq(vi);
+               if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) {
+                       u64_stats_update_begin(&sq->stats.syncp);
+                       sq->stats.kicks++;
+                       u64_stats_update_end(&sq->stats.syncp);
+               }
        }
 
        return received;
@@ -1511,8 +1556,13 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
                }
        }
 
-       if (kick || netif_xmit_stopped(txq))
-               virtqueue_kick(sq->vq);
+       if (kick || netif_xmit_stopped(txq)) {
+               if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) {
+                       u64_stats_update_begin(&sq->stats.syncp);
+                       sq->stats.kicks++;
+                       u64_stats_update_end(&sq->stats.syncp);
+               }
+       }
 
        return NETDEV_TX_OK;
 }
@@ -1616,7 +1666,7 @@ static void virtnet_stats(struct net_device *dev,
        int i;
 
        for (i = 0; i < vi->max_queue_pairs; i++) {
-               u64 tpackets, tbytes, rpackets, rbytes;
+               u64 tpackets, tbytes, rpackets, rbytes, rdrops;
                struct receive_queue *rq = &vi->rq[i];
                struct send_queue *sq = &vi->sq[i];
 
@@ -1630,17 +1680,18 @@ static void virtnet_stats(struct net_device *dev,
                        start = u64_stats_fetch_begin_irq(&rq->stats.syncp);
                        rpackets = rq->stats.packets;
                        rbytes   = rq->stats.bytes;
+                       rdrops   = rq->stats.drops;
                } while (u64_stats_fetch_retry_irq(&rq->stats.syncp, start));
 
                tot->rx_packets += rpackets;
                tot->tx_packets += tpackets;
                tot->rx_bytes   += rbytes;
                tot->tx_bytes   += tbytes;
+               tot->rx_dropped += rdrops;
        }
 
        tot->tx_dropped = dev->stats.tx_dropped;
        tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
-       tot->rx_dropped = dev->stats.rx_dropped;
        tot->rx_length_errors = dev->stats.rx_length_errors;
        tot->rx_frame_errors = dev->stats.rx_frame_errors;
 }
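
On the reader side, virtnet_stats() snapshots the counters under u64_stats_fetch_begin_irq() and retries if a writer raced with it, so packets, bytes, and drops stay consistent as a set. A sketch of the retry loop (rq_read_stats() is an illustrative helper name):

    /* Reader side: retry the snapshot if a writer raced with us, so the
     * three counters are read as one consistent set. */
    static void rq_read_stats(struct receive_queue *rq,
                              u64 *packets, u64 *bytes, u64 *drops)
    {
            unsigned int start;

            do {
                    start = u64_stats_fetch_begin_irq(&rq->stats.syncp);
                    *packets = rq->stats.packets;
                    *bytes   = rq->stats.bytes;
                    *drops   = rq->stats.drops;
            } while (u64_stats_fetch_retry_irq(&rq->stats.syncp, start));
    }
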
@@ -2348,7 +2399,6 @@ static int virtnet_xdp(struct net_device *dev, struct netdev_bpf *xdp)
                return virtnet_xdp_set(dev, xdp->prog, xdp->extack);
        case XDP_QUERY_PROG:
                xdp->prog_id = virtnet_xdp_query(dev);
-               xdp->prog_attached = !!xdp->prog_id;
                return 0;
        default:
                return -EINVAL;
index e857cb3335f6bd4e54b01050d11a4aa4b12b087b..ababba37d735d62b7fe0500983f411d7806baa17 100644 (file)
@@ -568,11 +568,12 @@ static struct vxlanhdr *vxlan_gro_remcsum(struct sk_buff *skb,
        return vh;
 }
 
-static struct sk_buff **vxlan_gro_receive(struct sock *sk,
-                                         struct sk_buff **head,
-                                         struct sk_buff *skb)
+static struct sk_buff *vxlan_gro_receive(struct sock *sk,
+                                        struct list_head *head,
+                                        struct sk_buff *skb)
 {
-       struct sk_buff *p, **pp = NULL;
+       struct sk_buff *pp = NULL;
+       struct sk_buff *p;
        struct vxlanhdr *vh, *vh2;
        unsigned int hlen, off_vx;
        int flush = 1;
@@ -607,7 +608,7 @@ static struct sk_buff **vxlan_gro_receive(struct sock *sk,
 
        skb_gro_pull(skb, sizeof(struct vxlanhdr)); /* pull vxlan header */
 
-       for (p = *head; p; p = p->next) {
+       list_for_each_entry(p, head, list) {
                if (!NAPI_GRO_CB(p)->same_flow)
                        continue;
 
@@ -2154,7 +2155,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
                vni = tunnel_id_to_key32(info->key.tun_id);
                ifindex = 0;
                dst_cache = &info->dst_cache;
-               if (info->options_len)
+               if (info->options_len &&
+                   info->key.tun_flags & TUNNEL_VXLAN_OPT)
                        md = ip_tunnel_info_opts(info);
                ttl = info->key.ttl;
                tos = info->key.tos;
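
The vxlan_gro_receive() change above is part of the tree-wide conversion of GRO receive handlers from hand-rolled struct sk_buff ** chains to struct list_head, so walking the held packets becomes a standard list iteration; the second hunk separately guards ip_tunnel_info_opts() behind TUNNEL_VXLAN_OPT so md is only taken when VXLAN options are actually present. A sketch of the new GRO walk shape (gro_walk_example() is illustrative):

    #include <linux/list.h>
    #include <linux/netdevice.h>
    #include <linux/skbuff.h>

    /* Illustrative walk over GRO-held packets after the list conversion:
     * a plain list iteration replaces manual ->next chasing through a
     * struct sk_buff ** chain. */
    static void gro_walk_example(struct list_head *head)
    {
            struct sk_buff *p;

            list_for_each_entry(p, head, list) {
                    if (!NAPI_GRO_CB(p)->same_flow)
                            continue;
                    /* compare this held packet against the new one here */
            }
    }
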
index bd46b2552980878c1cb67d222428e320ae5d63ef..2a3f0f1a2b0a2c10ce4530f2e3d48c66a2b5d59f 100644 (file)
@@ -2134,7 +2134,6 @@ static void
 fst_openport(struct fst_port_info *port)
 {
        int signals;
-       int txq_length;
 
        /* Only init things if card is actually running. This allows open to
         * succeed for downloads etc.
@@ -2161,7 +2160,6 @@ fst_openport(struct fst_port_info *port)
                else
                        netif_carrier_off(port_to_dev(port));
 
-               txq_length = port->txqe - port->txqs;
                port->txqe = 0;
                port->txqs = 0;
        }
index 9b09c9d0d0fb860d6788d751e967af538fc7e3f0..5f0366a125e2605b8c78f54c11004cd5ea37a533 100644 (file)
@@ -192,7 +192,7 @@ static int uhdlc_init(struct ucc_hdlc_private *priv)
        priv->ucc_pram_offset = qe_muram_alloc(sizeof(struct ucc_hdlc_param),
                                ALIGNMENT_OF_UCC_HDLC_PRAM);
 
-       if (priv->ucc_pram_offset < 0) {
+       if (IS_ERR_VALUE(priv->ucc_pram_offset)) {
                dev_err(priv->dev, "Can not allocate MURAM for hdlc parameter.\n");
                ret = -ENOMEM;
                goto free_tx_bd;
@@ -230,14 +230,14 @@ static int uhdlc_init(struct ucc_hdlc_private *priv)
 
        /* Alloc riptr, tiptr */
        riptr = qe_muram_alloc(32, 32);
-       if (riptr < 0) {
+       if (IS_ERR_VALUE(riptr)) {
                dev_err(priv->dev, "Cannot allocate MURAM mem for Receive internal temp data pointer\n");
                ret = -ENOMEM;
                goto free_tx_skbuff;
        }
 
        tiptr = qe_muram_alloc(32, 32);
-       if (tiptr < 0) {
+       if (IS_ERR_VALUE(tiptr)) {
                dev_err(priv->dev, "Cannot allocate MURAM mem for Transmit internal temp data pointer\n");
                ret = -ENOMEM;
                goto free_riptr;
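
The IS_ERR_VALUE() conversion fixes a real bug: qe_muram_alloc() returns an unsigned offset with errors encoded in the top range of values, so the old `if (riptr < 0)` style checks could never be true and allocation failures were silently missed. A small userspace demonstration (the IS_ERR_VALUE definition below is a simplified mirror of the kernel's):

    #include <stdio.h>

    /* Simplified IS_ERR_VALUE, mirroring the kernel's: errors occupy the
     * top MAX_ERRNO values of the unsigned range. */
    #define MAX_ERRNO       4095UL
    #define IS_ERR_VALUE(x) ((unsigned long)(x) >= -MAX_ERRNO)

    int main(void)
    {
            unsigned long offset = (unsigned long)-12;  /* encoded -ENOMEM */

            if (offset < 0)                 /* always false: unsigned */
                    puts("never reached");
            if (IS_ERR_VALUE(offset))       /* catches the encoded error */
                    puts("allocation failed");
            return 0;
    }
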
index b3a1b6f5c40648f755df770b0288627587e2ff66..4907453f17f5ea373bd75d299855736cc2808118 100644 (file)
@@ -1491,7 +1491,6 @@ static int lmc_rx(struct net_device *dev)
     lmc_softc_t *sc = dev_to_sc(dev);
     int i;
     int rx_work_limit = LMC_RXDESCS;
-    unsigned int next_rx;
     int rxIntLoopCnt;          /* debug -baz */
     int localLengthErrCnt = 0;
     long stat;
@@ -1505,7 +1504,6 @@ static int lmc_rx(struct net_device *dev)
     rxIntLoopCnt = 0;          /* debug -baz */
 
     i = sc->lmc_next_rx % LMC_RXDESCS;
-    next_rx = sc->lmc_next_rx;
 
     while (((stat = sc->lmc_rxring[i].status) & LMC_RDES_OWN_BIT) != DESC_OWNED_BY_DC21X4)
     {
index 4c417903e9be9f5bb74a7901f179488e812d8038..094cea775d0c0bd3090102cf5d511d08d718fef7 100644 (file)
@@ -566,13 +566,12 @@ static void i2400m_msg_ack_hook(struct i2400m *i2400m,
 {
        int result;
        struct device *dev = i2400m_dev(i2400m);
-       unsigned ack_type, ack_status;
+       unsigned int ack_type;
        char strerr[32];
 
        /* Chew on the message, we might need some information from
         * here */
        ack_type = le16_to_cpu(l3l4_hdr->type);
-       ack_status = le16_to_cpu(l3l4_hdr->status);
        switch (ack_type) {
        case I2400M_MT_CMD_ENTER_POWERSAVE:
                /* This is just left here for the sake of example, as
index a89b5685e68b36d5735bc9591f97f621440cb424..e9fc168bb734504e535be77067dd250a8197bffe 100644 (file)
@@ -1552,7 +1552,6 @@ int i2400m_dev_bootstrap(struct i2400m *i2400m, enum i2400m_bri flags)
        int ret, itr;
        struct device *dev = i2400m_dev(i2400m);
        struct i2400m_fw *i2400m_fw;
-       const struct i2400m_bcf_hdr *bcf;       /* Firmware data */
        const struct firmware *fw;
        const char *fw_name;
 
@@ -1574,7 +1573,7 @@ int i2400m_dev_bootstrap(struct i2400m *i2400m, enum i2400m_bri flags)
        }
 
        /* Load firmware files to memory. */
-       for (itr = 0, bcf = NULL, ret = -ENOENT; ; itr++) {
+       for (itr = 0, ret = -ENOENT; ; itr++) {
                fw_name = i2400m->bus_fw_names[itr];
                if (fw_name == NULL) {
                        dev_err(dev, "Could not find a usable firmware image\n");
index a654687b5fa2276602f9d6d097bf1d60c3f38fa4..9ab3f0fdfea43c2118cb45fcfa9404809160fac8 100644 (file)
@@ -535,14 +535,12 @@ void i2400m_net_erx(struct i2400m *i2400m, struct sk_buff *skb,
 {
        struct net_device *net_dev = i2400m->wimax_dev.net_dev;
        struct device *dev = i2400m_dev(i2400m);
-       int protocol;
 
        d_fnstart(2, dev, "(i2400m %p skb %p [%u] cs %d)\n",
                  i2400m, skb, skb->len, cs);
        switch(cs) {
        case I2400M_CS_IPV4_0:
        case I2400M_CS_IPV4:
-               protocol = ETH_P_IP;
                i2400m_rx_fake_eth_header(i2400m->wimax_dev.net_dev,
                                          skb->data - ETH_HLEN,
                                          cpu_to_be16(ETH_P_IP));
index 84f071ac0d84d5c001ada50134b2ba3f79d9d71b..54ff5930126c4dca93c00a240790a013ae27c2d8 100644 (file)
@@ -1,15 +1,15 @@
 config ATH10K
-        tristate "Atheros 802.11ac wireless cards support"
-        depends on MAC80211 && HAS_DMA
+       tristate "Atheros 802.11ac wireless cards support"
+       depends on MAC80211 && HAS_DMA
        select ATH_COMMON
        select CRC32
        select WANT_DEV_COREDUMP
        select ATH10K_CE
-        ---help---
-          This module adds support for wireless adapters based on
-          Atheros IEEE 802.11ac family of chipsets.
+       ---help---
+         This module adds support for wireless adapters based on
+         Atheros IEEE 802.11ac family of chipsets.
 
-          If you choose to build a module, it'll be called ath10k.
+         If you choose to build a module, it'll be called ath10k.
 
 config ATH10K_CE
        bool
@@ -41,12 +41,12 @@ config ATH10K_USB
          work in progress and will not fully work.
 
 config ATH10K_SNOC
-        tristate "Qualcomm ath10k SNOC support (EXPERIMENTAL)"
-        depends on ATH10K && ARCH_QCOM
-        ---help---
-          This module adds support for integrated WCN3990 chip connected
-          to system NOC(SNOC). Currently work in progress and will not
-          fully work.
+       tristate "Qualcomm ath10k SNOC support (EXPERIMENTAL)"
+       depends on ATH10K && ARCH_QCOM
+       ---help---
+         This module adds support for integrated WCN3990 chip connected
+         to system NOC(SNOC). Currently work in progress and will not
+         fully work.
 
 config ATH10K_DEBUG
        bool "Atheros ath10k debugging"
index 3b96a43fbda41c12701de113db010acb57297cbe..18c709c484e738cd02a0c7cd373c0f485b5170f9 100644 (file)
@@ -1512,7 +1512,7 @@ ath10k_ce_alloc_src_ring_64(struct ath10k *ar, unsigned int ce_id,
                ret = ath10k_ce_alloc_shadow_base(ar, src_ring, nentries);
                if (ret) {
                        dma_free_coherent(ar->dev,
-                                         (nentries * sizeof(struct ce_desc) +
+                                         (nentries * sizeof(struct ce_desc_64) +
                                           CE_DESC_RING_ALIGN),
                                          src_ring->base_addr_owner_space_unaligned,
                                          base_addr);
index dbeffaef60247587caeb5f336ef004e4b0f57dcb..b8fb5382dedeb9830142f83d5b674f8d445c8e98 100644 (file)
@@ -383,4 +383,46 @@ static inline u32 ath10k_ce_interrupt_summary(struct ath10k *ar)
                return CE_INTERRUPT_SUMMARY;
 }
 
+/* Host software's Copy Engine configuration. */
+#define CE_ATTR_FLAGS 0
+
+/*
+ * Configuration information for a Copy Engine pipe.
+ * Passed from Host to Target during startup (one per CE).
+ *
+ * NOTE: Structure is shared between Host software and Target firmware!
+ */
+struct ce_pipe_config {
+       __le32 pipenum;
+       __le32 pipedir;
+       __le32 nentries;
+       __le32 nbytes_max;
+       __le32 flags;
+       __le32 reserved;
+};
+
+/*
+ * Directions for interconnect pipe configuration.
+ * These definitions may be used during configuration and are shared
+ * between Host and Target.
+ *
+ * Pipe Directions are relative to the Host, so PIPEDIR_IN means
+ * "coming IN over air through Target to Host" as with a WiFi Rx operation.
+ * Conversely, PIPEDIR_OUT means "going OUT from Host through Target over air"
+ * as with a WiFi Tx operation. This is somewhat awkward for the "middle-man"
+ * Target since things that are "PIPEDIR_OUT" are coming IN to the Target
+ * over the interconnect.
+ */
+#define PIPEDIR_NONE    0
+#define PIPEDIR_IN      1  /* Target-->Host, WiFi Rx direction */
+#define PIPEDIR_OUT     2  /* Host->Target, WiFi Tx direction */
+#define PIPEDIR_INOUT   3  /* bidirectional */
+
+/* Establish a mapping between a service/direction and a pipe. */
+struct service_to_pipe {
+       __le32 service_id;
+       __le32 pipedir;
+       __le32 pipenum;
+};
+
 #endif /* _CE_H_ */
index ad4f6e3c07374ca17f991a7151eee02f15bb60ca..85c58ebbfb2618f30d16c8ee017b581cb043c06e 100644 (file)
@@ -41,10 +41,8 @@ static bool uart_print;
 static bool skip_otp;
 static bool rawmode;
 
-/* Enable ATH10K_FW_CRASH_DUMP_REGISTERS and ATH10K_FW_CRASH_DUMP_CE_DATA
- * by default.
- */
-unsigned long ath10k_coredump_mask = 0x3;
+unsigned long ath10k_coredump_mask = BIT(ATH10K_FW_CRASH_DUMP_REGISTERS) |
+                                    BIT(ATH10K_FW_CRASH_DUMP_CE_DATA);
 
 /* FIXME: most of these should be readonly */
 module_param_named(debug_mask, ath10k_debug_mask, uint, 0644);
@@ -82,6 +80,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = {
                .hw_ops = &qca988x_ops,
                .decap_align_bytes = 4,
                .spectral_bin_discard = 0,
+               .spectral_bin_offset = 0,
                .vht160_mcs_rx_highest = 0,
                .vht160_mcs_tx_highest = 0,
                .n_cipher_suites = 8,
@@ -113,6 +112,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = {
                .hw_ops = &qca988x_ops,
                .decap_align_bytes = 4,
                .spectral_bin_discard = 0,
+               .spectral_bin_offset = 0,
                .vht160_mcs_rx_highest = 0,
                .vht160_mcs_tx_highest = 0,
                .n_cipher_suites = 8,
@@ -145,6 +145,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = {
                .hw_ops = &qca988x_ops,
                .decap_align_bytes = 4,
                .spectral_bin_discard = 0,
+               .spectral_bin_offset = 0,
                .vht160_mcs_rx_highest = 0,
                .vht160_mcs_tx_highest = 0,
                .n_cipher_suites = 8,
@@ -176,6 +177,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = {
                .hw_ops = &qca988x_ops,
                .decap_align_bytes = 4,
                .spectral_bin_discard = 0,
+               .spectral_bin_offset = 0,
                .vht160_mcs_rx_highest = 0,
                .vht160_mcs_tx_highest = 0,
                .n_cipher_suites = 8,
@@ -207,6 +209,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = {
                .hw_ops = &qca988x_ops,
                .decap_align_bytes = 4,
                .spectral_bin_discard = 0,
+               .spectral_bin_offset = 0,
                .vht160_mcs_rx_highest = 0,
                .vht160_mcs_tx_highest = 0,
                .n_cipher_suites = 8,
@@ -238,6 +241,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = {
                .hw_ops = &qca988x_ops,
                .decap_align_bytes = 4,
                .spectral_bin_discard = 0,
+               .spectral_bin_offset = 0,
                .vht160_mcs_rx_highest = 0,
                .vht160_mcs_tx_highest = 0,
                .n_cipher_suites = 8,
@@ -272,6 +276,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = {
                .target_cpu_freq = 176000000,
                .decap_align_bytes = 4,
                .spectral_bin_discard = 0,
+               .spectral_bin_offset = 0,
                .vht160_mcs_rx_highest = 0,
                .vht160_mcs_tx_highest = 0,
                .n_cipher_suites = 8,
@@ -309,6 +314,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = {
                .hw_ops = &qca99x0_ops,
                .decap_align_bytes = 1,
                .spectral_bin_discard = 4,
+               .spectral_bin_offset = 0,
                .vht160_mcs_rx_highest = 0,
                .vht160_mcs_tx_highest = 0,
                .n_cipher_suites = 11,
@@ -347,6 +353,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = {
                .hw_ops = &qca99x0_ops,
                .decap_align_bytes = 1,
                .spectral_bin_discard = 12,
+               .spectral_bin_offset = 8,
 
                /* Can do only 2x2 VHT160 or 80+80. 1560Mbps is 4x4 80MHz
                 * or 2x2 160MHz, long-guard-interval.
@@ -388,6 +395,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = {
                .hw_ops = &qca99x0_ops,
                .decap_align_bytes = 1,
                .spectral_bin_discard = 12,
+               .spectral_bin_offset = 8,
 
                /* Can do only 1x1 VHT160 or 80+80. 780Mbps is 2x2 80MHz or
                 * 1x1 160MHz, long-guard-interval.
@@ -423,6 +431,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = {
                .hw_ops = &qca988x_ops,
                .decap_align_bytes = 4,
                .spectral_bin_discard = 0,
+               .spectral_bin_offset = 0,
                .vht160_mcs_rx_highest = 0,
                .vht160_mcs_tx_highest = 0,
                .n_cipher_suites = 8,
@@ -456,6 +465,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = {
                .target_cpu_freq = 176000000,
                .decap_align_bytes = 4,
                .spectral_bin_discard = 0,
+               .spectral_bin_offset = 0,
                .vht160_mcs_rx_highest = 0,
                .vht160_mcs_tx_highest = 0,
                .n_cipher_suites = 8,
@@ -494,6 +504,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = {
                .hw_ops = &qca99x0_ops,
                .decap_align_bytes = 1,
                .spectral_bin_discard = 4,
+               .spectral_bin_offset = 0,
                .vht160_mcs_rx_highest = 0,
                .vht160_mcs_tx_highest = 0,
                .n_cipher_suites = 11,
index 951dbdd1c9eb8db866e3bb432363dbe09323f87b..427ee5752bb0368bf599186bfddee1d91af00048 100644 (file)
@@ -48,7 +48,8 @@
 #define WMI_READY_TIMEOUT (5 * HZ)
 #define ATH10K_FLUSH_TIMEOUT_HZ (5 * HZ)
 #define ATH10K_CONNECTION_LOSS_HZ (3 * HZ)
-#define ATH10K_NUM_CHANS 40
+#define ATH10K_NUM_CHANS 41
+#define ATH10K_MAX_5G_CHAN 173
 
 /* Antenna noise floor */
 #define ATH10K_DEFAULT_NOISE_FLOOR -95
index 0d98c93a3abaef15bca612fadb989940338ca50d..4926722e0c0d3333cf5a93bdbce747c6cbbef6cf 100644 (file)
@@ -1727,7 +1727,9 @@ int ath10k_debug_start(struct ath10k *ar)
                        ath10k_warn(ar, "failed to disable pktlog: %d\n", ret);
        }
 
-       if (ar->debug.nf_cal_period) {
+       if (ar->debug.nf_cal_period &&
+           !test_bit(ATH10K_FW_FEATURE_NON_BMI,
+                     ar->normal_mode_fw.fw_file.fw_features)) {
                ret = ath10k_wmi_pdev_set_param(ar,
                                                ar->wmi.pdev_param->cal_period,
                                                ar->debug.nf_cal_period);
@@ -1744,7 +1746,9 @@ void ath10k_debug_stop(struct ath10k *ar)
 {
        lockdep_assert_held(&ar->conf_mutex);
 
-       ath10k_debug_cal_data_fetch(ar);
+       if (!test_bit(ATH10K_FW_FEATURE_NON_BMI,
+                     ar->normal_mode_fw.fw_file.fw_features))
+               ath10k_debug_cal_data_fetch(ar);
 
        /* Must not use _sync to avoid deadlock, we do that in
         * ath10k_debug_destroy(). The check for htt_stats_mask is to avoid
@@ -2367,15 +2371,18 @@ int ath10k_debug_register(struct ath10k *ar)
        debugfs_create_file("fw_dbglog", 0600, ar->debug.debugfs_phy, ar,
                            &fops_fw_dbglog);
 
-       debugfs_create_file("cal_data", 0400, ar->debug.debugfs_phy, ar,
-                           &fops_cal_data);
+       if (!test_bit(ATH10K_FW_FEATURE_NON_BMI,
+                     ar->normal_mode_fw.fw_file.fw_features)) {
+               debugfs_create_file("cal_data", 0400, ar->debug.debugfs_phy, ar,
+                                   &fops_cal_data);
+
+               debugfs_create_file("nf_cal_period", 0600, ar->debug.debugfs_phy, ar,
+                                   &fops_nf_cal_period);
+       }
 
        debugfs_create_file("ani_enable", 0600, ar->debug.debugfs_phy, ar,
                            &fops_ani_enable);
 
-       debugfs_create_file("nf_cal_period", 0600, ar->debug.debugfs_phy, ar,
-                           &fops_nf_cal_period);
-
        if (IS_ENABLED(CONFIG_ATH10K_DFS_CERTIFIED)) {
                debugfs_create_file("dfs_simulate_radar", 0200, ar->debug.debugfs_phy,
                                    ar, &fops_simulate_radar);
index 5d8b97a0ccaa537b9cd022fd081d6f94f06c9191..89157c5b5e5fd0771d5be58bb81cba168ffd59aa 100644 (file)
@@ -1202,7 +1202,7 @@ static int ath10k_htt_tx_32(struct ath10k_htt *htt,
        case ATH10K_HW_TXRX_RAW:
        case ATH10K_HW_TXRX_NATIVE_WIFI:
                flags0 |= HTT_DATA_TX_DESC_FLAGS0_MAC_HDR_PRESENT;
-               /* pass through */
+               /* fall through */
        case ATH10K_HW_TXRX_ETHERNET:
                if (ar->hw_params.continuous_frag_desc) {
                        ext_desc_t = htt->frag_desc.vaddr_desc_32;
@@ -1404,7 +1404,7 @@ static int ath10k_htt_tx_64(struct ath10k_htt *htt,
        case ATH10K_HW_TXRX_RAW:
        case ATH10K_HW_TXRX_NATIVE_WIFI:
                flags0 |= HTT_DATA_TX_DESC_FLAGS0_MAC_HDR_PRESENT;
-               /* pass through */
+               /* fall through */
        case ATH10K_HW_TXRX_ETHERNET:
                if (ar->hw_params.continuous_frag_desc) {
                        ext_desc_t = htt->frag_desc.vaddr_desc_64;
index 23467e9fefeb012214749816d409d56e37c50b8e..a274bd809a08d55e829e98f30063a23e7a046122 100644 (file)
@@ -586,6 +586,9 @@ struct ath10k_hw_params {
 
        /* target supporting retention restore on ddr */
        bool rri_on_ddr;
+
+       /* Number of bytes to be the offset for each FFT sample */
+       int spectral_bin_offset;
 };
 
 struct htt_rx_desc;
index 836e0a47b94a0a192b210620d6652c41145cbab1..541bc1c4b2f710526c8cb8470bd46f0c1134cddf 100644 (file)
@@ -7737,7 +7737,7 @@ static void ath10k_sta_statistics(struct ieee80211_hw *hw,
                return;
 
        sinfo->rx_duration = arsta->rx_duration;
-       sinfo->filled |= 1ULL << NL80211_STA_INFO_RX_DURATION;
+       sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_DURATION);
 
        if (!arsta->txrate.legacy && !arsta->txrate.nss)
                return;
@@ -7750,7 +7750,7 @@ static void ath10k_sta_statistics(struct ieee80211_hw *hw,
                sinfo->txrate.bw = arsta->txrate.bw;
        }
        sinfo->txrate.flags = arsta->txrate.flags;
-       sinfo->filled |= 1ULL << NL80211_STA_INFO_TX_BITRATE;
+       sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_BITRATE);
 }
 
 static const struct ieee80211_ops ath10k_ops = {
@@ -7870,6 +7870,9 @@ static const struct ieee80211_channel ath10k_5ghz_channels[] = {
        CHAN5G(161, 5805, 0),
        CHAN5G(165, 5825, 0),
        CHAN5G(169, 5845, 0),
+       CHAN5G(173, 5865, 0),
+       /* If you add more, you may need to change ATH10K_MAX_5G_CHAN */
+       /* And you will definitely need to change ATH10K_NUM_CHANS in core.h */
 };
 
 struct ath10k *ath10k_mac_create(size_t priv_size)
index e52fd83156b694c2b9bf186277961aaf0827091d..0ed4366571089595ee23060a6d85083ae8b1c933 100644 (file)
@@ -86,48 +86,6 @@ struct pcie_state {
 /* PCIE_CONFIG_FLAG definitions */
 #define PCIE_CONFIG_FLAG_ENABLE_L1  0x0000001
 
-/* Host software's Copy Engine configuration. */
-#define CE_ATTR_FLAGS 0
-
-/*
- * Configuration information for a Copy Engine pipe.
- * Passed from Host to Target during startup (one per CE).
- *
- * NOTE: Structure is shared between Host software and Target firmware!
- */
-struct ce_pipe_config {
-       __le32 pipenum;
-       __le32 pipedir;
-       __le32 nentries;
-       __le32 nbytes_max;
-       __le32 flags;
-       __le32 reserved;
-};
-
-/*
- * Directions for interconnect pipe configuration.
- * These definitions may be used during configuration and are shared
- * between Host and Target.
- *
- * Pipe Directions are relative to the Host, so PIPEDIR_IN means
- * "coming IN over air through Target to Host" as with a WiFi Rx operation.
- * Conversely, PIPEDIR_OUT means "going OUT from Host through Target over air"
- * as with a WiFi Tx operation. This is somewhat awkward for the "middle-man"
- * Target since things that are "PIPEDIR_OUT" are coming IN to the Target
- * over the interconnect.
- */
-#define PIPEDIR_NONE    0
-#define PIPEDIR_IN      1  /* Target-->Host, WiFi Rx direction */
-#define PIPEDIR_OUT     2  /* Host->Target, WiFi Tx direction */
-#define PIPEDIR_INOUT   3  /* bidirectional */
-
-/* Establish a mapping between a service/direction and a pipe. */
-struct service_to_pipe {
-       __le32 service_id;
-       __le32 pipedir;
-       __le32 pipenum;
-};
-
 /* Per-pipe state. */
 struct ath10k_pci_pipe {
        /* Handle of underlying Copy Engine */
index a3a7042fe13ab79a701be77cb93ac0476a785175..fa1843a7e0fdaaec3e74a96d9a8b359a921c9746 100644 (file)
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  */
 
-#include <linux/module.h>
+#include <linux/clk.h>
 #include <linux/kernel.h>
-#include "debug.h"
-#include "hif.h"
-#include "htc.h"
-#include "ce.h"
-#include "snoc.h"
+#include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/regulator/consumer.h>
-#include <linux/clk.h>
-#define  WCN3990_CE_ATTR_FLAGS 0
+
+#include "ce.h"
+#include "debug.h"
+#include "hif.h"
+#include "htc.h"
+#include "snoc.h"
+
 #define ATH10K_SNOC_RX_POST_RETRY_MS 50
 #define CE_POLL_PIPE 4
 
@@ -449,7 +450,7 @@ static void ath10k_snoc_htt_rx_cb(struct ath10k_ce_pipe *ce_state)
 
 static void ath10k_snoc_rx_replenish_retry(struct timer_list *t)
 {
-       struct ath10k_pci *ar_snoc = from_timer(ar_snoc, t, rx_post_retry);
+       struct ath10k_snoc *ar_snoc = from_timer(ar_snoc, t, rx_post_retry);
        struct ath10k *ar = ar_snoc->ar;
 
        ath10k_snoc_rx_post(ar);
@@ -820,7 +821,7 @@ static const struct ath10k_bus_ops ath10k_snoc_bus_ops = {
        .write32        = ath10k_snoc_write32,
 };
 
-int ath10k_snoc_get_ce_id_from_irq(struct ath10k *ar, int irq)
+static int ath10k_snoc_get_ce_id_from_irq(struct ath10k *ar, int irq)
 {
        struct ath10k_snoc *ar_snoc = ath10k_snoc_priv(ar);
        int i;
@@ -868,7 +869,7 @@ static int ath10k_snoc_napi_poll(struct napi_struct *ctx, int budget)
        return done;
 }
 
-void ath10k_snoc_init_napi(struct ath10k *ar)
+static void ath10k_snoc_init_napi(struct ath10k *ar)
 {
        netif_napi_add(&ar->napi_dev, &ar->napi, ath10k_snoc_napi_poll,
                       ATH10K_NAPI_BUDGET);
@@ -1303,13 +1304,13 @@ static int ath10k_snoc_probe(struct platform_device *pdev)
        ar_snoc->ce.bus_ops = &ath10k_snoc_bus_ops;
        ar->ce_priv = &ar_snoc->ce;
 
-       ath10k_snoc_resource_init(ar);
+       ret = ath10k_snoc_resource_init(ar);
        if (ret) {
                ath10k_warn(ar, "failed to initialize resource: %d\n", ret);
                goto err_core_destroy;
        }
 
-       ath10k_snoc_setup_resource(ar);
+       ret = ath10k_snoc_setup_resource(ar);
        if (ret) {
                ath10k_warn(ar, "failed to setup resource: %d\n", ret);
                goto err_core_destroy;
@@ -1388,25 +1389,7 @@ static struct platform_driver ath10k_snoc_driver = {
                        .of_match_table = ath10k_snoc_dt_match,
                },
 };
-
-static int __init ath10k_snoc_init(void)
-{
-       int ret;
-
-       ret = platform_driver_register(&ath10k_snoc_driver);
-       if (ret)
-               pr_err("failed to register ath10k snoc driver: %d\n",
-                      ret);
-
-       return ret;
-}
-module_init(ath10k_snoc_init);
-
-static void __exit ath10k_snoc_exit(void)
-{
-       platform_driver_unregister(&ath10k_snoc_driver);
-}
-module_exit(ath10k_snoc_exit);
+module_platform_driver(ath10k_snoc_driver);
 
 MODULE_AUTHOR("Qualcomm");
 MODULE_LICENSE("Dual BSD/GPL");
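
module_platform_driver() generates exactly the registration boilerplate the patch deletes. Roughly what the macro expands to for this driver (a sketch; the real expansion is module_driver() in <linux/platform_device.h>):

    /* Approximate expansion of module_platform_driver(ath10k_snoc_driver): */
    static int __init ath10k_snoc_driver_init(void)
    {
            return platform_driver_register(&ath10k_snoc_driver);
    }
    module_init(ath10k_snoc_driver_init);

    static void __exit ath10k_snoc_driver_exit(void)
    {
            platform_driver_unregister(&ath10k_snoc_driver);
    }
    module_exit(ath10k_snoc_driver_exit);
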
index 05dc98f46ccd2be5485809a4c38d6dfe688d402f..f9e530189d481a76c8bbb13a7ff46dc29ecc7362 100644 (file)
@@ -19,7 +19,6 @@
 
 #include "hw.h"
 #include "ce.h"
-#include "pci.h"
 
 struct ath10k_snoc_drv_priv {
        enum ath10k_hw_rev hw_rev;
index af6995de7e005d46b85395c2d8289377f3fe712e..653b6d01320773323affbb23ee17eabf11038ae7 100644 (file)
@@ -145,7 +145,7 @@ int ath10k_spectral_process_fft(struct ath10k *ar,
        fft_sample->noise = __cpu_to_be16(phyerr->nf_chains[chain_idx]);
 
        bins = (u8 *)fftr;
-       bins += sizeof(*fftr);
+       bins += sizeof(*fftr) + ar->hw_params.spectral_bin_offset;
 
        fft_sample->tsf = __cpu_to_be64(tsf);
 
index f97ab795cf2e6fe880c3797de07ac9df49e538fb..877249ac6fd446e44dad8455e55e2a8667444021 100644 (file)
@@ -2366,7 +2366,7 @@ int ath10k_wmi_event_mgmt_rx(struct ath10k *ar, struct sk_buff *skb)
         */
        if (channel >= 1 && channel <= 14) {
                status->band = NL80211_BAND_2GHZ;
-       } else if (channel >= 36 && channel <= 169) {
+       } else if (channel >= 36 && channel <= ATH10K_MAX_5G_CHAN) {
                status->band = NL80211_BAND_5GHZ;
        } else {
                /* Shouldn't happen unless list of advertised channels to
@@ -4602,10 +4602,6 @@ void ath10k_wmi_event_pdev_tpc_config(struct ath10k *ar, struct sk_buff *skb)
 
        ev = (struct wmi_pdev_tpc_config_event *)skb->data;
 
-       tpc_stats = kzalloc(sizeof(*tpc_stats), GFP_ATOMIC);
-       if (!tpc_stats)
-               return;
-
        num_tx_chain = __le32_to_cpu(ev->num_tx_chain);
 
        if (num_tx_chain > WMI_TPC_TX_N_CHAIN) {
@@ -4614,6 +4610,10 @@ void ath10k_wmi_event_pdev_tpc_config(struct ath10k *ar, struct sk_buff *skb)
                return;
        }
 
+       tpc_stats = kzalloc(sizeof(*tpc_stats), GFP_ATOMIC);
+       if (!tpc_stats)
+               return;
+
        ath10k_wmi_tpc_config_get_rate_code(rate_code, pream_table,
                                            num_tx_chain);
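
Moving the kzalloc() below the num_tx_chain check fixes a memory leak: the old order allocated tpc_stats and then returned early on invalid input without freeing it. A minimal userspace sketch of the validate-before-allocate shape (parse_event() and MAX_CHAINS are illustrative, not driver symbols):

    #include <stdlib.h>

    #define MAX_CHAINS 4

    static int parse_event(unsigned int num_tx_chain)
    {
            char *stats;

            if (num_tx_chain > MAX_CHAINS)  /* reject first ...          */
                    return -1;              /* ... nothing to leak here  */

            stats = calloc(1, 256);         /* ... then allocate         */
            if (!stats)
                    return -1;
            /* fill and consume stats here */
            free(stats);
            return 0;
    }
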
 
@@ -5018,13 +5018,11 @@ static int ath10k_wmi_alloc_chunk(struct ath10k *ar, u32 req_id,
        void *vaddr;
 
        pool_size = num_units * round_up(unit_len, 4);
-       vaddr = dma_alloc_coherent(ar->dev, pool_size, &paddr, GFP_KERNEL);
+       vaddr = dma_zalloc_coherent(ar->dev, pool_size, &paddr, GFP_KERNEL);
 
        if (!vaddr)
                return -ENOMEM;
 
-       memset(vaddr, 0, pool_size);
-
        ar->wmi.mem_chunks[idx].vaddr = vaddr;
        ar->wmi.mem_chunks[idx].paddr = paddr;
        ar->wmi.mem_chunks[idx].len = pool_size;
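
dma_zalloc_coherent() folds the deleted memset() into the allocation. At the time of this series it was essentially the following wrapper (a sketch of the idea, not necessarily the exact kernel source):

    /* Zeroing wrapper around dma_alloc_coherent(), as it looked then: */
    static inline void *dma_zalloc_coherent(struct device *dev, size_t size,
                                            dma_addr_t *dma_handle, gfp_t flag)
    {
            return dma_alloc_coherent(dev, size, dma_handle,
                                      flag | __GFP_ZERO);
    }
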
index f23c851765df49dfe35c650249b687abb2ebe965..05140d8baa360e73bb15bdf6b73135304b25ecd9 100644 (file)
@@ -670,6 +670,7 @@ ath5k_hw_init_beacon_timers(struct ath5k_hw *ah, u32 next_beacon, u32 interval)
                break;
        case NL80211_IFTYPE_ADHOC:
                AR5K_REG_ENABLE_BITS(ah, AR5K_TXCFG, AR5K_TXCFG_ADHOC_BCN_ATIM);
+               /* fall through */
        default:
                /* On non-STA modes timer1 is used as next DMA
                 * beacon alert (DBA) timer and timer2 as next
index 0687697d5e2db5ed2e9c2567ae8189e85f9f711f..e121187f371ff5e023b5efda39af6847b5a2c47a 100644 (file)
@@ -1811,20 +1811,20 @@ static int ath6kl_get_station(struct wiphy *wiphy, struct net_device *dev,
 
        if (vif->target_stats.rx_byte) {
                sinfo->rx_bytes = vif->target_stats.rx_byte;
-               sinfo->filled |= BIT(NL80211_STA_INFO_RX_BYTES64);
+               sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_BYTES64);
                sinfo->rx_packets = vif->target_stats.rx_pkt;
-               sinfo->filled |= BIT(NL80211_STA_INFO_RX_PACKETS);
+               sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_PACKETS);
        }
 
        if (vif->target_stats.tx_byte) {
                sinfo->tx_bytes = vif->target_stats.tx_byte;
-               sinfo->filled |= BIT(NL80211_STA_INFO_TX_BYTES64);
+               sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_BYTES64);
                sinfo->tx_packets = vif->target_stats.tx_pkt;
-               sinfo->filled |= BIT(NL80211_STA_INFO_TX_PACKETS);
+               sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_PACKETS);
        }
 
        sinfo->signal = vif->target_stats.cs_rssi;
-       sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL);
+       sinfo->filled |= BIT_ULL(NL80211_STA_INFO_SIGNAL);
 
        rate = vif->target_stats.tx_ucast_rate;
 
@@ -1857,12 +1857,12 @@ static int ath6kl_get_station(struct wiphy *wiphy, struct net_device *dev,
                return 0;
        }
 
-       sinfo->filled |= BIT(NL80211_STA_INFO_TX_BITRATE);
+       sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_BITRATE);
 
        if (test_bit(CONNECTED, &vif->flags) &&
            test_bit(DTIM_PERIOD_AVAIL, &vif->flags) &&
            vif->nw_type == INFRA_NETWORK) {
-               sinfo->filled |= BIT(NL80211_STA_INFO_BSS_PARAM);
+               sinfo->filled |= BIT_ULL(NL80211_STA_INFO_BSS_PARAM);
                sinfo->bss_param.flags = 0;
                sinfo->bss_param.dtim_period = vif->assoc_bss_dtim_period;
                sinfo->bss_param.beacon_interval = vif->assoc_bss_beacon_int;
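
sinfo->filled is a u64, but BIT(n) expands to (1UL << n), which is only 32 bits wide on 32-bit architectures; BIT_ULL(n) keeps all 64 flag positions usable, so the conversion is correctness for any flag at bit 32 or above and future-proofing for the rest. A userspace demonstration (macro definitions mirror the kernel's):

    #include <stdio.h>

    #define BIT(nr)     (1UL << (nr))
    #define BIT_ULL(nr) (1ULL << (nr))

    int main(void)
    {
            /* On an ILP32 target, BIT(40) would shift past the width of
             * unsigned long (undefined behaviour); BIT_ULL(40) is always
             * well defined and fits a u64 "filled" bitmap. */
            unsigned long long filled = BIT_ULL(40);

            printf("BIT_ULL(40) = %#llx\n", filled);
            printf("bits in unsigned long here: %zu\n",
                   8 * sizeof(unsigned long));
            return 0;
    }
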
@@ -3899,16 +3899,19 @@ int ath6kl_cfg80211_init(struct ath6kl *ar)
        switch (ar->hw.cap) {
        case WMI_11AN_CAP:
                ht = true;
+               /* fall through */
        case WMI_11A_CAP:
                band_5gig = true;
                break;
        case WMI_11GN_CAP:
                ht = true;
+               /* fall through */
        case WMI_11G_CAP:
                band_2gig = true;
                break;
        case WMI_11AGN_CAP:
                ht = true;
+               /* fall through */
        case WMI_11AG_CAP:
                band_2gig = true;
                band_5gig = true;
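
The fall-through comments added across these wireless hunks mark deliberate case fall-through so builds with -Wimplicit-fallthrough stay clean without changing behavior. A minimal illustration (classify() and its cases are invented for the example):

    #include <stdio.h>

    /* The annotated fall-through below is what -Wimplicit-fallthrough
     * recognizes as intentional. */
    static void classify(int cap)
    {
            int ht = 0, band_5g = 0;

            switch (cap) {
            case 2:                 /* an "11AN"-style capability */
                    ht = 1;
                    /* fall through */
            case 1:                 /* plain "11A" */
                    band_5g = 1;
                    break;
            default:
                    break;
            }
            printf("ht=%d band_5g=%d\n", ht, band_5g);
    }

    int main(void)
    {
            classify(2);            /* prints ht=1 band_5g=1 */
            return 0;
    }
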
index 2195b1b7a8a63873989d95c655c2868d00371003..bb50680580f35f90d218a378d8ba066b0dc84e6f 100644 (file)
@@ -1415,6 +1415,7 @@ static const struct sdio_device_id ath6kl_sdio_devices[] = {
        {SDIO_DEVICE(MANUFACTURER_CODE, (MANUFACTURER_ID_AR6004_BASE | 0x1))},
        {SDIO_DEVICE(MANUFACTURER_CODE, (MANUFACTURER_ID_AR6004_BASE | 0x2))},
        {SDIO_DEVICE(MANUFACTURER_CODE, (MANUFACTURER_ID_AR6004_BASE | 0x18))},
+       {SDIO_DEVICE(MANUFACTURER_CODE, (MANUFACTURER_ID_AR6004_BASE | 0x19))},
        {},
 };
 
index 7922550c2159bbd6269aae15abbab79df4d99017..ef2dd68d3f779fb26de576e429f0ce98f75dc33e 100644 (file)
@@ -583,12 +583,14 @@ static void ar5008_hw_init_chain_masks(struct ath_hw *ah)
        case 0x5:
                REG_SET_BIT(ah, AR_PHY_ANALOG_SWAP,
                            AR_PHY_SWAP_ALT_CHAIN);
+               /* fall through */
        case 0x3:
                if (ah->hw_version.macVersion == AR_SREV_REVISION_5416_10) {
                        REG_WRITE(ah, AR_PHY_RX_CHAINMASK, 0x7);
                        REG_WRITE(ah, AR_PHY_CAL_CHAINMASK, 0x7);
                        break;
                }
+               /* else: fall through */
        case 0x1:
        case 0x2:
        case 0x7:
index 61a9b85045d2ea1c7cb38e7972faf0e6520c58a3..7132918812082c9d164df291e24a08c50122b4a4 100644 (file)
@@ -119,6 +119,7 @@ static int ar9002_hw_set_channel(struct ath_hw *ah, struct ath9k_channel *chan)
                                aModeRefSel = 2;
                        if (aModeRefSel)
                                break;
+                       /* else: fall through */
                case 1:
                default:
                        aModeRefSel = 0;
index f685843a2ff3bf84a5ab0494ed088298828875a4..0a6eb8a8c1ed01ada9d6b10fb861f449f27a637e 100644 (file)
@@ -538,7 +538,7 @@ static int read_file_interrupt(struct seq_file *file, void *data)
        if (sc->sc_ah->caps.hw_caps & ATH9K_HW_CAP_EDMA) {
                PR_IS("RXLP", rxlp);
                PR_IS("RXHP", rxhp);
-               PR_IS("WATHDOG", bb_watchdog);
+               PR_IS("WATCHDOG", bb_watchdog);
        } else {
                PR_IS("RX", rxok);
        }
index e60bea4604e4104dbcc837ddf23ecd890a299e2e..1665066f4e242a5a0b2441c7d74d591234906d02 100644 (file)
@@ -496,7 +496,7 @@ static void ath9k_hw_init_macaddr(struct ath_hw *ah)
        ath_err(common, "eeprom contains invalid mac address: %pM\n",
                common->macaddr);
 
-       random_ether_addr(common->macaddr);
+       eth_random_addr(common->macaddr);
        ath_err(common, "random mac address will be used: %pM\n",
                common->macaddr);
 
index b6663c80e7ddcd67c1d5ae41077639a9bdf661ce..5eb1c0aea41d7d8b5706c8c4be0e881f28fe0954 100644 (file)
@@ -1928,6 +1928,7 @@ static int ath9k_ampdu_action(struct ieee80211_hw *hw,
        case IEEE80211_AMPDU_TX_STOP_FLUSH:
        case IEEE80211_AMPDU_TX_STOP_FLUSH_CONT:
                flush = true;
+               /* fall through */
        case IEEE80211_AMPDU_TX_STOP_CONT:
                ath9k_ps_wakeup(sc);
                ath_tx_aggr_stop(sc, sta, tid);
index 78946f28d0c7f208e50c2585a7937cb0a56cfe63..013d056a7a4cb4b5c56991a2b256825dad0e6e56 100644 (file)
@@ -302,14 +302,14 @@ int wil_cid_fill_sinfo(struct wil6210_vif *vif, int cid,
 
        sinfo->generation = wil->sinfo_gen;
 
-       sinfo->filled = BIT(NL80211_STA_INFO_RX_BYTES) |
-                       BIT(NL80211_STA_INFO_TX_BYTES) |
-                       BIT(NL80211_STA_INFO_RX_PACKETS) |
-                       BIT(NL80211_STA_INFO_TX_PACKETS) |
-                       BIT(NL80211_STA_INFO_RX_BITRATE) |
-                       BIT(NL80211_STA_INFO_TX_BITRATE) |
-                       BIT(NL80211_STA_INFO_RX_DROP_MISC) |
-                       BIT(NL80211_STA_INFO_TX_FAILED);
+       sinfo->filled = BIT_ULL(NL80211_STA_INFO_RX_BYTES) |
+                       BIT_ULL(NL80211_STA_INFO_TX_BYTES) |
+                       BIT_ULL(NL80211_STA_INFO_RX_PACKETS) |
+                       BIT_ULL(NL80211_STA_INFO_TX_PACKETS) |
+                       BIT_ULL(NL80211_STA_INFO_RX_BITRATE) |
+                       BIT_ULL(NL80211_STA_INFO_TX_BITRATE) |
+                       BIT_ULL(NL80211_STA_INFO_RX_DROP_MISC) |
+                       BIT_ULL(NL80211_STA_INFO_TX_FAILED);
 
        sinfo->txrate.flags = RATE_INFO_FLAGS_60G;
        sinfo->txrate.mcs = le16_to_cpu(reply.evt.bf_mcs);
@@ -322,7 +322,7 @@ int wil_cid_fill_sinfo(struct wil6210_vif *vif, int cid,
        sinfo->tx_failed = stats->tx_errors;
 
        if (test_bit(wil_vif_fwconnected, vif->status)) {
-               sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL);
+               sinfo->filled |= BIT_ULL(NL80211_STA_INFO_SIGNAL);
                if (test_bit(WMI_FW_CAPABILITY_RSSI_REPORTING,
                             wil->fw_capabilities))
                        sinfo->signal = reply.evt.rssi;
index b01dc34d55af15a8ef3caebe4755f244db371903..3ed3d9f6aae91b5a76d6caee9ac714b1824db6d5 100644 (file)
@@ -1516,10 +1516,9 @@ struct net_device *init_atmel_card(unsigned short irq, unsigned long port,
        priv->present_callback = card_present;
        priv->card = card;
        priv->firmware = NULL;
-       priv->firmware_id[0] = '\0';
        priv->firmware_type = fw_type;
        if (firmware) /* module parameter */
-               strcpy(priv->firmware_id, firmware);
+               strlcpy(priv->firmware_id, firmware, sizeof(priv->firmware_id));
        priv->bus_type = card_present ? BUS_TYPE_PCCARD : BUS_TYPE_PCI;
        priv->station_state = STATION_STATE_DOWN;
        priv->do_rx_crc = 0;
@@ -2646,14 +2645,9 @@ static int atmel_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
                        break;
                }
 
-               if (!(new_firmware = kmalloc(com.len, GFP_KERNEL))) {
-                       rc = -ENOMEM;
-                       break;
-               }
-
-               if (copy_from_user(new_firmware, com.data, com.len)) {
-                       kfree(new_firmware);
-                       rc = -EFAULT;
+               new_firmware = memdup_user(com.data, com.len);
+               if (IS_ERR(new_firmware)) {
+                       rc = PTR_ERR(new_firmware);
                        break;
                }
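
memdup_user() collapses the deleted kmalloc()/copy_from_user() pair and reports failure through ERR_PTR() instead of two separate error paths. Conceptually it behaves like this sketch (not the exact kernel source):

    #include <linux/err.h>
    #include <linux/slab.h>
    #include <linux/uaccess.h>

    /* Conceptual sketch of memdup_user(): allocate, copy in, and unwind
     * on fault, reporting via ERR_PTR(). */
    static void *memdup_user_sketch(const void __user *src, size_t len)
    {
            void *p = kmalloc(len, GFP_USER);

            if (!p)
                    return ERR_PTR(-ENOMEM);
            if (copy_from_user(p, src, len)) {
                    kfree(p);
                    return ERR_PTR(-EFAULT);
            }
            return p;
    }
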
 
index b6122aad639e9a99aeca765ff744a870c8422c18..24c4e18e7d806059be23eb7c13bb805bc5f9d0b7 100644 (file)
@@ -2434,7 +2434,7 @@ static void brcmf_convert_sta_flags(u32 fw_sta_flags, struct station_info *si)
        struct nl80211_sta_flag_update *sfu;
 
        brcmf_dbg(TRACE, "flags %08x\n", fw_sta_flags);
-       si->filled |= BIT(NL80211_STA_INFO_STA_FLAGS);
+       si->filled |= BIT_ULL(NL80211_STA_INFO_STA_FLAGS);
        sfu = &si->sta_flags;
        sfu->mask = BIT(NL80211_STA_FLAG_WME) |
                    BIT(NL80211_STA_FLAG_AUTHENTICATED) |
@@ -2470,7 +2470,7 @@ static void brcmf_fill_bss_param(struct brcmf_if *ifp, struct station_info *si)
                brcmf_err("Failed to get bss info (%d)\n", err);
                goto out_kfree;
        }
-       si->filled |= BIT(NL80211_STA_INFO_BSS_PARAM);
+       si->filled |= BIT_ULL(NL80211_STA_INFO_BSS_PARAM);
        si->bss_param.beacon_interval = le16_to_cpu(buf->bss_le.beacon_period);
        si->bss_param.dtim_period = buf->bss_le.dtim_period;
        capability = le16_to_cpu(buf->bss_le.capability);
@@ -2501,7 +2501,7 @@ brcmf_cfg80211_get_station_ibss(struct brcmf_if *ifp,
                brcmf_err("BRCMF_C_GET_RATE error (%d)\n", err);
                return err;
        }
-       sinfo->filled |= BIT(NL80211_STA_INFO_TX_BITRATE);
+       sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_BITRATE);
        sinfo->txrate.legacy = rate * 5;
 
        memset(&scbval, 0, sizeof(scbval));
@@ -2512,7 +2512,7 @@ brcmf_cfg80211_get_station_ibss(struct brcmf_if *ifp,
                return err;
        }
        rssi = le32_to_cpu(scbval.val);
-       sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL);
+       sinfo->filled |= BIT_ULL(NL80211_STA_INFO_SIGNAL);
        sinfo->signal = rssi;
 
        err = brcmf_fil_cmd_data_get(ifp, BRCMF_C_GET_GET_PKTCNTS, &pktcnt,
@@ -2521,10 +2521,10 @@ brcmf_cfg80211_get_station_ibss(struct brcmf_if *ifp,
                brcmf_err("BRCMF_C_GET_GET_PKTCNTS error (%d)\n", err);
                return err;
        }
-       sinfo->filled |= BIT(NL80211_STA_INFO_RX_PACKETS) |
-                        BIT(NL80211_STA_INFO_RX_DROP_MISC) |
-                        BIT(NL80211_STA_INFO_TX_PACKETS) |
-                        BIT(NL80211_STA_INFO_TX_FAILED);
+       sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_PACKETS) |
+                        BIT_ULL(NL80211_STA_INFO_RX_DROP_MISC) |
+                        BIT_ULL(NL80211_STA_INFO_TX_PACKETS) |
+                        BIT_ULL(NL80211_STA_INFO_TX_FAILED);
        sinfo->rx_packets = le32_to_cpu(pktcnt.rx_good_pkt);
        sinfo->rx_dropped_misc = le32_to_cpu(pktcnt.rx_bad_pkt);
        sinfo->tx_packets = le32_to_cpu(pktcnt.tx_good_pkt);
@@ -2571,7 +2571,7 @@ brcmf_cfg80211_get_station(struct wiphy *wiphy, struct net_device *ndev,
                }
        }
        brcmf_dbg(TRACE, "version %d\n", le16_to_cpu(sta_info_le.ver));
-       sinfo->filled = BIT(NL80211_STA_INFO_INACTIVE_TIME);
+       sinfo->filled = BIT_ULL(NL80211_STA_INFO_INACTIVE_TIME);
        sinfo->inactive_time = le32_to_cpu(sta_info_le.idle) * 1000;
        sta_flags = le32_to_cpu(sta_info_le.flags);
        brcmf_convert_sta_flags(sta_flags, sinfo);
@@ -2581,33 +2581,33 @@ brcmf_cfg80211_get_station(struct wiphy *wiphy, struct net_device *ndev,
        else
                sinfo->sta_flags.set &= ~BIT(NL80211_STA_FLAG_TDLS_PEER);
        if (sta_flags & BRCMF_STA_ASSOC) {
-               sinfo->filled |= BIT(NL80211_STA_INFO_CONNECTED_TIME);
+               sinfo->filled |= BIT_ULL(NL80211_STA_INFO_CONNECTED_TIME);
                sinfo->connected_time = le32_to_cpu(sta_info_le.in);
                brcmf_fill_bss_param(ifp, sinfo);
        }
        if (sta_flags & BRCMF_STA_SCBSTATS) {
-               sinfo->filled |= BIT(NL80211_STA_INFO_TX_FAILED);
+               sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_FAILED);
                sinfo->tx_failed = le32_to_cpu(sta_info_le.tx_failures);
-               sinfo->filled |= BIT(NL80211_STA_INFO_TX_PACKETS);
+               sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_PACKETS);
                sinfo->tx_packets = le32_to_cpu(sta_info_le.tx_pkts);
                sinfo->tx_packets += le32_to_cpu(sta_info_le.tx_mcast_pkts);
-               sinfo->filled |= BIT(NL80211_STA_INFO_RX_PACKETS);
+               sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_PACKETS);
                sinfo->rx_packets = le32_to_cpu(sta_info_le.rx_ucast_pkts);
                sinfo->rx_packets += le32_to_cpu(sta_info_le.rx_mcast_pkts);
                if (sinfo->tx_packets) {
-                       sinfo->filled |= BIT(NL80211_STA_INFO_TX_BITRATE);
+                       sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_BITRATE);
                        sinfo->txrate.legacy =
                                le32_to_cpu(sta_info_le.tx_rate) / 100;
                }
                if (sinfo->rx_packets) {
-                       sinfo->filled |= BIT(NL80211_STA_INFO_RX_BITRATE);
+                       sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_BITRATE);
                        sinfo->rxrate.legacy =
                                le32_to_cpu(sta_info_le.rx_rate) / 100;
                }
                if (le16_to_cpu(sta_info_le.ver) >= 4) {
-                       sinfo->filled |= BIT(NL80211_STA_INFO_TX_BYTES);
+                       sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_BYTES);
                        sinfo->tx_bytes = le64_to_cpu(sta_info_le.tx_tot_bytes);
-                       sinfo->filled |= BIT(NL80211_STA_INFO_RX_BYTES);
+                       sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_BYTES);
                        sinfo->rx_bytes = le64_to_cpu(sta_info_le.rx_tot_bytes);
                }
                total_rssi = 0;
@@ -2623,10 +2623,10 @@ brcmf_cfg80211_get_station(struct wiphy *wiphy, struct net_device *ndev,
                        }
                }
                if (count_rssi) {
-                       sinfo->filled |= BIT(NL80211_STA_INFO_CHAIN_SIGNAL);
+                       sinfo->filled |= BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL);
                        sinfo->chains = count_rssi;
 
-                       sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL);
+                       sinfo->filled |= BIT_ULL(NL80211_STA_INFO_SIGNAL);
                        total_rssi /= count_rssi;
                        sinfo->signal = total_rssi;
                } else if (test_bit(BRCMF_VIF_STATUS_CONNECTED,
@@ -2639,7 +2639,7 @@ brcmf_cfg80211_get_station(struct wiphy *wiphy, struct net_device *ndev,
                                goto done;
                        } else {
                                rssi = le32_to_cpu(scb_val.val);
-                               sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL);
+                               sinfo->filled |= BIT_ULL(NL80211_STA_INFO_SIGNAL);
                                sinfo->signal = rssi;
                                brcmf_dbg(CONN, "RSSI %d dBm\n", rssi);
                        }
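
The BIT() to BIT_ULL() conversions in the hunks above are not cosmetic: sinfo->filled is a u64 bitmap, while BIT(n) expands to (1UL << n) and unsigned long is only 32 bits wide on 32-bit architectures, so any NL80211_STA_INFO_* flag at bit position 32 or above would be lost (and the shift itself is undefined behavior). A minimal userspace sketch of the failure mode, using simplified stand-ins for the kernel macros:

    #include <stdint.h>
    #include <stdio.h>

    #define BIT(n)     (1UL << (n))   /* 32 bits wide on ILP32 targets */
    #define BIT_ULL(n) (1ULL << (n))  /* always 64 bits wide */

    int main(void)
    {
        uint64_t filled = 0;

        /* For a hypothetical flag at bit 33, BIT() would shift past the
         * width of a 32-bit long; BIT_ULL() stays well defined. */
        filled |= BIT_ULL(33);
        printf("filled = 0x%llx\n", (unsigned long long)filled);
        return 0;
    }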
index 72954fd6df3b0ad2e0b38fac9cbd89843fec2ef0..b1f702faff4fba45cf4a86d778661eb277d54ae9 100644 (file)
@@ -21,6 +21,7 @@
 #include <net/cfg80211.h>
 #include <net/rtnetlink.h>
 #include <net/addrconf.h>
+#include <net/ieee80211_radiotap.h>
 #include <net/ipv6.h>
 #include <brcmu_utils.h>
 #include <brcmu_wifi.h>
@@ -404,6 +405,30 @@ void brcmf_netif_rx(struct brcmf_if *ifp, struct sk_buff *skb)
                netif_rx_ni(skb);
 }
 
+void brcmf_netif_mon_rx(struct brcmf_if *ifp, struct sk_buff *skb)
+{
+       if (brcmf_feat_is_enabled(ifp, BRCMF_FEAT_MONITOR_FMT_RADIOTAP)) {
+               /* Do nothing */
+       } else {
+               struct ieee80211_radiotap_header *radiotap;
+
+               /* TODO: use RX status to fill some radiotap data */
+               radiotap = skb_push(skb, sizeof(*radiotap));
+               memset(radiotap, 0, sizeof(*radiotap));
+               radiotap->it_len = cpu_to_le16(sizeof(*radiotap));
+
+               /* TODO: 4 bytes with receive status? */
+               skb->len -= 4;
+       }
+
+       skb->dev = ifp->ndev;
+       skb_reset_mac_header(skb);
+       skb->pkt_type = PACKET_OTHERHOST;
+       skb->protocol = htons(ETH_P_802_2);
+
+       brcmf_netif_rx(ifp, skb);
+}
+
 static int brcmf_rx_hdrpull(struct brcmf_pub *drvr, struct sk_buff *skb,
                            struct brcmf_if **ifp)
 {
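
brcmf_netif_mon_rx() above prepends an empty radiotap header whenever the firmware lacks the "rtap" capability and therefore delivers bare 802.11 frames; capture tools expect monitor-mode packets to begin with such a header. For reference, the structure being zero-filled has the following layout (mirroring include/net/ieee80211_radiotap.h; it_present = 0 means no optional fields follow, which is why the bare 8-byte header suffices here):

    struct ieee80211_radiotap_header {
        u8     it_version;  /* always 0 */
        u8     it_pad;
        __le16 it_len;      /* total length of the radiotap header */
        __le32 it_present;  /* bitmap of present fields; 0 in this path */
    } __packed;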
index 401f50458686387ecf6fca317ffc53a08bf39695..dcf6e27cc16f2ac32497499d9ffcf4d771dff9a8 100644 (file)
@@ -121,6 +121,7 @@ struct brcmf_pub {
 
        struct brcmf_if *iflist[BRCMF_MAX_IFS];
        s32 if2bss[BRCMF_MAX_IFS];
+       struct brcmf_if *mon_if;
 
        struct mutex proto_block;
        unsigned char proto_buf[BRCMF_DCMD_MAXLEN];
@@ -216,6 +217,7 @@ void brcmf_txflowblock_if(struct brcmf_if *ifp,
                          enum brcmf_netif_stop_reason reason, bool state);
 void brcmf_txfinalize(struct brcmf_if *ifp, struct sk_buff *txp, bool success);
 void brcmf_netif_rx(struct brcmf_if *ifp, struct sk_buff *skb);
+void brcmf_netif_mon_rx(struct brcmf_if *ifp, struct sk_buff *skb);
 void brcmf_net_setcarrier(struct brcmf_if *ifp, bool on);
 int __init brcmf_core_init(void);
 void __exit brcmf_core_exit(void);
index 800a423c7bc2c3b1c9731a47b2e3bc3d7827490a..4db4d444407afa6a48806193cbbeb596b3863533 100644 (file)
@@ -48,6 +48,8 @@ static const struct brcmf_feat_fwcap brcmf_fwcap_map[] = {
        { BRCMF_FEAT_MBSS, "mbss" },
        { BRCMF_FEAT_MCHAN, "mchan" },
        { BRCMF_FEAT_P2P, "p2p" },
+       { BRCMF_FEAT_MONITOR, "monitor" },
+       { BRCMF_FEAT_MONITOR_FMT_RADIOTAP, "rtap" },
 };
 
 #ifdef DEBUG
index d1193825e55911525dd91df4b598ea2d630cfdeb..0b4974df353a5ab6ac3d5008f744c267a6e62740 100644 (file)
@@ -33,6 +33,8 @@
  * MFP: 802.11w Management Frame Protection.
  * GSCAN: enhanced scan offload feature.
  * FWSUP: Firmware supplicant.
+ * MONITOR: firmware can pass monitor packets to host.
+ * MONITOR_FMT_RADIOTAP: firmware provides monitor packets with radiotap header.
  */
 #define BRCMF_FEAT_LIST \
        BRCMF_FEAT_DEF(MBSS) \
@@ -48,7 +50,9 @@
        BRCMF_FEAT_DEF(WOWL_ARP_ND) \
        BRCMF_FEAT_DEF(MFP) \
        BRCMF_FEAT_DEF(GSCAN) \
-       BRCMF_FEAT_DEF(FWSUP)
+       BRCMF_FEAT_DEF(FWSUP) \
+       BRCMF_FEAT_DEF(MONITOR) \
+       BRCMF_FEAT_DEF(MONITOR_FMT_RADIOTAP)
 
 /*
  * Quirks:
index 4b290705e3e64abd786dde964564d6d70f728627..d5bb81e887624ca4b17c737f430f9a5a4124d7b4 100644 (file)
 #define        BRCMF_BSS_INFO_VERSION  109 /* curr ver of brcmf_bss_info_le struct */
 #define BRCMF_BSS_RSSI_ON_CHANNEL      0x0002
 
-#define BRCMF_STA_WME              0x00000002      /* WMM association */
-#define BRCMF_STA_AUTHE            0x00000008      /* Authenticated */
-#define BRCMF_STA_ASSOC            0x00000010      /* Associated */
-#define BRCMF_STA_AUTHO            0x00000020      /* Authorized */
-#define BRCMF_STA_SCBSTATS         0x00004000      /* Per STA debug stats */
+#define BRCMF_STA_BRCM                 0x00000001      /* Running a Broadcom driver */
+#define BRCMF_STA_WME                  0x00000002      /* WMM association */
+#define BRCMF_STA_NONERP               0x00000004      /* No ERP */
+#define BRCMF_STA_AUTHE                        0x00000008      /* Authenticated */
+#define BRCMF_STA_ASSOC                        0x00000010      /* Associated */
+#define BRCMF_STA_AUTHO                        0x00000020      /* Authorized */
+#define BRCMF_STA_WDS                  0x00000040      /* Wireless Distribution System */
+#define BRCMF_STA_WDS_LINKUP           0x00000080      /* WDS traffic/probes flowing properly */
+#define BRCMF_STA_PS                   0x00000100      /* STA is in power save mode from AP's viewpoint */
+#define BRCMF_STA_APSD_BE              0x00000200      /* APSD delv/trigger for AC_BE is default enabled */
+#define BRCMF_STA_APSD_BK              0x00000400      /* APSD delv/trigger for AC_BK is default enabled */
+#define BRCMF_STA_APSD_VI              0x00000800      /* APSD delv/trigger for AC_VI is default enabled */
+#define BRCMF_STA_APSD_VO              0x00001000      /* APSD delv/trigger for AC_VO is default enabled */
+#define BRCMF_STA_N_CAP                        0x00002000      /* STA 802.11n capable */
+#define BRCMF_STA_SCBSTATS             0x00004000      /* Per STA debug stats */
+#define BRCMF_STA_AMPDU_CAP            0x00008000      /* STA AMPDU capable */
+#define BRCMF_STA_AMSDU_CAP            0x00010000      /* STA AMSDU capable */
+#define BRCMF_STA_MIMO_PS              0x00020000      /* mimo ps mode is enabled */
+#define BRCMF_STA_MIMO_RTS             0x00040000      /* send rts in mimo ps mode */
+#define BRCMF_STA_RIFS_CAP             0x00080000      /* rifs enabled */
+#define BRCMF_STA_VHT_CAP              0x00100000      /* STA VHT(11ac) capable */
+#define BRCMF_STA_WPS                  0x00200000      /* WPS state */
+#define BRCMF_STA_DWDS_CAP             0x01000000      /* DWDS CAP */
+#define BRCMF_STA_DWDS                 0x02000000      /* DWDS active */
 
 /* size of brcmf_scan_params not including variable length array */
 #define BRCMF_SCAN_PARAMS_FIXED_SIZE   64
 #define BRCMF_MFP_CAPABLE              1
 #define BRCMF_MFP_REQUIRED             2
 
+#define BRCMF_VHT_CAP_MCS_MAP_NSS_MAX  8
+
 /* MAX_CHUNK_LEN is the maximum length for data passing to firmware in each
  * ioctl. It is relatively small because firmware has small maximum size input
  * payload restriction for ioctls.
@@ -531,6 +552,8 @@ struct brcmf_sta_info_le {
                                                /* w/hi bit set if basic */
        __le32 in;              /* seconds elapsed since associated */
        __le32 listen_interval_inms; /* Min Listen interval in ms for STA */
+
+       /* Fields valid for ver >= 3 */
        __le32 tx_pkts; /* # of packets transmitted */
        __le32 tx_failures;     /* # of packets failed */
        __le32 rx_ucast_pkts;   /* # of unicast packets received */
@@ -539,6 +562,8 @@ struct brcmf_sta_info_le {
        __le32 rx_rate; /* Rate of last successful rx frame */
        __le32 rx_decrypt_succeeds;     /* # of packet decrypted successfully */
        __le32 rx_decrypt_failures;     /* # of packet decrypted failed */
+
+       /* Fields valid for ver >= 4 */
        __le32 tx_tot_pkts;    /* # of tx pkts (ucast + mcast) */
        __le32 rx_tot_pkts;    /* # of data packets recvd (uni + mcast) */
        __le32 tx_mcast_pkts;  /* # of mcast pkts txed */
@@ -575,6 +600,14 @@ struct brcmf_sta_info_le {
                                                */
        __le32 rx_pkts_retried;        /* # rx with retry bit set */
        __le32 tx_rate_fallback;       /* lowest fallback TX rate */
+
+       /* Fields valid for ver >= 5 */
+       struct {
+               __le32 count;                                   /* # rates in this set */
+               u8 rates[BRCMF_MAXRATES_IN_SET];                /* rates in 500kbps units w/hi bit set if basic */
+               u8 mcs[BRCMF_MCSSET_LEN];                       /* supported mcs index bit map */
+               __le16 vht_mcs[BRCMF_VHT_CAP_MCS_MAP_NSS_MAX];  /* supported mcs index bit map per nss */
+       } rateset_adv;
 };
 
 struct brcmf_chanspec_list {
index c40ba8855cd53ee6ad058af2d71bcb6bdae3f114..4e8397a0cbc8e891a739dad2a2a4d67ffd574237 100644 (file)
@@ -69,6 +69,8 @@
 #define BRCMF_MSGBUF_MAX_EVENTBUF_POST         8
 
 #define BRCMF_MSGBUF_PKT_FLAGS_FRAME_802_3     0x01
+#define BRCMF_MSGBUF_PKT_FLAGS_FRAME_802_11    0x02
+#define BRCMF_MSGBUF_PKT_FLAGS_FRAME_MASK      0x07
 #define BRCMF_MSGBUF_PKT_FLAGS_PRIO_SHIFT      5
 
 #define BRCMF_MSGBUF_TX_FLUSH_CNT1             32
@@ -1128,6 +1130,7 @@ brcmf_msgbuf_process_rx_complete(struct brcmf_msgbuf *msgbuf, void *buf)
        struct sk_buff *skb;
        u16 data_offset;
        u16 buflen;
+       u16 flags;
        u32 idx;
        struct brcmf_if *ifp;
 
@@ -1137,6 +1140,7 @@ brcmf_msgbuf_process_rx_complete(struct brcmf_msgbuf *msgbuf, void *buf)
        data_offset = le16_to_cpu(rx_complete->data_offset);
        buflen = le16_to_cpu(rx_complete->data_len);
        idx = le32_to_cpu(rx_complete->msg.request_id);
+       flags = le16_to_cpu(rx_complete->flags);
 
        skb = brcmf_msgbuf_get_pktid(msgbuf->drvr->bus_if->dev,
                                     msgbuf->rx_pktids, idx);
@@ -1150,6 +1154,20 @@ brcmf_msgbuf_process_rx_complete(struct brcmf_msgbuf *msgbuf, void *buf)
 
        skb_trim(skb, buflen);
 
+       if ((flags & BRCMF_MSGBUF_PKT_FLAGS_FRAME_MASK) ==
+           BRCMF_MSGBUF_PKT_FLAGS_FRAME_802_11) {
+               ifp = msgbuf->drvr->mon_if;
+
+               if (!ifp) {
+                       brcmf_err("Received unexpected monitor pkt\n");
+                       brcmu_pkt_buf_free_skb(skb);
+                       return;
+               }
+
+               brcmf_netif_mon_rx(ifp, skb);
+               return;
+       }
+
        ifp = brcmf_get_ifp(msgbuf->drvr, rx_complete->msg.ifidx);
        if (!ifp || !ifp->ndev) {
                brcmf_err("Received pkt for invalid ifidx %d\n",
index 3a13d176b221eac96bb85dd6442b8fd27bcba1f7..35e3b101e5cf0fe41cfec624a52eb07a93ea34a5 100644 (file)
@@ -159,7 +159,7 @@ u16 read_radio_reg(struct brcms_phy *pi, u16 addr)
 {
        u16 data;
 
-       if ((addr == RADIO_IDCODE))
+       if (addr == RADIO_IDCODE)
                return 0xffff;
 
        switch (pi->pubpi.phy_type) {
index 1a187557982e6c2d63f9c37e37229cf375ffd135..bedec1606caa40010f51c74c421fd9c5eaa8246a 100644 (file)
@@ -16904,7 +16904,7 @@ static void wlc_phy_workarounds_nphy_rev3(struct brcms_phy *pi)
        }
 }
 
-void wlc_phy_workarounds_nphy_rev1(struct brcms_phy *pi)
+static void wlc_phy_workarounds_nphy_rev1(struct brcms_phy *pi)
 {
        static const u8 rfseq_rx2tx_events[] = {
                NPHY_RFSEQ_CMD_NOP,
index b9672da24a9d3d8f7b58cbe19cc282281cd271ef..b24bc57ca91b8a61bcbf0e26c12e3c3805e820d4 100644 (file)
@@ -213,7 +213,7 @@ static const s16 log_table[] = {
        30498,
        31267,
        32024,
-       32768
+       32767
 };
 
 #define LOG_TABLE_SIZE 32       /* log_table size */
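
The 32768 -> 32767 change above fixes an overflow: log_table is declared s16, whose maximum value is 32767, so the literal 32768 did not fit and was silently converted to a negative value. A standalone illustration:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        int16_t last = 32768; /* out of range for s16; on common ABIs this
                               * wraps to -32768 (and gcc warns -Woverflow) */
        printf("%d\n", last);
        return 0;
    }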
index b8fd3cc90634d116af789caeae309444fffae42b..1ad83ef5f202e0f2d7ae6f1919d2b11aa21b52b2 100644 (file)
@@ -692,7 +692,7 @@ static void printk_buf(int level, const u8 * data, u32 len)
 
 static void schedule_reset(struct ipw2100_priv *priv)
 {
-       unsigned long now = get_seconds();
+       time64_t now = ktime_get_boottime_seconds();
 
        /* If we haven't received a reset request within the backoff period,
         * then we can reset the backoff interval so this reset occurs
@@ -701,10 +701,10 @@ static void schedule_reset(struct ipw2100_priv *priv)
            (now - priv->last_reset > priv->reset_backoff))
                priv->reset_backoff = 0;
 
-       priv->last_reset = get_seconds();
+       priv->last_reset = now;
 
        if (!(priv->status & STATUS_RESET_PENDING)) {
-               IPW_DEBUG_INFO("%s: Scheduling firmware restart (%ds).\n",
+               IPW_DEBUG_INFO("%s: Scheduling firmware restart (%llds).\n",
                               priv->net_dev->name, priv->reset_backoff);
                netif_carrier_off(priv->net_dev);
                netif_stop_queue(priv->net_dev);
@@ -2079,7 +2079,7 @@ static void isr_indicate_associated(struct ipw2100_priv *priv, u32 status)
        memcpy(priv->bssid, bssid, ETH_ALEN);
 
        priv->status |= STATUS_ASSOCIATING;
-       priv->connect_start = get_seconds();
+       priv->connect_start = ktime_get_boottime_seconds();
 
        schedule_delayed_work(&priv->wx_event_work, HZ / 10);
 }
@@ -4070,8 +4070,8 @@ static ssize_t show_internals(struct device *d, struct device_attribute *attr,
 #define DUMP_VAR(x,y) len += sprintf(buf + len, # x ": %" y "\n", priv-> x)
 
        if (priv->status & STATUS_ASSOCIATED)
-               len += sprintf(buf + len, "connected: %lu\n",
-                              get_seconds() - priv->connect_start);
+               len += sprintf(buf + len, "connected: %llu\n",
+                              ktime_get_boottime_seconds() - priv->connect_start);
        else
                len += sprintf(buf + len, "not connected\n");
 
@@ -4108,7 +4108,7 @@ static ssize_t show_internals(struct device *d, struct device_attribute *attr,
        DUMP_VAR(txq_stat.lo, "d");
 
        DUMP_VAR(ieee->scans, "d");
-       DUMP_VAR(reset_backoff, "d");
+       DUMP_VAR(reset_backoff, "lld");
 
        return len;
 }
@@ -6437,7 +6437,7 @@ static int ipw2100_suspend(struct pci_dev *pci_dev, pm_message_t state)
        pci_disable_device(pci_dev);
        pci_set_power_state(pci_dev, PCI_D3hot);
 
-       priv->suspend_at = get_seconds();
+       priv->suspend_at = ktime_get_boottime_seconds();
 
        mutex_unlock(&priv->action_mutex);
 
@@ -6482,7 +6482,7 @@ static int ipw2100_resume(struct pci_dev *pci_dev)
         * the queue if needed */
        netif_device_attach(dev);
 
-       priv->suspend_time = get_seconds() - priv->suspend_at;
+       priv->suspend_time = ktime_get_boottime_seconds() - priv->suspend_at;
 
        /* Bring the device back up */
        if (!(priv->status & STATUS_RF_KILL_SW))
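
The ipw2100 hunks above replace get_seconds(), which returns a wall-clock-derived unsigned long that overflows in 2038 on 32-bit systems and can jump when the clock is set, with ktime_get_boottime_seconds(), a monotonic time64_t that also keeps counting across suspend, which is exactly what interval bookkeeping such as suspend_at/suspend_time wants. The pattern, sketched with hypothetical names:

    #include <linux/timekeeping.h>

    static time64_t suspend_at; /* hypothetical example state */

    static void my_suspend(void)
    {
        suspend_at = ktime_get_boottime_seconds();
    }

    static time64_t my_suspend_duration(void)
    {
        /* monotonic, y2038-safe, and includes time spent suspended */
        return ktime_get_boottime_seconds() - suspend_at;
    }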
index ce3e35f6b60f485faec5b0a9ba2ba9b35a435f8c..8c11c7fa2eefe2bb3c1108a8655272aad6b2d26e 100644 (file)
@@ -491,7 +491,7 @@ struct ipw2100_priv {
 
        /* Statistics */
        int resets;
-       int reset_backoff;
+       time64_t reset_backoff;
 
        /* Context */
        u8 essid[IW_ESSID_MAX_SIZE];
@@ -500,8 +500,8 @@ struct ipw2100_priv {
        u8 channel;
        int last_mode;
 
-       unsigned long connect_start;
-       unsigned long last_reset;
+       time64_t connect_start;
+       time64_t last_reset;
 
        u32 channel_mask;
        u32 fatal_error;
@@ -581,9 +581,9 @@ struct ipw2100_priv {
 
        int user_requested_scan;
 
-       /* Track time in suspend */
-       unsigned long suspend_at;
-       unsigned long suspend_time;
+       /* Track time in suspend, using CLOCK_BOOTTIME */
+       time64_t suspend_at;
+       time64_t suspend_time;
 
        u32 interrupts;
        int tx_interrupts;
index 8a858f7e36f445374843b3090638f23ef28357be..9644e7b93645f510394b1deb3accf6976fc87a78 100644 (file)
@@ -7112,7 +7112,7 @@ static u32 ipw_qos_get_burst_duration(struct ipw_priv *priv)
 {
        u32 ret = 0;
 
-       if ((priv == NULL))
+       if (!priv)
                return 0;
 
        if (!(priv->ieee->modulation & LIBIPW_OFDM_MODULATION))
@@ -11888,7 +11888,7 @@ static int ipw_pci_suspend(struct pci_dev *pdev, pm_message_t state)
        pci_disable_device(pdev);
        pci_set_power_state(pdev, pci_choose_state(pdev, state));
 
-       priv->suspend_at = get_seconds();
+       priv->suspend_at = ktime_get_boottime_seconds();
 
        return 0;
 }
@@ -11925,7 +11925,7 @@ static int ipw_pci_resume(struct pci_dev *pdev)
         * the queue if needed */
        netif_device_attach(dev);
 
-       priv->suspend_time = get_seconds() - priv->suspend_at;
+       priv->suspend_time = ktime_get_boottime_seconds() - priv->suspend_at;
 
        /* Bring the device back up */
        schedule_work(&priv->up);
index aa301d1eee3cbe50435e48f6fdf5ba84ed7f22d4..f98ab1f71edd9c3d5c2465baf38da56d2b0d9e26 100644 (file)
@@ -1343,9 +1343,9 @@ struct ipw_priv {
 
        s8 tx_power;
 
-       /* Track time in suspend */
-       unsigned long suspend_at;
-       unsigned long suspend_time;
+       /* Track time in suspend using CLOCK_BOOTTIME */
+       time64_t suspend_at;
+       time64_t suspend_time;
 
 #ifdef CONFIG_PM
        u32 pm_state[16];
index a6e072234398e982733d8046e09cdb29967fd995..26021bc55e981cf727c5682b36f8e3057c05d0a8 100644 (file)
@@ -4216,7 +4216,7 @@ static void iwl_mvm_mac_sta_statistics(struct ieee80211_hw *hw,
 
        if (mvmsta->avg_energy) {
                sinfo->signal_avg = mvmsta->avg_energy;
-               sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL_AVG);
+               sinfo->filled |= BIT_ULL(NL80211_STA_INFO_SIGNAL_AVG);
        }
 
        if (!fw_has_capa(&mvm->fw->ucode_capa,
@@ -4240,11 +4240,11 @@ static void iwl_mvm_mac_sta_statistics(struct ieee80211_hw *hw,
 
        sinfo->rx_beacon = mvmvif->beacon_stats.num_beacons +
                           mvmvif->beacon_stats.accu_num_beacons;
-       sinfo->filled |= BIT(NL80211_STA_INFO_BEACON_RX);
+       sinfo->filled |= BIT_ULL(NL80211_STA_INFO_BEACON_RX);
        if (mvmvif->beacon_stats.avg_signal) {
                /* firmware only reports a value after RXing a few beacons */
                sinfo->rx_beacon_signal_avg = mvmvif->beacon_stats.avg_signal;
-               sinfo->filled |= BIT(NL80211_STA_INFO_BEACON_SIGNAL_AVG);
+               sinfo->filled |= BIT_ULL(NL80211_STA_INFO_BEACON_SIGNAL_AVG);
        }
  unlock:
        mutex_unlock(&mvm->mutex);
index 18e819d964f1cdadf7616dd70321bce6c6bf43e3..998dfac0fcff359d3727fb5df313e257484378b6 100644 (file)
@@ -2,6 +2,7 @@
  * mac80211_hwsim - software simulator of 802.11 radio(s) for mac80211
  * Copyright (c) 2008, Jouni Malinen <j@w1.fi>
  * Copyright (c) 2011, Javier Lopez <jlopex@gmail.com>
+ * Copyright (c) 2016 - 2017 Intel Deutschland GmbH
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -2517,6 +2518,123 @@ out_err:
        nlmsg_free(mcast_skb);
 }
 
+static const struct ieee80211_sband_iftype_data he_capa_2ghz = {
+       /* TODO: should we support other types, e.g., P2P? */
+       .types_mask = BIT(NL80211_IFTYPE_STATION) | BIT(NL80211_IFTYPE_AP),
+       .he_cap = {
+               .has_he = true,
+               .he_cap_elem = {
+                       .mac_cap_info[0] =
+                               IEEE80211_HE_MAC_CAP0_HTC_HE,
+                       .mac_cap_info[1] =
+                               IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_16US |
+                               IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_8,
+                       .mac_cap_info[2] =
+                               IEEE80211_HE_MAC_CAP2_BSR |
+                               IEEE80211_HE_MAC_CAP2_MU_CASCADING |
+                               IEEE80211_HE_MAC_CAP2_ACK_EN,
+                       .mac_cap_info[3] =
+                               IEEE80211_HE_MAC_CAP3_GRP_ADDR_MULTI_STA_BA_DL_MU |
+                               IEEE80211_HE_MAC_CAP3_OMI_CONTROL |
+                               IEEE80211_HE_MAC_CAP3_MAX_A_AMPDU_LEN_EXP_VHT_2,
+                       .mac_cap_info[4] = IEEE80211_HE_MAC_CAP4_AMDSU_IN_AMPDU,
+                       .phy_cap_info[0] =
+                               IEEE80211_HE_PHY_CAP0_DUAL_BAND,
+                       .phy_cap_info[1] =
+                               IEEE80211_HE_PHY_CAP1_PREAMBLE_PUNC_RX_MASK |
+                               IEEE80211_HE_PHY_CAP1_DEVICE_CLASS_A |
+                               IEEE80211_HE_PHY_CAP1_LDPC_CODING_IN_PAYLOAD |
+                               IEEE80211_HE_PHY_CAP1_MIDAMBLE_RX_MAX_NSTS,
+                       .phy_cap_info[2] =
+                               IEEE80211_HE_PHY_CAP2_NDP_4x_LTF_AND_3_2US |
+                               IEEE80211_HE_PHY_CAP2_STBC_TX_UNDER_80MHZ |
+                               IEEE80211_HE_PHY_CAP2_STBC_RX_UNDER_80MHZ |
+                               IEEE80211_HE_PHY_CAP2_UL_MU_FULL_MU_MIMO |
+                               IEEE80211_HE_PHY_CAP2_UL_MU_PARTIAL_MU_MIMO,
+
+                       /* Leave all the other PHY capability bytes unset, as
+                        * DCM, beam forming, RU and PPE threshold information
+                        * are not supported
+                        */
+               },
+               .he_mcs_nss_supp = {
+                       .rx_mcs_80 = cpu_to_le16(0xfffa),
+                       .tx_mcs_80 = cpu_to_le16(0xfffa),
+                       .rx_mcs_160 = cpu_to_le16(0xffff),
+                       .tx_mcs_160 = cpu_to_le16(0xffff),
+                       .rx_mcs_80p80 = cpu_to_le16(0xffff),
+                       .tx_mcs_80p80 = cpu_to_le16(0xffff),
+               },
+       },
+};
+
+static const struct ieee80211_sband_iftype_data he_capa_5ghz = {
+       /* TODO: should we support other types, e.g., P2P? */
+       .types_mask = BIT(NL80211_IFTYPE_STATION) | BIT(NL80211_IFTYPE_AP),
+       .he_cap = {
+               .has_he = true,
+               .he_cap_elem = {
+                       .mac_cap_info[0] =
+                               IEEE80211_HE_MAC_CAP0_HTC_HE,
+                       .mac_cap_info[1] =
+                               IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_16US |
+                               IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_8,
+                       .mac_cap_info[2] =
+                               IEEE80211_HE_MAC_CAP2_BSR |
+                               IEEE80211_HE_MAC_CAP2_MU_CASCADING |
+                               IEEE80211_HE_MAC_CAP2_ACK_EN,
+                       .mac_cap_info[3] =
+                               IEEE80211_HE_MAC_CAP3_GRP_ADDR_MULTI_STA_BA_DL_MU |
+                               IEEE80211_HE_MAC_CAP3_OMI_CONTROL |
+                               IEEE80211_HE_MAC_CAP3_MAX_A_AMPDU_LEN_EXP_VHT_2,
+                       .mac_cap_info[4] = IEEE80211_HE_MAC_CAP4_AMDSU_IN_AMPDU,
+                       .phy_cap_info[0] =
+                               IEEE80211_HE_PHY_CAP0_DUAL_BAND |
+                               IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G |
+                               IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G |
+                               IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G,
+                       .phy_cap_info[1] =
+                               IEEE80211_HE_PHY_CAP1_PREAMBLE_PUNC_RX_MASK |
+                               IEEE80211_HE_PHY_CAP1_DEVICE_CLASS_A |
+                               IEEE80211_HE_PHY_CAP1_LDPC_CODING_IN_PAYLOAD |
+                               IEEE80211_HE_PHY_CAP1_MIDAMBLE_RX_MAX_NSTS,
+                       .phy_cap_info[2] =
+                               IEEE80211_HE_PHY_CAP2_NDP_4x_LTF_AND_3_2US |
+                               IEEE80211_HE_PHY_CAP2_STBC_TX_UNDER_80MHZ |
+                               IEEE80211_HE_PHY_CAP2_STBC_RX_UNDER_80MHZ |
+                               IEEE80211_HE_PHY_CAP2_UL_MU_FULL_MU_MIMO |
+                               IEEE80211_HE_PHY_CAP2_UL_MU_PARTIAL_MU_MIMO,
+
+                       /* Leave all the other PHY capability bytes unset, as
+                        * DCM, beam forming, RU and PPE threshold information
+                        * are not supported
+                        */
+               },
+               .he_mcs_nss_supp = {
+                       .rx_mcs_80 = cpu_to_le16(0xfffa),
+                       .tx_mcs_80 = cpu_to_le16(0xfffa),
+                       .rx_mcs_160 = cpu_to_le16(0xfffa),
+                       .tx_mcs_160 = cpu_to_le16(0xfffa),
+                       .rx_mcs_80p80 = cpu_to_le16(0xfffa),
+                       .tx_mcs_80p80 = cpu_to_le16(0xfffa),
+               },
+       },
+};
+
+static void mac80211_hwsim_he_capab(struct ieee80211_supported_band *sband)
+{
+       if (sband->band == NL80211_BAND_2GHZ)
+               sband->iftype_data =
+                       (struct ieee80211_sband_iftype_data *)&he_capa_2ghz;
+       else if (sband->band == NL80211_BAND_5GHZ)
+               sband->iftype_data =
+                       (struct ieee80211_sband_iftype_data *)&he_capa_5ghz;
+       else
+               return;
+
+       sband->n_iftype_data = 1;
+}
+
 static int mac80211_hwsim_new_radio(struct genl_info *info,
                                    struct hwsim_new_radio_params *param)
 {
@@ -2678,6 +2796,9 @@ static int mac80211_hwsim_new_radio(struct genl_info *info,
 
        for (band = NL80211_BAND_2GHZ; band < NUM_NL80211_BANDS; band++) {
                struct ieee80211_supported_band *sband = &data->bands[band];
+
+               sband->band = band;
+
                switch (band) {
                case NL80211_BAND_2GHZ:
                        sband->channels = data->channels_2ghz;
@@ -2734,6 +2855,8 @@ static int mac80211_hwsim_new_radio(struct genl_info *info,
                sband->ht_cap.mcs.rx_mask[1] = 0xff;
                sband->ht_cap.mcs.tx_params = IEEE80211_HT_MCS_TX_DEFINED;
 
+               mac80211_hwsim_he_capab(sband);
+
                hw->wiphy->bands[band] = sband;
        }
 
index f99031cfdf868e14509dee1db8e65c0ee099b4d0..57edfada0665fda7ec68b7f60128eef3af9a5219 100644 (file)
@@ -1559,10 +1559,10 @@ static int lbs_cfg_get_station(struct wiphy *wiphy, struct net_device *dev,
        int ret;
        size_t i;
 
-       sinfo->filled |= BIT(NL80211_STA_INFO_TX_BYTES) |
-                        BIT(NL80211_STA_INFO_TX_PACKETS) |
-                        BIT(NL80211_STA_INFO_RX_BYTES) |
-                        BIT(NL80211_STA_INFO_RX_PACKETS);
+       sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_BYTES) |
+                        BIT_ULL(NL80211_STA_INFO_TX_PACKETS) |
+                        BIT_ULL(NL80211_STA_INFO_RX_BYTES) |
+                        BIT_ULL(NL80211_STA_INFO_RX_PACKETS);
        sinfo->tx_bytes = priv->dev->stats.tx_bytes;
        sinfo->tx_packets = priv->dev->stats.tx_packets;
        sinfo->rx_bytes = priv->dev->stats.rx_bytes;
@@ -1572,14 +1572,14 @@ static int lbs_cfg_get_station(struct wiphy *wiphy, struct net_device *dev,
        ret = lbs_get_rssi(priv, &signal, &noise);
        if (ret == 0) {
                sinfo->signal = signal;
-               sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL);
+               sinfo->filled |= BIT_ULL(NL80211_STA_INFO_SIGNAL);
        }
 
        /* Convert priv->cur_rate from hw_value to NL80211 value */
        for (i = 0; i < ARRAY_SIZE(lbs_rates); i++) {
                if (priv->cur_rate == lbs_rates[i].hw_value) {
                        sinfo->txrate.legacy = lbs_rates[i].bitrate;
-                       sinfo->filled |= BIT(NL80211_STA_INFO_TX_BITRATE);
+                       sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_BITRATE);
                        break;
                }
        }
index dd1ee1f0af48968046c4ca605e90ae53754fba89..469134930026520bd6253ecee125af5cccee19b3 100644 (file)
@@ -104,6 +104,7 @@ struct lbs_private {
        u8 fw_ready;
        u8 surpriseremoved;
        u8 setup_fw_on_resume;
+       u8 power_up_on_resume;
        int (*hw_host_to_card) (struct lbs_private *priv, u8 type, u8 *payload, u16 nb);
        void (*reset_card) (struct lbs_private *priv);
        int (*power_save) (struct lbs_private *priv);
index 2300e796c6ab9e8106e132b3988fa240dc4783a4..43743c26c071f538f1942696aa97d20b0cbf091d 100644 (file)
@@ -1290,15 +1290,23 @@ static void if_sdio_remove(struct sdio_func *func)
 static int if_sdio_suspend(struct device *dev)
 {
        struct sdio_func *func = dev_to_sdio_func(dev);
-       int ret;
        struct if_sdio_card *card = sdio_get_drvdata(func);
+       struct lbs_private *priv = card->priv;
+       int ret;
 
        mmc_pm_flag_t flags = sdio_get_host_pm_caps(func);
+       priv->power_up_on_resume = false;
 
        /* If we're powered off anyway, just let the mmc layer remove the
         * card. */
-       if (!lbs_iface_active(card->priv))
-               return -ENOSYS;
+       if (!lbs_iface_active(priv)) {
+               if (priv->fw_ready) {
+                       priv->power_up_on_resume = true;
+                       if_sdio_power_off(card);
+               }
+
+               return 0;
+       }
 
        dev_info(dev, "%s: suspend: PM flags = 0x%x\n",
                 sdio_func_id(func), flags);
@@ -1306,9 +1314,14 @@ static int if_sdio_suspend(struct device *dev)
        /* If we aren't being asked to wake on anything, we should bail out
         * and let the SD stack power down the card.
         */
-       if (card->priv->wol_criteria == EHS_REMOVE_WAKEUP) {
+       if (priv->wol_criteria == EHS_REMOVE_WAKEUP) {
                dev_info(dev, "Suspend without wake params -- powering down card\n");
-               return -ENOSYS;
+               if (priv->fw_ready) {
+                       priv->power_up_on_resume = true;
+                       if_sdio_power_off(card);
+               }
+
+               return 0;
        }
 
        if (!(flags & MMC_PM_KEEP_POWER)) {
@@ -1321,7 +1334,7 @@ static int if_sdio_suspend(struct device *dev)
        if (ret)
                return ret;
 
-       ret = lbs_suspend(card->priv);
+       ret = lbs_suspend(priv);
        if (ret)
                return ret;
 
@@ -1336,6 +1349,11 @@ static int if_sdio_resume(struct device *dev)
 
        dev_info(dev, "%s: resume: we're back\n", sdio_func_id(func));
 
+       if (card->priv->power_up_on_resume) {
+               if_sdio_power_on(card);
+               wait_event(card->pwron_waitq, card->priv->fw_ready);
+       }
+
        ret = lbs_resume(card->priv);
 
        return ret;
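
Instead of returning -ENOSYS from suspend (which made the MMC core remove the card), the libertas SDIO driver now powers the card off itself, records power_up_on_resume, and on resume re-powers the card and sleeps until the firmware reload completes. The pwron_waitq handshake is the standard wait_event/wake_up pattern; the waker, in the firmware-ready path outside this hunk, must set the condition before waking. Illustrative, with hypothetical names:

    static DECLARE_WAIT_QUEUE_HEAD(pwron_waitq);
    static bool fw_ready;

    static void fw_download_done(void)
    {
        fw_ready = true;        /* publish the condition first */
        wake_up(&pwron_waitq);  /* then wake any sleeper */
    }

    static void resume_path(void)
    {
        /* sleeps until fw_ready is observed true */
        wait_event(pwron_waitq, fw_ready);
    }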
index ffea610f67e2f661935dd6d7c0a7b4d592dbc35b..c67a8e7be31069febfb0b0d15c77b243fe3485dd 100644 (file)
@@ -614,6 +614,7 @@ static inline void process_cmdrequest(int recvlength, uint8_t *recvbuff,
                                      struct if_usb_card *cardp,
                                      struct lbs_private *priv)
 {
+       unsigned long flags;
        u8 i;
 
        if (recvlength > LBS_CMD_BUFFER_SIZE) {
@@ -623,9 +624,7 @@ static inline void process_cmdrequest(int recvlength, uint8_t *recvbuff,
                return;
        }
 
-       BUG_ON(!in_interrupt());
-
-       spin_lock(&priv->driver_lock);
+       spin_lock_irqsave(&priv->driver_lock, flags);
 
        i = (priv->resp_idx == 0) ? 1 : 0;
        BUG_ON(priv->resp_len[i]);
@@ -635,7 +634,7 @@ static inline void process_cmdrequest(int recvlength, uint8_t *recvbuff,
        kfree_skb(skb);
        lbs_notify_command_response(priv, i);
 
-       spin_unlock(&priv->driver_lock);
+       spin_unlock_irqrestore(&priv->driver_lock, flags);
 
        lbs_deb_usbd(&cardp->udev->dev,
                    "Wake up main thread to handle cmd response\n");
index 5153922e7ce180cfcbdce18e97ed569e79b3f14e..e92fc5001171714e50bfb36bd195d3d8ef421ced 100644 (file)
@@ -603,6 +603,8 @@ static inline void process_cmdrequest(int recvlength, uint8_t *recvbuff,
                                      struct if_usb_card *cardp,
                                      struct lbtf_private *priv)
 {
+       unsigned long flags;
+
        if (recvlength > LBS_CMD_BUFFER_SIZE) {
                lbtf_deb_usbd(&cardp->udev->dev,
                             "The receive buffer is too large\n");
@@ -610,14 +612,12 @@ static inline void process_cmdrequest(int recvlength, uint8_t *recvbuff,
                return;
        }
 
-       BUG_ON(!in_interrupt());
-
-       spin_lock(&priv->driver_lock);
+       spin_lock_irqsave(&priv->driver_lock, flags);
        memcpy(priv->cmd_resp_buff, recvbuff + MESSAGE_HEADER_LEN,
               recvlength - MESSAGE_HEADER_LEN);
        kfree_skb(skb);
        lbtf_cmd_response_rx(priv);
-       spin_unlock(&priv->driver_lock);
+       spin_unlock_irqrestore(&priv->driver_lock, flags);
 }
 
 /**
index 4b5ae9098504bd6b28e83625aa40bda5a02d6c61..c02e02c17c9cf5d2393f85c25f77d2c9900912eb 100644 (file)
@@ -1353,17 +1353,17 @@ mwifiex_dump_station_info(struct mwifiex_private *priv,
 {
        u32 rate;
 
-       sinfo->filled = BIT(NL80211_STA_INFO_RX_BYTES) | BIT(NL80211_STA_INFO_TX_BYTES) |
-                       BIT(NL80211_STA_INFO_RX_PACKETS) | BIT(NL80211_STA_INFO_TX_PACKETS) |
-                       BIT(NL80211_STA_INFO_TX_BITRATE) |
-                       BIT(NL80211_STA_INFO_SIGNAL) | BIT(NL80211_STA_INFO_SIGNAL_AVG);
+       sinfo->filled = BIT_ULL(NL80211_STA_INFO_RX_BYTES) | BIT_ULL(NL80211_STA_INFO_TX_BYTES) |
+                       BIT_ULL(NL80211_STA_INFO_RX_PACKETS) | BIT_ULL(NL80211_STA_INFO_TX_PACKETS) |
+                       BIT_ULL(NL80211_STA_INFO_TX_BITRATE) |
+                       BIT_ULL(NL80211_STA_INFO_SIGNAL) | BIT_ULL(NL80211_STA_INFO_SIGNAL_AVG);
 
        if (GET_BSS_ROLE(priv) == MWIFIEX_BSS_ROLE_UAP) {
                if (!node)
                        return -ENOENT;
 
-               sinfo->filled |= BIT(NL80211_STA_INFO_INACTIVE_TIME) |
-                               BIT(NL80211_STA_INFO_TX_FAILED);
+               sinfo->filled |= BIT_ULL(NL80211_STA_INFO_INACTIVE_TIME) |
+                               BIT_ULL(NL80211_STA_INFO_TX_FAILED);
                sinfo->inactive_time =
                        jiffies_to_msecs(jiffies - node->stats.last_rx);
 
@@ -1413,7 +1413,7 @@ mwifiex_dump_station_info(struct mwifiex_private *priv,
        sinfo->txrate.legacy = rate * 5;
 
        if (priv->bss_mode == NL80211_IFTYPE_STATION) {
-               sinfo->filled |= BIT(NL80211_STA_INFO_BSS_PARAM);
+               sinfo->filled |= BIT_ULL(NL80211_STA_INFO_BSS_PARAM);
                sinfo->bss_param.flags = 0;
                if (priv->curr_bss_params.bss_descriptor.cap_info_bitmap &
                                                WLAN_CAPABILITY_SHORT_PREAMBLE)
index 510f6b8e717d7f52eb2cdbb6e334e062f49053af..fa3e8ddfe9a93f78382aaa5031fcd15a29b7be09 100644 (file)
@@ -1279,7 +1279,8 @@ static struct net_device_stats *mwifiex_get_stats(struct net_device *dev)
 
 static u16
 mwifiex_netdev_select_wmm_queue(struct net_device *dev, struct sk_buff *skb,
-                               void *accel_priv, select_queue_fallback_t fallback)
+                               struct net_device *sb_dev,
+                               select_queue_fallback_t fallback)
 {
        skb->priority = cfg80211_classify8021d(skb, NULL);
        return mwifiex_1d_to_wmm_queue[skb->priority];
index 1e6a62c69ac52bfb285a400d883287ca422f1998..5ce85d5727e4b882ebc37372f03bb49003d1a0c9 100644 (file)
@@ -289,32 +289,6 @@ int mwifiex_uap_recv_packet(struct mwifiex_private *priv,
                src_node->stats.rx_packets++;
        }
 
-       skb->dev = priv->netdev;
-       skb->protocol = eth_type_trans(skb, priv->netdev);
-       skb->ip_summed = CHECKSUM_NONE;
-
-       /* This is required only in the case of 11n and USB/PCIE, as we
-        * allocate a 4K buffer only for 11N (to be able to receive 4K
-        * AMSDU packets). For SD we allocate buffers based on the size
-        * of the packet, so this is not needed there.
-        *
-        * We modify the truesize here because our allocation for each
-        * skb is 4K while we only receive 2K packets, and this causes
-        * the kernel to start dropping packets when the application
-        * has sized its receive buffer for 2K packets. I.e. if a 64K
-        * packet is received (in IP fragments) and the application
-        * allocates 64K to receive it, the packet's accounted size
-        * almost doubles because we allocate each 1.5K fragment in 4K
-        * and pass it up. As soon as the 64K limit is hit, the kernel
-        * starts dropping the rest of the fragments. Currently we fail
-        * the Filesndl-ht.scr script for UDP, hence this fix.
-        */
-       if ((adapter->iface_type == MWIFIEX_USB ||
-            adapter->iface_type == MWIFIEX_PCIE) &&
-           (skb->truesize > MWIFIEX_RX_DATA_BUF_SIZE))
-               skb->truesize += (skb->len - MWIFIEX_RX_DATA_BUF_SIZE);
-
        if (is_multicast_ether_addr(p_ethhdr->h_dest) ||
            mwifiex_get_sta_entry(priv, p_ethhdr->h_dest)) {
                if (skb_headroom(skb) < MWIFIEX_MIN_DATA_HEADER_LEN)
@@ -350,6 +324,32 @@ int mwifiex_uap_recv_packet(struct mwifiex_private *priv,
                        return 0;
        }
 
+       skb->dev = priv->netdev;
+       skb->protocol = eth_type_trans(skb, priv->netdev);
+       skb->ip_summed = CHECKSUM_NONE;
+
+       /* This is required only in the case of 11n and USB/PCIE, as we
+        * allocate a 4K buffer only for 11N (to be able to receive 4K
+        * AMSDU packets). For SD we allocate buffers based on the size
+        * of the packet, so this is not needed there.
+        *
+        * We modify the truesize here because our allocation for each
+        * skb is 4K while we only receive 2K packets, and this causes
+        * the kernel to start dropping packets when the application
+        * has sized its receive buffer for 2K packets. I.e. if a 64K
+        * packet is received (in IP fragments) and the application
+        * allocates 64K to receive it, the packet's accounted size
+        * almost doubles because we allocate each 1.5K fragment in 4K
+        * and pass it up. As soon as the 64K limit is hit, the kernel
+        * starts dropping the rest of the fragments. Currently we fail
+        * the Filesndl-ht.scr script for UDP, hence this fix.
+        */
+       if ((adapter->iface_type == MWIFIEX_USB ||
+            adapter->iface_type == MWIFIEX_PCIE) &&
+           skb->truesize > MWIFIEX_RX_DATA_BUF_SIZE)
+               skb->truesize += (skb->len - MWIFIEX_RX_DATA_BUF_SIZE);
+
        /* Forward multicast/broadcast packet to upper layer*/
        if (in_interrupt())
                netif_rx(skb);
index d2166fbf50ff3b561439f5a2a19eb54f6911887a..96e9798bb8a02e549bfed971198b8fa98d064d8f 100644 (file)
@@ -390,6 +390,18 @@ struct dentry *mt76_register_debugfs(struct mt76_dev *dev);
 int mt76_eeprom_init(struct mt76_dev *dev, int len);
 void mt76_eeprom_override(struct mt76_dev *dev);
 
+/* increment with wrap-around */
+static inline int mt76_incr(int val, int size)
+{
+       return (val + 1) & (size - 1);
+}
+
+/* decrement with wrap-around */
+static inline int mt76_decr(int val, int size)
+{
+       return (val - 1) & (size - 1);
+}
+
 static inline struct ieee80211_txq *
 mtxq_to_txq(struct mt76_txq *mtxq)
 {
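
mt76_incr()/mt76_decr() above assume the ring size is a power of two, so masking with (size - 1) equals reduction modulo size, and two's-complement arithmetic makes even the 0 - 1 case wrap correctly. A standalone check:

    #include <stdio.h>

    static int mt76_incr(int val, int size) { return (val + 1) & (size - 1); }
    static int mt76_decr(int val, int size) { return (val - 1) & (size - 1); }

    int main(void)
    {
        /* size must be a power of two for the mask trick to hold */
        printf("%d\n", mt76_incr(255, 256)); /* 0   */
        printf("%d\n", mt76_decr(0, 256));   /* 255 */
        return 0;
    }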
index dc12bbdbb2ee334b74ddb536513357dbc7b240d3..71fcfa44fb2e3aebfb39fda33e705ddd6a32eff2 100644 (file)
@@ -27,6 +27,7 @@
 #include <linux/mutex.h>
 #include <linux/bitops.h>
 #include <linux/kfifo.h>
+#include <linux/average.h>
 
 #define MT7662_FIRMWARE                "mt7662.bin"
 #define MT7662_ROM_PATCH       "mt7662_rom_patch.bin"
@@ -47,6 +48,8 @@
 #include "mt76x2_mac.h"
 #include "mt76x2_dfs.h"
 
+DECLARE_EWMA(signal, 10, 8)
+
 struct mt76x2_mcu {
        struct mutex mutex;
 
@@ -69,9 +72,8 @@ struct mt76x2_calibration {
        u8 agc_gain_init[MT_MAX_CHAINS];
        u8 agc_gain_cur[MT_MAX_CHAINS];
 
-       int avg_rssi[MT_MAX_CHAINS];
-       int avg_rssi_all;
-
+       u16 false_cca;
+       s8 avg_rssi_all;
        s8 agc_gain_adjust;
        s8 low_gain;
 
@@ -120,10 +122,13 @@ struct mt76x2_dev {
        u8 beacon_mask;
        u8 beacon_data_mask;
 
-       u32 rxfilter;
+       u8 tbtt_count;
+       u16 beacon_int;
 
        u16 chainmask;
 
+       u32 rxfilter;
+
        struct mt76x2_calibration cal;
 
        s8 target_power;
@@ -149,6 +154,9 @@ struct mt76x2_sta {
        struct mt76x2_vif *vif;
        struct mt76x2_tx_status status;
        int n_frames;
+
+       struct ewma_signal rssi;
+       int inactive_count;
 };
 
 static inline bool is_mt7612(struct mt76x2_dev *dev)
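
DECLARE_EWMA(signal, 10, 8) above generates struct ewma_signal plus ewma_signal_init/add/read helpers from linux/average.h: precision 10 keeps the running average in fixed point scaled by 2^10, and weight_rcp 8 makes each new sample contribute 1/8 of its value (new = old * 7/8 + sample / 8). A usage sketch for the new per-station rssi field; the container and call sites here are hypothetical, and samples are assumed non-negative since the helpers operate on unsigned long:

    #include <linux/average.h>

    DECLARE_EWMA(signal, 10, 8)     /* struct ewma_signal + helpers */

    struct my_sta {
        struct ewma_signal rssi;
    };

    static void my_sta_init(struct my_sta *sta)
    {
        ewma_signal_init(&sta->rssi);
    }

    static void my_sta_rx(struct my_sta *sta, unsigned long rssi_mag)
    {
        ewma_signal_add(&sta->rssi, rssi_mag);
    }

    static unsigned long my_sta_avg(struct my_sta *sta)
    {
        return ewma_signal_read(&sta->rssi);
    }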
index 955ea3e692dd0d9e4f784894029828f42539b5f2..74725902e6dc7587f530ca6d75ceb86b170c8dbb 100644 (file)
@@ -91,12 +91,20 @@ mt76x2_dfs_stat_read(struct seq_file *file, void *data)
        struct mt76x2_dev *dev = file->private;
        struct mt76x2_dfs_pattern_detector *dfs_pd = &dev->dfs_pd;
 
+       seq_printf(file, "allocated sequences:\t%d\n",
+                  dfs_pd->seq_stats.seq_pool_len);
+       seq_printf(file, "used sequences:\t\t%d\n",
+                  dfs_pd->seq_stats.seq_len);
+       seq_puts(file, "\n");
+
        for (i = 0; i < MT_DFS_NUM_ENGINES; i++) {
                seq_printf(file, "engine: %d\n", i);
                seq_printf(file, "  hw pattern detected:\t%d\n",
                           dfs_pd->stats[i].hw_pattern);
                seq_printf(file, "  hw pulse discarded:\t%d\n",
                           dfs_pd->stats[i].hw_pulse_discarded);
+               seq_printf(file, "  sw pattern detected:\t%d\n",
+                          dfs_pd->stats[i].sw_pattern);
        }
 
        return 0;
@@ -115,6 +123,18 @@ static const struct file_operations fops_dfs_stat = {
        .release = single_release,
 };
 
+static int read_agc(struct seq_file *file, void *data)
+{
+       struct mt76x2_dev *dev = dev_get_drvdata(file->private);
+
+       seq_printf(file, "avg_rssi: %d\n", dev->cal.avg_rssi_all);
+       seq_printf(file, "low_gain: %d\n", dev->cal.low_gain);
+       seq_printf(file, "false_cca: %d\n", dev->cal.false_cca);
+       seq_printf(file, "agc_gain_adjust: %d\n", dev->cal.agc_gain_adjust);
+
+       return 0;
+}
+
 void mt76x2_init_debugfs(struct mt76x2_dev *dev)
 {
        struct dentry *dir;
@@ -130,4 +150,6 @@ void mt76x2_init_debugfs(struct mt76x2_dev *dev)
        debugfs_create_file("dfs_stats", 0400, dir, dev, &fops_dfs_stat);
        debugfs_create_devm_seqfile(dev->mt76.dev, "txpower", dir,
                                    read_txpower);
+
+       debugfs_create_devm_seqfile(dev->mt76.dev, "agc", dir, read_agc);
 }
index f936dc9a5476f786645b8353b93d8bba97768da6..374cc655c11d7a8e246ea0839c0c4db5459503fe 100644 (file)
@@ -159,6 +159,81 @@ static void mt76x2_dfs_set_capture_mode_ctrl(struct mt76x2_dev *dev,
        mt76_wr(dev, MT_BBP(DFS, 36), data);
 }
 
+static void mt76x2_dfs_seq_pool_put(struct mt76x2_dev *dev,
+                                   struct mt76x2_dfs_sequence *seq)
+{
+       struct mt76x2_dfs_pattern_detector *dfs_pd = &dev->dfs_pd;
+
+       list_add(&seq->head, &dfs_pd->seq_pool);
+
+       dfs_pd->seq_stats.seq_pool_len++;
+       dfs_pd->seq_stats.seq_len--;
+}
+
+static
+struct mt76x2_dfs_sequence *mt76x2_dfs_seq_pool_get(struct mt76x2_dev *dev)
+{
+       struct mt76x2_dfs_pattern_detector *dfs_pd = &dev->dfs_pd;
+       struct mt76x2_dfs_sequence *seq;
+
+       if (list_empty(&dfs_pd->seq_pool)) {
+               seq = devm_kzalloc(dev->mt76.dev, sizeof(*seq), GFP_ATOMIC);
+       } else {
+               seq = list_first_entry(&dfs_pd->seq_pool,
+                                      struct mt76x2_dfs_sequence,
+                                      head);
+               list_del(&seq->head);
+               dfs_pd->seq_stats.seq_pool_len--;
+       }
+       if (seq)
+               dfs_pd->seq_stats.seq_len++;
+
+       return seq;
+}
+
+static int mt76x2_dfs_get_multiple(int val, int frac, int margin)
+{
+       int remainder, factor;
+
+       if (!frac)
+               return 0;
+
+       if (abs(val - frac) <= margin)
+               return 1;
+
+       factor = val / frac;
+       remainder = val % frac;
+
+       if (remainder > margin) {
+               if ((frac - remainder) <= margin)
+                       factor++;
+               else
+                       factor = 0;
+       }
+       return factor;
+}
+
+static void mt76x2_dfs_detector_reset(struct mt76x2_dev *dev)
+{
+       struct mt76x2_dfs_pattern_detector *dfs_pd = &dev->dfs_pd;
+       struct mt76x2_dfs_sequence *seq, *tmp_seq;
+       int i;
+
+       /* reset hw detector */
+       mt76_wr(dev, MT_BBP(DFS, 1), 0xf);
+
+       /* reset sw detector */
+       for (i = 0; i < ARRAY_SIZE(dfs_pd->event_rb); i++) {
+               dfs_pd->event_rb[i].h_rb = 0;
+               dfs_pd->event_rb[i].t_rb = 0;
+       }
+
+       list_for_each_entry_safe(seq, tmp_seq, &dfs_pd->sequences, head) {
+               list_del_init(&seq->head);
+               mt76x2_dfs_seq_pool_put(dev, seq);
+       }
+}
+
 static bool mt76x2_dfs_check_chirp(struct mt76x2_dev *dev)
 {
        bool ret = false;
@@ -295,6 +370,256 @@ static bool mt76x2_dfs_check_hw_pulse(struct mt76x2_dev *dev,
        return ret;
 }
 
+static bool mt76x2_dfs_fetch_event(struct mt76x2_dev *dev,
+                                  struct mt76x2_dfs_event *event)
+{
+       u32 data;
+
+       /* 1st: DFS_R37[31]: 0 (engine 0) - 1 (engine 2)
+        * 2nd: DFS_R37[21:0]: pulse time
+        * 3rd: DFS_R37[11:0]: pulse width
+        * 3rd: DFS_R37[25:16]: phase
+        * 4th: DFS_R37[12:0]: current pwr
+        * 4th: DFS_R37[21:16]: pwr stable counter
+        *
+        * 1st: DFS_R37[31:0] set to 0xffffffff means no event detected
+        */
+       data = mt76_rr(dev, MT_BBP(DFS, 37));
+       if (!MT_DFS_CHECK_EVENT(data))
+               return false;
+
+       event->engine = MT_DFS_EVENT_ENGINE(data);
+       data = mt76_rr(dev, MT_BBP(DFS, 37));
+       event->ts = MT_DFS_EVENT_TIMESTAMP(data);
+       data = mt76_rr(dev, MT_BBP(DFS, 37));
+       event->width = MT_DFS_EVENT_WIDTH(data);
+
+       return true;
+}
+
+static bool mt76x2_dfs_check_event(struct mt76x2_dev *dev,
+                                  struct mt76x2_dfs_event *event)
+{
+       if (event->engine == 2) {
+               struct mt76x2_dfs_pattern_detector *dfs_pd = &dev->dfs_pd;
+               struct mt76x2_dfs_event_rb *event_buff = &dfs_pd->event_rb[1];
+               u16 last_event_idx;
+               u32 delta_ts;
+
+               last_event_idx = mt76_decr(event_buff->t_rb,
+                                          MT_DFS_EVENT_BUFLEN);
+               delta_ts = event->ts - event_buff->data[last_event_idx].ts;
+               if (delta_ts < MT_DFS_EVENT_TIME_MARGIN &&
+                   event_buff->data[last_event_idx].width >= 200)
+                       return false;
+       }
+       return true;
+}
+
+static void mt76x2_dfs_queue_event(struct mt76x2_dev *dev,
+                                  struct mt76x2_dfs_event *event)
+{
+       struct mt76x2_dfs_pattern_detector *dfs_pd = &dev->dfs_pd;
+       struct mt76x2_dfs_event_rb *event_buff;
+
+       /* add radar event to ring buffer */
+       event_buff = event->engine == 2 ? &dfs_pd->event_rb[1]
+                                       : &dfs_pd->event_rb[0];
+       event_buff->data[event_buff->t_rb] = *event;
+       event_buff->data[event_buff->t_rb].fetch_ts = jiffies;
+
+       event_buff->t_rb = mt76_incr(event_buff->t_rb, MT_DFS_EVENT_BUFLEN);
+       if (event_buff->t_rb == event_buff->h_rb)
+               event_buff->h_rb = mt76_incr(event_buff->h_rb,
+                                            MT_DFS_EVENT_BUFLEN);
+}
+
+static int mt76x2_dfs_create_sequence(struct mt76x2_dev *dev,
+                                     struct mt76x2_dfs_event *event,
+                                     u16 cur_len)
+{
+       struct mt76x2_dfs_pattern_detector *dfs_pd = &dev->dfs_pd;
+       struct mt76x2_dfs_sw_detector_params *sw_params;
+       u32 width_delta, with_sum, factor, cur_pri;
+       struct mt76x2_dfs_sequence seq, *seq_p;
+       struct mt76x2_dfs_event_rb *event_rb;
+       struct mt76x2_dfs_event *cur_event;
+       int i, j, end, pri;
+
+       event_rb = event->engine == 2 ? &dfs_pd->event_rb[1]
+                                     : &dfs_pd->event_rb[0];
+
+       i = mt76_decr(event_rb->t_rb, MT_DFS_EVENT_BUFLEN);
+       end = mt76_decr(event_rb->h_rb, MT_DFS_EVENT_BUFLEN);
+
+       while (i != end) {
+               cur_event = &event_rb->data[i];
+               with_sum = event->width + cur_event->width;
+
+               sw_params = &dfs_pd->sw_dpd_params;
+               switch (dev->dfs_pd.region) {
+               case NL80211_DFS_FCC:
+               case NL80211_DFS_JP:
+                       if (with_sum < 600)
+                               width_delta = 8;
+                       else
+                               width_delta = with_sum >> 3;
+                       break;
+               case NL80211_DFS_ETSI:
+                       if (event->engine == 2)
+                               width_delta = with_sum >> 6;
+                       else if (with_sum < 620)
+                               width_delta = 24;
+                       else
+                               width_delta = 8;
+                       break;
+               case NL80211_DFS_UNSET:
+               default:
+                       return -EINVAL;
+               }
+
+               pri = event->ts - cur_event->ts;
+               if (abs(event->width - cur_event->width) > width_delta ||
+                   pri < sw_params->min_pri)
+                       goto next;
+
+               if (pri > sw_params->max_pri)
+                       break;
+
+               seq.pri = event->ts - cur_event->ts;
+               seq.first_ts = cur_event->ts;
+               seq.last_ts = event->ts;
+               seq.engine = event->engine;
+               seq.count = 2;
+
+               j = mt76_decr(i, MT_DFS_EVENT_BUFLEN);
+               while (j != end) {
+                       cur_event = &event_rb->data[j];
+                       cur_pri = event->ts - cur_event->ts;
+                       factor = mt76x2_dfs_get_multiple(cur_pri, seq.pri,
+                                               sw_params->pri_margin);
+                       if (factor > 0) {
+                               seq.first_ts = cur_event->ts;
+                               seq.count++;
+                       }
+
+                       j = mt76_decr(j, MT_DFS_EVENT_BUFLEN);
+               }
+               if (seq.count <= cur_len)
+                       goto next;
+
+               seq_p = mt76x2_dfs_seq_pool_get(dev);
+               if (!seq_p)
+                       return -ENOMEM;
+
+               *seq_p = seq;
+               INIT_LIST_HEAD(&seq_p->head);
+               list_add(&seq_p->head, &dfs_pd->sequences);
+next:
+               i = mt76_decr(i, MT_DFS_EVENT_BUFLEN);
+       }
+       return 0;
+}
+
+static u16 mt76x2_dfs_add_event_to_sequence(struct mt76x2_dev *dev,
+                                           struct mt76x2_dfs_event *event)
+{
+       struct mt76x2_dfs_pattern_detector *dfs_pd = &dev->dfs_pd;
+       struct mt76x2_dfs_sw_detector_params *sw_params;
+       struct mt76x2_dfs_sequence *seq, *tmp_seq;
+       u16 max_seq_len = 0;
+       u32 factor, pri;
+
+       sw_params = &dfs_pd->sw_dpd_params;
+       list_for_each_entry_safe(seq, tmp_seq, &dfs_pd->sequences, head) {
+               if (event->ts > seq->first_ts + MT_DFS_SEQUENCE_WINDOW) {
+                       list_del_init(&seq->head);
+                       mt76x2_dfs_seq_pool_put(dev, seq);
+                       continue;
+               }
+
+               if (event->engine != seq->engine)
+                       continue;
+
+               pri = event->ts - seq->last_ts;
+               factor = mt76x2_dfs_get_multiple(pri, seq->pri,
+                                                sw_params->pri_margin);
+               if (factor > 0) {
+                       seq->last_ts = event->ts;
+                       seq->count++;
+                       max_seq_len = max_t(u16, max_seq_len, seq->count);
+               }
+       }
+       return max_seq_len;
+}
+
+static bool mt76x2_dfs_check_detection(struct mt76x2_dev *dev)
+{
+       struct mt76x2_dfs_pattern_detector *dfs_pd = &dev->dfs_pd;
+       struct mt76x2_dfs_sequence *seq;
+
+       if (list_empty(&dfs_pd->sequences))
+               return false;
+
+       list_for_each_entry(seq, &dfs_pd->sequences, head) {
+               if (seq->count > MT_DFS_SEQUENCE_TH) {
+                       dfs_pd->stats[seq->engine].sw_pattern++;
+                       return true;
+               }
+       }
+       return false;
+}
+
+static void mt76x2_dfs_add_events(struct mt76x2_dev *dev)
+{
+       struct mt76x2_dfs_pattern_detector *dfs_pd = &dev->dfs_pd;
+       struct mt76x2_dfs_event event;
+       int i, seq_len;
+
+       /* disable debug mode */
+       mt76x2_dfs_set_capture_mode_ctrl(dev, false);
+       for (i = 0; i < MT_DFS_EVENT_LOOP; i++) {
+               if (!mt76x2_dfs_fetch_event(dev, &event))
+                       break;
+
+               if (dfs_pd->last_event_ts > event.ts)
+                       mt76x2_dfs_detector_reset(dev);
+               dfs_pd->last_event_ts = event.ts;
+
+               if (!mt76x2_dfs_check_event(dev, &event))
+                       continue;
+
+               seq_len = mt76x2_dfs_add_event_to_sequence(dev, &event);
+               mt76x2_dfs_create_sequence(dev, &event, seq_len);
+
+               mt76x2_dfs_queue_event(dev, &event);
+       }
+       mt76x2_dfs_set_capture_mode_ctrl(dev, true);
+}
+
+static void mt76x2_dfs_check_event_window(struct mt76x2_dev *dev)
+{
+       struct mt76x2_dfs_pattern_detector *dfs_pd = &dev->dfs_pd;
+       struct mt76x2_dfs_event_rb *event_buff;
+       struct mt76x2_dfs_event *event;
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(dfs_pd->event_rb); i++) {
+               event_buff = &dfs_pd->event_rb[i];
+
+               while (event_buff->h_rb != event_buff->t_rb) {
+                       event = &event_buff->data[event_buff->h_rb];
+
+                       /* sorted list */
+                       if (time_is_after_jiffies(event->fetch_ts +
+                                                 MT_DFS_EVENT_WINDOW))
+                               break;
+                       event_buff->h_rb = mt76_incr(event_buff->h_rb,
+                                                    MT_DFS_EVENT_BUFLEN);
+               }
+       }
+}
+
 static void mt76x2_dfs_tasklet(unsigned long arg)
 {
        struct mt76x2_dev *dev = (struct mt76x2_dev *)arg;
@@ -305,6 +630,24 @@ static void mt76x2_dfs_tasklet(unsigned long arg)
        if (test_bit(MT76_SCANNING, &dev->mt76.state))
                goto out;
 
+       if (time_is_before_jiffies(dfs_pd->last_sw_check +
+                                  MT_DFS_SW_TIMEOUT)) {
+               bool radar_detected;
+
+               dfs_pd->last_sw_check = jiffies;
+
+               mt76x2_dfs_add_events(dev);
+               radar_detected = mt76x2_dfs_check_detection(dev);
+               if (radar_detected) {
+                       /* sw detector rx radar pattern */
+                       ieee80211_radar_detected(dev->mt76.hw);
+                       mt76x2_dfs_detector_reset(dev);
+
+                       return;
+               }
+               mt76x2_dfs_check_event_window(dev);
+       }
+
        engine_mask = mt76_rr(dev, MT_BBP(DFS, 1));
        if (!(engine_mask & 0xf))
                goto out;
@@ -326,9 +669,7 @@ static void mt76x2_dfs_tasklet(unsigned long arg)
                /* hw detector rx radar pattern */
                dfs_pd->stats[i].hw_pattern++;
                ieee80211_radar_detected(dev->mt76.hw);
-
-               /* reset hw detector */
-               mt76_wr(dev, MT_BBP(DFS, 1), 0xf);
+               mt76x2_dfs_detector_reset(dev);
 
                return;
        }
@@ -340,6 +681,32 @@ out:
        mt76x2_irq_enable(dev, MT_INT_GPTIMER);
 }
 
+static void mt76x2_dfs_init_sw_detector(struct mt76x2_dev *dev)
+{
+       struct mt76x2_dfs_pattern_detector *dfs_pd = &dev->dfs_pd;
+
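+       /* PRI bounds and matching margin depend on the regulatory region;
+        * note the 4x wider margin used for ETSI patterns
+        */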
+       switch (dev->dfs_pd.region) {
+       case NL80211_DFS_FCC:
+               dfs_pd->sw_dpd_params.max_pri = MT_DFS_FCC_MAX_PRI;
+               dfs_pd->sw_dpd_params.min_pri = MT_DFS_FCC_MIN_PRI;
+               dfs_pd->sw_dpd_params.pri_margin = MT_DFS_PRI_MARGIN;
+               break;
+       case NL80211_DFS_ETSI:
+               dfs_pd->sw_dpd_params.max_pri = MT_DFS_ETSI_MAX_PRI;
+               dfs_pd->sw_dpd_params.min_pri = MT_DFS_ETSI_MIN_PRI;
+               dfs_pd->sw_dpd_params.pri_margin = MT_DFS_PRI_MARGIN << 2;
+               break;
+       case NL80211_DFS_JP:
+               dfs_pd->sw_dpd_params.max_pri = MT_DFS_JP_MAX_PRI;
+               dfs_pd->sw_dpd_params.min_pri = MT_DFS_JP_MIN_PRI;
+               dfs_pd->sw_dpd_params.pri_margin = MT_DFS_PRI_MARGIN;
+               break;
+       case NL80211_DFS_UNSET:
+       default:
+               break;
+       }
+}
+
 static void mt76x2_dfs_set_bbp_params(struct mt76x2_dev *dev)
 {
        u32 data;
@@ -462,6 +829,7 @@ void mt76x2_dfs_init_params(struct mt76x2_dev *dev)
 
        if ((chandef->chan->flags & IEEE80211_CHAN_RADAR) &&
            dev->dfs_pd.region != NL80211_DFS_UNSET) {
+               mt76x2_dfs_init_sw_detector(dev);
                mt76x2_dfs_set_bbp_params(dev);
                /* enable debug mode */
                mt76x2_dfs_set_capture_mode_ctrl(dev, true);
@@ -486,7 +854,10 @@ void mt76x2_dfs_init_detector(struct mt76x2_dev *dev)
 {
        struct mt76x2_dfs_pattern_detector *dfs_pd = &dev->dfs_pd;
 
+       INIT_LIST_HEAD(&dfs_pd->sequences);
+       INIT_LIST_HEAD(&dfs_pd->seq_pool);
        dfs_pd->region = NL80211_DFS_UNSET;
+       dfs_pd->last_sw_check = jiffies;
        tasklet_init(&dfs_pd->dfs_tasklet, mt76x2_dfs_tasklet,
                     (unsigned long)dev);
 }
index 8dbc783cc6bcc049d0503e6f9924a2f6d204e504..693f421bf096f797aa09b8f7e95d4c4f1cb817da 100644 (file)
 #define MT_DFS_PKT_END_MASK            0
 #define MT_DFS_CH_EN                   0xf
 
+/* sw detector params */
+#define MT_DFS_EVENT_LOOP              64
+#define MT_DFS_SW_TIMEOUT              (HZ / 20)
+#define MT_DFS_EVENT_WINDOW            (HZ / 5)
+#define MT_DFS_SEQUENCE_WINDOW         (200 * (1 << 20))
+#define MT_DFS_EVENT_TIME_MARGIN       2000
+#define MT_DFS_PRI_MARGIN              4
+#define MT_DFS_SEQUENCE_TH             6
+
+#define MT_DFS_FCC_MAX_PRI             ((28570 << 1) + 1000)
+#define MT_DFS_FCC_MIN_PRI             (3000 - 2)
+#define MT_DFS_JP_MAX_PRI              ((80000 << 1) + 1000)
+#define MT_DFS_JP_MIN_PRI              (28500 - 2)
+#define MT_DFS_ETSI_MAX_PRI            (133333 + 125000 + 117647 + 1000)
+#define MT_DFS_ETSI_MIN_PRI            (4500 - 20)
+
 struct mt76x2_radar_specs {
        u8 mode;
        u16 avg_len;
@@ -50,6 +66,32 @@ struct mt76x2_radar_specs {
        u16 pwr_jmp;
 };
 
+#define MT_DFS_CHECK_EVENT(x)          ((x) != GENMASK(31, 0))
+#define MT_DFS_EVENT_ENGINE(x)         (((x) & BIT(31)) ? 2 : 0)
+#define MT_DFS_EVENT_TIMESTAMP(x)      ((x) & GENMASK(21, 0))
+#define MT_DFS_EVENT_WIDTH(x)          ((x) & GENMASK(11, 0))
+struct mt76x2_dfs_event {
+       unsigned long fetch_ts;
+       u32 ts;
+       u16 width;
+       u8 engine;
+};
+
+#define MT_DFS_EVENT_BUFLEN            256
+struct mt76x2_dfs_event_rb {
+       struct mt76x2_dfs_event data[MT_DFS_EVENT_BUFLEN];
+       int h_rb, t_rb;
+};
+
+struct mt76x2_dfs_sequence {
+       struct list_head head;
+       u32 first_ts;
+       u32 last_ts;
+       u32 pri;
+       u16 count;
+       u8 engine;
+};
+
 struct mt76x2_dfs_hw_pulse {
        u8 engine;
        u32 period;
@@ -58,9 +100,21 @@ struct mt76x2_dfs_hw_pulse {
        u32 burst;
 };
 
+struct mt76x2_dfs_sw_detector_params {
+       u32 min_pri;
+       u32 max_pri;
+       u32 pri_margin;
+};
+
 struct mt76x2_dfs_engine_stats {
        u32 hw_pattern;
        u32 hw_pulse_discarded;
+       u32 sw_pattern;
+};
+
+struct mt76x2_dfs_seq_stats {
+       u32 seq_pool_len;
+       u32 seq_len;
 };
 
 struct mt76x2_dfs_pattern_detector {
@@ -69,6 +123,16 @@ struct mt76x2_dfs_pattern_detector {
        u8 chirp_pulse_cnt;
        u32 chirp_pulse_ts;
 
+       struct mt76x2_dfs_sw_detector_params sw_dpd_params;
+       struct mt76x2_dfs_event_rb event_rb[2];
+
+       struct list_head sequences;     /* sequences being tracked */
+       struct list_head seq_pool;      /* recycled sequence entries */
+       struct mt76x2_dfs_seq_stats seq_stats;
+
+       unsigned long last_sw_check;
+       u32 last_event_ts;
+
        struct mt76x2_dfs_engine_stats stats[MT_DFS_NUM_ENGINES];
        struct tasklet_struct dfs_tasklet;
 };
index b49aea4da2d664e5dd4bcae392c457b9a548d200..fc9af79b3e6948f41ddeaa4af2ef5e703c7b1d2e 100644 (file)
@@ -269,21 +269,31 @@ static void mt76x2_remove_hdr_pad(struct sk_buff *skb, int len)
        skb_pull(skb, len);
 }
 
-static struct mt76_wcid *
-mt76x2_rx_get_sta_wcid(struct mt76x2_dev *dev, u8 idx, bool unicast)
+static struct mt76x2_sta *
+mt76x2_rx_get_sta(struct mt76x2_dev *dev, u8 idx)
 {
-       struct mt76x2_sta *sta;
        struct mt76_wcid *wcid;
 
        if (idx >= ARRAY_SIZE(dev->wcid))
                return NULL;
 
        wcid = rcu_dereference(dev->wcid[idx]);
-       if (unicast || !wcid)
-               return wcid;
+       if (!wcid)
+               return NULL;
+
+       return container_of(wcid, struct mt76x2_sta, wcid);
+}
+
+static struct mt76_wcid *
+mt76x2_rx_get_sta_wcid(struct mt76x2_dev *dev, struct mt76x2_sta *sta,
+                      bool unicast)
+{
+       if (!sta)
+               return NULL;
 
-       sta = container_of(wcid, struct mt76x2_sta, wcid);
-       return &sta->vif->group_wcid;
+       if (unicast)
+               return &sta->wcid;
+       else
+               return &sta->vif->group_wcid;
 }
 
 int mt76x2_mac_process_rx(struct mt76x2_dev *dev, struct sk_buff *skb,
@@ -291,6 +301,7 @@ int mt76x2_mac_process_rx(struct mt76x2_dev *dev, struct sk_buff *skb,
 {
        struct mt76_rx_status *status = (struct mt76_rx_status *) skb->cb;
        struct mt76x2_rxwi *rxwi = rxi;
+       struct mt76x2_sta *sta;
        u32 rxinfo = le32_to_cpu(rxwi->rxinfo);
        u32 ctl = le32_to_cpu(rxwi->ctl);
        u16 rate = le16_to_cpu(rxwi->rate);
@@ -315,7 +326,8 @@ int mt76x2_mac_process_rx(struct mt76x2_dev *dev, struct sk_buff *skb,
        }
 
        wcid = FIELD_GET(MT_RXWI_CTL_WCID, ctl);
-       status->wcid = mt76x2_rx_get_sta_wcid(dev, wcid, unicast);
+       sta = mt76x2_rx_get_sta(dev, wcid);
+       status->wcid = mt76x2_rx_get_sta_wcid(dev, sta, unicast);
 
        len = FIELD_GET(MT_RXWI_CTL_MPDU_LEN, ctl);
        pn_len = FIELD_GET(MT_RXINFO_PN_LEN, rxinfo);
@@ -361,6 +373,11 @@ int mt76x2_mac_process_rx(struct mt76x2_dev *dev, struct sk_buff *skb,
        status->tid = FIELD_GET(MT_RXWI_TID, tid_sn);
        status->seqno = FIELD_GET(MT_RXWI_SN, tid_sn);
 
+       if (sta) {
+               ewma_signal_add(&sta->rssi, status->signal);
+               sta->inactive_count = 0;
+       }
+
        return mt76x2_mac_process_rate(status, rate);
 }
 
@@ -439,15 +456,13 @@ mt76x2_mac_fill_tx_status(struct mt76x2_dev *dev,
        if (last_rate < IEEE80211_TX_MAX_RATES - 1)
                rate[last_rate + 1].idx = -1;
 
-       cur_idx = rate[last_rate].idx + st->retry;
+       cur_idx = rate[last_rate].idx + last_rate;
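+       /* rebuild the per-rate attempts: one try for each intermediate
+        * rate step, with the remaining retries charged to the final rate
+        */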
        for (i = 0; i <= last_rate; i++) {
                rate[i].flags = rate[last_rate].flags;
                rate[i].idx = max_t(int, 0, cur_idx - i);
                rate[i].count = 1;
        }
-
-       if (last_rate > 0)
-               rate[last_rate - 1].count = st->retry + 1 - last_rate;
+       rate[last_rate].count = st->retry + 1 - last_rate;
 
        info->status.ampdu_len = n_frames;
        info->status.ampdu_ack_len = st->success ? n_frames : 0;
index ce90ff999b497c83c06f08f48a3810f118c1c280..3c0ebe6d231c8724fd1284118a20b10edd0ee051 100644 (file)
@@ -238,10 +238,13 @@ mt76x2_bss_info_changed(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
        if (changed & BSS_CHANGED_BSSID)
                mt76x2_mac_set_bssid(dev, mvif->idx, info->bssid);
 
-       if (changed & BSS_CHANGED_BEACON_INT)
+       if (changed & BSS_CHANGED_BEACON_INT) {
                mt76_rmw_field(dev, MT_BEACON_TIME_CFG,
                               MT_BEACON_TIME_CFG_INTVAL,
                               info->beacon_int << 4);
+               dev->beacon_int = info->beacon_int;
+               dev->tbtt_count = 0;
+       }
 
        if (changed & BSS_CHANGED_BEACON_ENABLED) {
                tasklet_disable(&dev->pre_tbtt_tasklet);
@@ -291,6 +294,8 @@ mt76x2_sta_add(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
        if (vif->type == NL80211_IFTYPE_AP)
                set_bit(MT_WCID_FLAG_CHECK_PS, &msta->wcid.flags);
 
+       ewma_signal_init(&msta->rssi);
+
        rcu_assign_pointer(dev->wcid[idx], &msta->wcid);
 
 out:
index c1c38ca3330a0c62d8b4bdec33ee2853b7e283ab..20ffa6a40d3903db5a80e07e1132a22bb4ae08ec 100644 (file)
@@ -485,12 +485,14 @@ static void
 mt76x2_phy_adjust_vga_gain(struct mt76x2_dev *dev)
 {
        u32 false_cca;
-       u8 limit = dev->cal.low_gain > 1 ? 4 : 16;
+       u8 limit = dev->cal.low_gain > 0 ? 16 : 4;
 
        false_cca = FIELD_GET(MT_RX_STAT_1_CCA_ERRORS, mt76_rr(dev, MT_RX_STAT_1));
+       dev->cal.false_cca = false_cca;
        if (false_cca > 800 && dev->cal.agc_gain_adjust < limit)
                dev->cal.agc_gain_adjust += 2;
-       else if (false_cca < 10 && dev->cal.agc_gain_adjust > 0)
+       else if ((false_cca < 10 && dev->cal.agc_gain_adjust > 0) ||
+                (dev->cal.agc_gain_adjust >= limit && false_cca < 500))
                dev->cal.agc_gain_adjust -= 2;
        else
                return;
@@ -498,60 +500,115 @@ mt76x2_phy_adjust_vga_gain(struct mt76x2_dev *dev)
        mt76x2_phy_set_gain_val(dev);
 }
 
+static int
+mt76x2_phy_get_min_avg_rssi(struct mt76x2_dev *dev)
+{
+       struct mt76x2_sta *sta;
+       struct mt76_wcid *wcid;
+       int i, j, min_rssi = 0;
+       s8 cur_rssi;
+
+       local_bh_disable();
+       rcu_read_lock();
+
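+       /* walk the wcid bitmap and track the minimum averaged rssi over
+        * all active stations
+        */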
+       for (i = 0; i < ARRAY_SIZE(dev->wcid_mask); i++) {
+               unsigned long mask = dev->wcid_mask[i];
+
+               if (!mask)
+                       continue;
+
+               for (j = i * BITS_PER_LONG; mask; j++, mask >>= 1) {
+                       if (!(mask & 1))
+                               continue;
+
+                       wcid = rcu_dereference(dev->wcid[j]);
+                       if (!wcid)
+                               continue;
+
+                       sta = container_of(wcid, struct mt76x2_sta, wcid);
+                       spin_lock(&dev->mt76.rx_lock);
+                       if (sta->inactive_count++ < 5)
+                               cur_rssi = ewma_signal_read(&sta->rssi);
+                       else
+                               cur_rssi = 0;
+                       spin_unlock(&dev->mt76.rx_lock);
+
+                       if (cur_rssi < min_rssi)
+                               min_rssi = cur_rssi;
+               }
+       }
+
+       rcu_read_unlock();
+       local_bh_enable();
+
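+       /* no station has reported a signal yet: assume -75 dBm */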
+       if (!min_rssi)
+               return -75;
+
+       return min_rssi;
+}
+
 static void
 mt76x2_phy_update_channel_gain(struct mt76x2_dev *dev)
 {
-       u32 val = mt76_rr(dev, MT_BBP(AGC, 20));
-       int rssi0 = (s8) FIELD_GET(MT_BBP_AGC20_RSSI0, val);
-       int rssi1 = (s8) FIELD_GET(MT_BBP_AGC20_RSSI1, val);
        u8 *gain = dev->cal.agc_gain_init;
-       u8 gain_delta;
+       u8 low_gain_delta, gain_delta;
+       bool gain_change;
        int low_gain;
+       u32 val;
 
-       dev->cal.avg_rssi[0] = (dev->cal.avg_rssi[0] * 15) / 16 +
-                              (rssi0 << 8) / 16;
-       dev->cal.avg_rssi[1] = (dev->cal.avg_rssi[1] * 15) / 16 +
-                              (rssi1 << 8) / 16;
-       dev->cal.avg_rssi_all = (dev->cal.avg_rssi[0] +
-                                dev->cal.avg_rssi[1]) / 512;
+       dev->cal.avg_rssi_all = mt76x2_phy_get_min_avg_rssi(dev);
 
        low_gain = (dev->cal.avg_rssi_all > mt76x2_get_rssi_gain_thresh(dev)) +
                   (dev->cal.avg_rssi_all > mt76x2_get_low_rssi_gain_thresh(dev));
 
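+       /* low_gain ranges from 0 (weak signal) to 2 (above both rssi
+        * thresholds); only a change of bit 1 needs the full gain
+        * reprogramming below
+        */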
-       if (dev->cal.low_gain == low_gain) {
+       gain_change = (dev->cal.low_gain & 2) ^ (low_gain & 2);
+       dev->cal.low_gain = low_gain;
+
+       if (!gain_change) {
                mt76x2_phy_adjust_vga_gain(dev);
                return;
        }
 
-       dev->cal.low_gain = low_gain;
-
-       if (dev->mt76.chandef.width == NL80211_CHAN_WIDTH_80)
+       if (dev->mt76.chandef.width == NL80211_CHAN_WIDTH_80) {
                mt76_wr(dev, MT_BBP(RXO, 14), 0x00560211);
-       else
+               val = mt76_rr(dev, MT_BBP(AGC, 26)) & ~0xf;
+               if (low_gain == 2)
+                       val |= 0x3;
+               else
+                       val |= 0x5;
+               mt76_wr(dev, MT_BBP(AGC, 26), val);
+       } else {
                mt76_wr(dev, MT_BBP(RXO, 14), 0x00560423);
+       }
 
-       if (low_gain) {
-               mt76_wr(dev, MT_BBP(RXO, 18), 0xf000a991);
+       if (mt76x2_has_ext_lna(dev))
+               low_gain_delta = 10;
+       else
+               low_gain_delta = 14;
+
+       if (low_gain == 2) {
+               mt76_wr(dev, MT_BBP(RXO, 18), 0xf000a990);
                mt76_wr(dev, MT_BBP(AGC, 35), 0x08080808);
                mt76_wr(dev, MT_BBP(AGC, 37), 0x08080808);
-               if (mt76x2_has_ext_lna(dev))
-                       gain_delta = 10;
-               else
-                       gain_delta = 14;
+               gain_delta = low_gain_delta;
+               dev->cal.agc_gain_adjust = 0;
        } else {
-               mt76_wr(dev, MT_BBP(RXO, 18), 0xf000a990);
+               mt76_wr(dev, MT_BBP(RXO, 18), 0xf000a991);
                if (dev->mt76.chandef.width == NL80211_CHAN_WIDTH_80)
                        mt76_wr(dev, MT_BBP(AGC, 35), 0x10101014);
                else
                        mt76_wr(dev, MT_BBP(AGC, 35), 0x11111116);
                mt76_wr(dev, MT_BBP(AGC, 37), 0x2121262C);
                gain_delta = 0;
+               dev->cal.agc_gain_adjust = low_gain_delta;
        }
 
        dev->cal.agc_gain_cur[0] = gain[0] - gain_delta;
        dev->cal.agc_gain_cur[1] = gain[1] - gain_delta;
-       dev->cal.agc_gain_adjust = 0;
        mt76x2_phy_set_gain_val(dev);
+
+       /* clear false CCA counters */
+       mt76_rr(dev, MT_RX_STAT_1);
 }
 
 int mt76x2_phy_set_channel(struct mt76x2_dev *dev,
index e46eafc4c436e08a86e7be78e5db167ad931df58..560376dd113337a1ff4c09d892d995eedb7d85a4 100644 (file)
@@ -218,6 +218,37 @@ mt76x2_add_buffered_bc(void *priv, u8 *mac, struct ieee80211_vif *vif)
        data->tail[mvif->idx] = skb;
 }
 
+static void
+mt76x2_resync_beacon_timer(struct mt76x2_dev *dev)
+{
+       u32 timer_val = dev->beacon_int << 4;
+
+       dev->tbtt_count++;
+
+       /*
+        * The beacon timer drifts by 1us every tick; the timer itself is
+        * configured in 1/16 TU (64us) units.
+        */
+       if (dev->tbtt_count < 62)
+               return;
+
+       if (dev->tbtt_count >= 64) {
+               dev->tbtt_count = 0;
+               return;
+       }
+
+       /*
+        * A shortened interval programmed now only takes effect two TBTTs
+        * later, because the original interval has already been loaded
+        * into the next TBTT_TIMER value: shorten one interval by a single
+        * unit at count 62, then restore the nominal value on the next
+        * pass, cancelling the ~64us of drift accumulated over 64 beacons.
+        */
+       if (dev->tbtt_count == 62)
+               timer_val -= 1;
+
+       mt76_rmw_field(dev, MT_BEACON_TIME_CFG,
+                      MT_BEACON_TIME_CFG_INTVAL, timer_val);
+}
+
 void mt76x2_pre_tbtt_tasklet(unsigned long arg)
 {
        struct mt76x2_dev *dev = (struct mt76x2_dev *) arg;
@@ -226,6 +257,8 @@ void mt76x2_pre_tbtt_tasklet(unsigned long arg)
        struct sk_buff *skb;
        int i, nframes;
 
+       mt76x2_resync_beacon_timer(dev);
+
        data.dev = dev;
        __skb_queue_head_init(&data.q);
 
index ae0ca800684950e65ecc01916f4782af54a8e0eb..656ddc65921854b5f78769417c471f57eaa6c961 100644 (file)
@@ -1013,6 +1013,9 @@ int qtnf_wiphy_register(struct qtnf_hw_info *hw_info, struct qtnf_wmac *mac)
        if (hw_info->hw_capab & QLINK_HW_CAPAB_STA_INACT_TIMEOUT)
                wiphy->features |= NL80211_FEATURE_INACTIVITY_TIMER;
 
+       if (hw_info->hw_capab & QLINK_HW_CAPAB_SCAN_RANDOM_MAC_ADDR)
+               wiphy->features |= NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR;
+
        if (hw_info->hw_capab & QLINK_HW_CAPAB_REG_UPDATE) {
                wiphy->regulatory_flags |= REGULATORY_STRICT_REG |
                        REGULATORY_CUSTOM_REG;
index c5d94a95e21a4abfabeeeca9fe2f19a780e25c3e..42a598f92539a20793e197d4ac1923e072f95d09 100644 (file)
@@ -640,83 +640,83 @@ qtnf_cmd_sta_info_parse(struct station_info *sinfo,
                return;
 
        if (qtnf_sta_stat_avail(inactive_time, QLINK_STA_INFO_INACTIVE_TIME)) {
-               sinfo->filled |= BIT(NL80211_STA_INFO_INACTIVE_TIME);
+               sinfo->filled |= BIT_ULL(NL80211_STA_INFO_INACTIVE_TIME);
                sinfo->inactive_time = le32_to_cpu(stats->inactive_time);
        }
 
        if (qtnf_sta_stat_avail(connected_time,
                                QLINK_STA_INFO_CONNECTED_TIME)) {
-               sinfo->filled |= BIT(NL80211_STA_INFO_CONNECTED_TIME);
+               sinfo->filled |= BIT_ULL(NL80211_STA_INFO_CONNECTED_TIME);
                sinfo->connected_time = le32_to_cpu(stats->connected_time);
        }
 
        if (qtnf_sta_stat_avail(signal, QLINK_STA_INFO_SIGNAL)) {
-               sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL);
+               sinfo->filled |= BIT_ULL(NL80211_STA_INFO_SIGNAL);
                sinfo->signal = stats->signal - QLINK_RSSI_OFFSET;
        }
 
        if (qtnf_sta_stat_avail(signal_avg, QLINK_STA_INFO_SIGNAL_AVG)) {
-               sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL_AVG);
+               sinfo->filled |= BIT_ULL(NL80211_STA_INFO_SIGNAL_AVG);
                sinfo->signal_avg = stats->signal_avg - QLINK_RSSI_OFFSET;
        }
 
        if (qtnf_sta_stat_avail(rxrate, QLINK_STA_INFO_RX_BITRATE)) {
-               sinfo->filled |= BIT(NL80211_STA_INFO_RX_BITRATE);
+               sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_BITRATE);
                qtnf_sta_info_parse_rate(&sinfo->rxrate, &stats->rxrate);
        }
 
        if (qtnf_sta_stat_avail(txrate, QLINK_STA_INFO_TX_BITRATE)) {
-               sinfo->filled |= BIT(NL80211_STA_INFO_TX_BITRATE);
+               sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_BITRATE);
                qtnf_sta_info_parse_rate(&sinfo->txrate, &stats->txrate);
        }
 
        if (qtnf_sta_stat_avail(sta_flags, QLINK_STA_INFO_STA_FLAGS)) {
-               sinfo->filled |= BIT(NL80211_STA_INFO_STA_FLAGS);
+               sinfo->filled |= BIT_ULL(NL80211_STA_INFO_STA_FLAGS);
                qtnf_sta_info_parse_flags(&sinfo->sta_flags, &stats->sta_flags);
        }
 
        if (qtnf_sta_stat_avail(rx_bytes, QLINK_STA_INFO_RX_BYTES)) {
-               sinfo->filled |= BIT(NL80211_STA_INFO_RX_BYTES);
+               sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_BYTES);
                sinfo->rx_bytes = le64_to_cpu(stats->rx_bytes);
        }
 
        if (qtnf_sta_stat_avail(tx_bytes, QLINK_STA_INFO_TX_BYTES)) {
-               sinfo->filled |= BIT(NL80211_STA_INFO_TX_BYTES);
+               sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_BYTES);
                sinfo->tx_bytes = le64_to_cpu(stats->tx_bytes);
        }
 
        if (qtnf_sta_stat_avail(rx_bytes, QLINK_STA_INFO_RX_BYTES64)) {
-               sinfo->filled |= BIT(NL80211_STA_INFO_RX_BYTES64);
+               sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_BYTES64);
                sinfo->rx_bytes = le64_to_cpu(stats->rx_bytes);
        }
 
        if (qtnf_sta_stat_avail(tx_bytes, QLINK_STA_INFO_TX_BYTES64)) {
-               sinfo->filled |= BIT(NL80211_STA_INFO_TX_BYTES64);
+               sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_BYTES64);
                sinfo->tx_bytes = le64_to_cpu(stats->tx_bytes);
        }
 
        if (qtnf_sta_stat_avail(rx_packets, QLINK_STA_INFO_RX_PACKETS)) {
-               sinfo->filled |= BIT(NL80211_STA_INFO_RX_PACKETS);
+               sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_PACKETS);
                sinfo->rx_packets = le32_to_cpu(stats->rx_packets);
        }
 
        if (qtnf_sta_stat_avail(tx_packets, QLINK_STA_INFO_TX_PACKETS)) {
-               sinfo->filled |= BIT(NL80211_STA_INFO_TX_PACKETS);
+               sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_PACKETS);
                sinfo->tx_packets = le32_to_cpu(stats->tx_packets);
        }
 
        if (qtnf_sta_stat_avail(rx_beacon, QLINK_STA_INFO_BEACON_RX)) {
-               sinfo->filled |= BIT(NL80211_STA_INFO_BEACON_RX);
+               sinfo->filled |= BIT_ULL(NL80211_STA_INFO_BEACON_RX);
                sinfo->rx_beacon = le64_to_cpu(stats->rx_beacon);
        }
 
        if (qtnf_sta_stat_avail(rx_dropped_misc, QLINK_STA_INFO_RX_DROP_MISC)) {
-               sinfo->filled |= BIT(NL80211_STA_INFO_RX_DROP_MISC);
+               sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_DROP_MISC);
                sinfo->rx_dropped_misc = le32_to_cpu(stats->rx_dropped_misc);
        }
 
        if (qtnf_sta_stat_avail(tx_failed, QLINK_STA_INFO_TX_FAILED)) {
-               sinfo->filled |= BIT(NL80211_STA_INFO_TX_FAILED);
+               sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_FAILED);
                sinfo->tx_failed = le32_to_cpu(stats->tx_failed);
        }
 
@@ -2234,6 +2234,22 @@ static void qtnf_cmd_channel_tlv_add(struct sk_buff *cmd_skb,
        qchan->chan.flags = cpu_to_le32(flags);
 }
 
+static void qtnf_cmd_randmac_tlv_add(struct sk_buff *cmd_skb,
+                                    const u8 *mac_addr,
+                                    const u8 *mac_addr_mask)
+{
+       struct qlink_random_mac_addr *randmac;
+       struct qlink_tlv_hdr *hdr =
+               skb_put(cmd_skb, sizeof(*hdr) + sizeof(*randmac));
+
+       hdr->type = cpu_to_le16(QTN_TLV_ID_RANDOM_MAC_ADDR);
+       hdr->len = cpu_to_le16(sizeof(*randmac));
+       randmac = (struct qlink_random_mac_addr *)hdr->val;
+
+       memcpy(randmac->mac_addr, mac_addr, ETH_ALEN);
+       memcpy(randmac->mac_addr_mask, mac_addr_mask, ETH_ALEN);
+}
+
 int qtnf_cmd_send_scan(struct qtnf_wmac *mac)
 {
        struct sk_buff *cmd_skb;
@@ -2291,6 +2307,15 @@ int qtnf_cmd_send_scan(struct qtnf_wmac *mac)
                }
        }
 
+       if (scan_req->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) {
+               pr_debug("MAC%u: scan with random addr=%pM, mask=%pM\n",
+                        mac->macid,
+                        scan_req->mac_addr, scan_req->mac_addr_mask);
+
+               qtnf_cmd_randmac_tlv_add(cmd_skb, scan_req->mac_addr,
+                                        scan_req->mac_addr_mask);
+       }
+
        ret = qtnf_cmd_send(mac->bus, cmd_skb, &res_code);
 
        if (unlikely(ret))
index a6a450984f9acf63924343cae5a08d63d335b48d..c318340e1bd57a29f3a4b5d171e600e7753ae5f9 100644 (file)
@@ -179,6 +179,30 @@ static void qtnf_netdev_tx_timeout(struct net_device *ndev)
        }
 }
 
+static int qtnf_netdev_set_mac_address(struct net_device *ndev, void *addr)
+{
+       struct qtnf_vif *vif = qtnf_netdev_get_priv(ndev);
+       struct sockaddr *sa = addr;
+       int ret;
+       unsigned char old_addr[ETH_ALEN];
+
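+       /* remember the current address for rollback; sa->sa_data already
+        * holds the requested new address
+        */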
+       memcpy(old_addr, ndev->dev_addr, sizeof(old_addr));
+
+       ret = eth_mac_addr(ndev, sa);
+       if (ret)
+               return ret;
+
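+       /* complete any scan in progress as aborted before the address
+        * changes
+        */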
+       qtnf_scan_done(vif->mac, true);
+
+       ret = qtnf_cmd_send_change_intf_type(vif, vif->wdev.iftype,
+                                            sa->sa_data);
+
+       if (ret)
+               memcpy(ndev->dev_addr, old_addr, ETH_ALEN);
+
+       return ret;
+}
+
 /* Network device ops handlers */
 const struct net_device_ops qtnf_netdev_ops = {
        .ndo_open = qtnf_netdev_open,
@@ -186,6 +210,7 @@ const struct net_device_ops qtnf_netdev_ops = {
        .ndo_start_xmit = qtnf_netdev_hard_start_xmit,
        .ndo_tx_timeout = qtnf_netdev_tx_timeout,
        .ndo_get_stats64 = qtnf_netdev_get_stats64,
+       .ndo_set_mac_address = qtnf_netdev_set_mac_address,
 };
 
 static int qtnf_mac_init_single_band(struct wiphy *wiphy,
index f85deda703fb705675759f3241cd4ae1df2ecc98..4a32967d04793862368d3cdbcf9a27e53dd5c6b2 100644 (file)
@@ -69,11 +69,14 @@ struct qlink_msg_header {
  *     associated STAs due to inactivity. Inactivity timeout period is taken
  *     from QLINK_CMD_START_AP parameters.
  * @QLINK_HW_CAPAB_DFS_OFFLOAD: device implements DFS offload functionality
+ * @QLINK_HW_CAPAB_SCAN_RANDOM_MAC_ADDR: device supports MAC Address
+ *     Randomization in probe requests.
  */
 enum qlink_hw_capab {
        QLINK_HW_CAPAB_REG_UPDATE               = BIT(0),
        QLINK_HW_CAPAB_STA_INACT_TIMEOUT        = BIT(1),
        QLINK_HW_CAPAB_DFS_OFFLOAD              = BIT(2),
+       QLINK_HW_CAPAB_SCAN_RANDOM_MAC_ADDR     = BIT(3),
 };
 
 enum qlink_iface_type {
@@ -1089,6 +1092,7 @@ enum qlink_tlv_id {
        QTN_TLV_ID_HW_ID                = 0x0405,
        QTN_TLV_ID_CALIBRATION_VER      = 0x0406,
        QTN_TLV_ID_UBOOT_VER            = 0x0407,
+       QTN_TLV_ID_RANDOM_MAC_ADDR      = 0x0408,
 };
 
 struct qlink_tlv_hdr {
@@ -1360,4 +1364,20 @@ struct qlink_sta_stats {
        u8 rsvd[1];
 };
 
+/**
+ * struct qlink_random_mac_addr - data for QTN_TLV_ID_RANDOM_MAC_ADDR TLV
+ *
+ * Specifies the MAC address template and mask used to generate a random
+ * MAC address during scan.
+ *
+ * @mac_addr: MAC address used for randomisation
+ * @mac_addr_mask: MAC address mask used for randomisation; bits that
+ *     are 0 in the mask are randomised, bits that are 1 are taken
+ *     from @mac_addr
+ */
+struct qlink_random_mac_addr {
+       u8 mac_addr[ETH_ALEN];
+       u8 mac_addr_mask[ETH_ALEN];
+} __packed;
+
 #endif /* _QTN_QLINK_H_ */
index 54c9f6ab0c8cadb483d10413783b45b401c6f6f4..f4122c8fdd9777e852ac1bc0f01d9cedcde6c84a 100644 (file)
@@ -1907,7 +1907,7 @@ void rtl_rx_ampdu_apply(struct rtl_priv *rtlpriv)
                 reject_agg, ctrl_agg_size, agg_size);
 
        rtlpriv->hw->max_rx_aggregation_subframes =
-               (ctrl_agg_size ? agg_size : IEEE80211_MAX_AMPDU_BUF);
+               (ctrl_agg_size ? agg_size : IEEE80211_MAX_AMPDU_BUF_HT);
 }
 EXPORT_SYMBOL(rtl_rx_ampdu_apply);
 
index 9935bd09db1fb309090222be94c85ec98917b5cc..51e4e92d95a0d314c600771f06cb938e6731091b 100644 (file)
@@ -2480,7 +2480,7 @@ static void rndis_fill_station_info(struct usbnet *usbdev,
        ret = rndis_query_oid(usbdev, RNDIS_OID_GEN_LINK_SPEED, &linkspeed, &len);
        if (ret == 0) {
                sinfo->txrate.legacy = le32_to_cpu(linkspeed) / 1000;
-               sinfo->filled |= BIT(NL80211_STA_INFO_TX_BITRATE);
+               sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_BITRATE);
        }
 
        len = sizeof(rssi);
@@ -2488,7 +2488,7 @@ static void rndis_fill_station_info(struct usbnet *usbdev,
                              &rssi, &len);
        if (ret == 0) {
                sinfo->signal = level_to_qual(le32_to_cpu(rssi));
-               sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL);
+               sinfo->filled |= BIT_ULL(NL80211_STA_INFO_SIGNAL);
        }
 }
 
@@ -2928,6 +2928,8 @@ static void rndis_wlan_auth_indication(struct usbnet *usbdev,
 
        while (buflen >= sizeof(*auth_req)) {
                auth_req = (void *)buf;
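+               /* stop parsing if the buffer is shorter than the declared
+                * indication length
+                */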
+               if (buflen < le32_to_cpu(auth_req->length))
+                       return;
                type = "unknown";
                flags = le32_to_cpu(auth_req->flags);
                pairwise_error = false;
index 86ccf84ea0c6bb0fb8e01a31fa481520df0447db..597e934c4630edab93892bb270ce6bd3cd8475ac 100644 (file)
@@ -20,6 +20,8 @@
  *
  */
 
+#include <linux/pm_runtime.h>
+
 #include "../wlcore/debugfs.h"
 #include "../wlcore/wlcore.h"
 #include "../wlcore/debug.h"
@@ -276,15 +278,18 @@ static ssize_t radar_detection_write(struct file *file,
        if (unlikely(wl->state != WLCORE_STATE_ON))
                goto out;
 
-       ret = wl1271_ps_elp_wakeup(wl);
-       if (ret < 0)
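+       /* a failed pm_runtime_get_sync() still takes a usage count, so
+        * balance it with pm_runtime_put_noidle() before bailing out
+        */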
+       ret = pm_runtime_get_sync(wl->dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(wl->dev);
                goto out;
+       }
 
        ret = wl18xx_cmd_radar_detection_debug(wl, channel);
        if (ret < 0)
                count = ret;
 
-       wl1271_ps_elp_sleep(wl);
+       pm_runtime_mark_last_busy(wl->dev);
+       pm_runtime_put_autosuspend(wl->dev);
 out:
        mutex_unlock(&wl->mutex);
        return count;
@@ -315,15 +320,18 @@ static ssize_t dynamic_fw_traces_write(struct file *file,
        if (unlikely(wl->state != WLCORE_STATE_ON))
                goto out;
 
-       ret = wl1271_ps_elp_wakeup(wl);
-       if (ret < 0)
+       ret = pm_runtime_get_sync(wl->dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(wl->dev);
                goto out;
+       }
 
        ret = wl18xx_acx_dynamic_fw_traces(wl);
        if (ret < 0)
                count = ret;
 
-       wl1271_ps_elp_sleep(wl);
+       pm_runtime_mark_last_busy(wl->dev);
+       pm_runtime_put_autosuspend(wl->dev);
 out:
        mutex_unlock(&wl->mutex);
        return count;
@@ -374,9 +382,11 @@ static ssize_t radar_debug_mode_write(struct file *file,
        if (unlikely(wl->state != WLCORE_STATE_ON))
                goto out;
 
-       ret = wl1271_ps_elp_wakeup(wl);
-       if (ret < 0)
+       ret = pm_runtime_get_sync(wl->dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(wl->dev);
                goto out;
+       }
 
        wl12xx_for_each_wlvif_ap(wl, wlvif) {
                wlcore_cmd_generic_cfg(wl, wlvif,
@@ -384,7 +394,8 @@ static ssize_t radar_debug_mode_write(struct file *file,
                                       wl->radar_debug_mode, 0);
        }
 
-       wl1271_ps_elp_sleep(wl);
+       pm_runtime_mark_last_busy(wl->dev);
+       pm_runtime_put_autosuspend(wl->dev);
 out:
        mutex_unlock(&wl->mutex);
        return count;
index 3ca9167d6146af610c445f358bf45b2bebd52c03..7c83915a7c5ec17c2125d5ac8dce04fffec3f718 100644 (file)
@@ -31,7 +31,6 @@
 #include "wlcore.h"
 #include "debug.h"
 #include "wl12xx_80211.h"
-#include "ps.h"
 #include "hw_ops.h"
 
 int wl1271_acx_wake_up_conditions(struct wl1271 *wl, struct wl12xx_vif *wlvif,
index 761cf8573a805e272121fa05bf129f1ee600a10a..903968735a74202957c7a04ad563adb7c47feb80 100644 (file)
@@ -23,6 +23,7 @@
 
 #include <linux/module.h>
 #include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
 #include <linux/spi/spi.h>
 #include <linux/etherdevice.h>
 #include <linux/ieee80211.h>
@@ -191,6 +192,12 @@ int wlcore_cmd_wait_for_event_or_timeout(struct wl1271 *wl,
 
        timeout_time = jiffies + msecs_to_jiffies(WL1271_EVENT_TIMEOUT);
 
+       ret = pm_runtime_get_sync(wl->dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(wl->dev);
+               goto free_vector;
+       }
+
        do {
                if (time_after(jiffies, timeout_time)) {
                        wl1271_debug(DEBUG_CMD, "timeout waiting for event %d",
@@ -222,6 +229,9 @@ int wlcore_cmd_wait_for_event_or_timeout(struct wl1271 *wl,
        } while (!event);
 
 out:
+       pm_runtime_mark_last_busy(wl->dev);
+       pm_runtime_put_autosuspend(wl->dev);
+free_vector:
        kfree(events_vector);
        return ret;
 }
index a2cb408be8aaaae14afccd23d7f29db0d903c837..aeb74e74698e8dd440a21d5de895048b66c0858e 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/skbuff.h>
 #include <linux/slab.h>
 #include <linux/module.h>
+#include <linux/pm_runtime.h>
 
 #include "wlcore.h"
 #include "debug.h"
@@ -65,9 +66,11 @@ void wl1271_debugfs_update_stats(struct wl1271 *wl)
        if (unlikely(wl->state != WLCORE_STATE_ON))
                goto out;
 
-       ret = wl1271_ps_elp_wakeup(wl);
-       if (ret < 0)
+       ret = pm_runtime_get_sync(wl->dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(wl->dev);
                goto out;
+       }
 
        if (!wl->plt &&
            time_after(jiffies, wl->stats.fw_stats_update +
@@ -76,7 +79,8 @@ void wl1271_debugfs_update_stats(struct wl1271 *wl)
                wl->stats.fw_stats_update = jiffies;
        }
 
-       wl1271_ps_elp_sleep(wl);
+       pm_runtime_mark_last_busy(wl->dev);
+       pm_runtime_put_autosuspend(wl->dev);
 
 out:
        mutex_unlock(&wl->mutex);
@@ -118,14 +122,18 @@ static void chip_op_handler(struct wl1271 *wl, unsigned long value,
                return;
        }
 
-       ret = wl1271_ps_elp_wakeup(wl);
-       if (ret < 0)
+       ret = pm_runtime_get_sync(wl->dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(wl->dev);
+
                return;
+       }
 
        chip_op = arg;
        chip_op(wl);
 
-       wl1271_ps_elp_sleep(wl);
+       pm_runtime_mark_last_busy(wl->dev);
+       pm_runtime_put_autosuspend(wl->dev);
 }
 
 
@@ -292,9 +300,11 @@ static ssize_t dynamic_ps_timeout_write(struct file *file,
        if (unlikely(wl->state != WLCORE_STATE_ON))
                goto out;
 
-       ret = wl1271_ps_elp_wakeup(wl);
-       if (ret < 0)
+       ret = pm_runtime_get_sync(wl->dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(wl->dev);
                goto out;
+       }
 
        /* In case we're already in PSM, trigger it again to set new timeout
         * immediately without waiting for re-association
@@ -305,7 +315,8 @@ static ssize_t dynamic_ps_timeout_write(struct file *file,
                        wl1271_ps_set_mode(wl, wlvif, STATION_AUTO_PS_MODE);
        }
 
-       wl1271_ps_elp_sleep(wl);
+       pm_runtime_mark_last_busy(wl->dev);
+       pm_runtime_put_autosuspend(wl->dev);
 
 out:
        mutex_unlock(&wl->mutex);
@@ -359,9 +370,11 @@ static ssize_t forced_ps_write(struct file *file,
        if (unlikely(wl->state != WLCORE_STATE_ON))
                goto out;
 
-       ret = wl1271_ps_elp_wakeup(wl);
-       if (ret < 0)
+       ret = pm_runtime_get_sync(wl->dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(wl->dev);
                goto out;
+       }
 
        /* In case we're already in PSM, trigger it again to switch mode
         * immediately without waiting for re-association
@@ -374,7 +387,8 @@ static ssize_t forced_ps_write(struct file *file,
                        wl1271_ps_set_mode(wl, wlvif, ps_mode);
        }
 
-       wl1271_ps_elp_sleep(wl);
+       pm_runtime_mark_last_busy(wl->dev);
+       pm_runtime_put_autosuspend(wl->dev);
 
 out:
        mutex_unlock(&wl->mutex);
@@ -838,15 +852,18 @@ static ssize_t rx_streaming_interval_write(struct file *file,
 
        wl->conf.rx_streaming.interval = value;
 
-       ret = wl1271_ps_elp_wakeup(wl);
-       if (ret < 0)
+       ret = pm_runtime_get_sync(wl->dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(wl->dev);
                goto out;
+       }
 
        wl12xx_for_each_wlvif_sta(wl, wlvif) {
                wl1271_recalc_rx_streaming(wl, wlvif);
        }
 
-       wl1271_ps_elp_sleep(wl);
+       pm_runtime_mark_last_busy(wl->dev);
+       pm_runtime_put_autosuspend(wl->dev);
 out:
        mutex_unlock(&wl->mutex);
        return count;
@@ -893,15 +910,18 @@ static ssize_t rx_streaming_always_write(struct file *file,
 
        wl->conf.rx_streaming.always = value;
 
-       ret = wl1271_ps_elp_wakeup(wl);
-       if (ret < 0)
+       ret = pm_runtime_get_sync(wl->dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(wl->dev);
                goto out;
+       }
 
        wl12xx_for_each_wlvif_sta(wl, wlvif) {
                wl1271_recalc_rx_streaming(wl, wlvif);
        }
 
-       wl1271_ps_elp_sleep(wl);
+       pm_runtime_mark_last_busy(wl->dev);
+       pm_runtime_put_autosuspend(wl->dev);
 out:
        mutex_unlock(&wl->mutex);
        return count;
@@ -940,15 +960,18 @@ static ssize_t beacon_filtering_write(struct file *file,
 
        mutex_lock(&wl->mutex);
 
-       ret = wl1271_ps_elp_wakeup(wl);
-       if (ret < 0)
+       ret = pm_runtime_get_sync(wl->dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(wl->dev);
                goto out;
+       }
 
        wl12xx_for_each_wlvif(wl, wlvif) {
                ret = wl1271_acx_beacon_filter_opt(wl, wlvif, !!value);
        }
 
-       wl1271_ps_elp_sleep(wl);
+       pm_runtime_mark_last_busy(wl->dev);
+       pm_runtime_put_autosuspend(wl->dev);
 out:
        mutex_unlock(&wl->mutex);
        return count;
@@ -1019,16 +1042,19 @@ static ssize_t sleep_auth_write(struct file *file,
                goto out;
        }
 
-       ret = wl1271_ps_elp_wakeup(wl);
-       if (ret < 0)
+       ret = pm_runtime_get_sync(wl->dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(wl->dev);
                goto out;
+       }
 
        ret = wl1271_acx_sleep_auth(wl, value);
        if (ret < 0)
                goto out_sleep;
 
 out_sleep:
-       wl1271_ps_elp_sleep(wl);
+       pm_runtime_mark_last_busy(wl->dev);
+       pm_runtime_put_autosuspend(wl->dev);
 out:
        mutex_unlock(&wl->mutex);
        return count;
@@ -1083,7 +1109,7 @@ static ssize_t dev_mem_read(struct file *file,
         * Don't fail if the runtime PM wakeup returns an error, so the
         * device's memory could be read even if the FW crashed
         */
-       wl1271_ps_elp_wakeup(wl);
+       pm_runtime_get_sync(wl->dev);
 
        /* store current partition and switch partition */
        memcpy(&old_part, &wl->curr_part, sizeof(old_part));
@@ -1102,7 +1128,8 @@ read_err:
                goto part_err;
 
 part_err:
-       wl1271_ps_elp_sleep(wl);
+       pm_runtime_mark_last_busy(wl->dev);
+       pm_runtime_put_autosuspend(wl->dev);
 
 skip_read:
        mutex_unlock(&wl->mutex);
@@ -1164,7 +1191,7 @@ static ssize_t dev_mem_write(struct file *file, const char __user *user_buf,
         * Don't fail if the runtime PM wakeup returns an error, so the
         * device's memory could be written even if the FW crashed
         */
-       wl1271_ps_elp_wakeup(wl);
+       pm_runtime_get_sync(wl->dev);
 
        /* store current partition and switch partition */
        memcpy(&old_part, &wl->curr_part, sizeof(old_part));
@@ -1183,7 +1210,8 @@ write_err:
                goto part_err;
 
 part_err:
-       wl1271_ps_elp_sleep(wl);
+       pm_runtime_mark_last_busy(wl->dev);
+       pm_runtime_put_autosuspend(wl->dev);
 
 skip_write:
        mutex_unlock(&wl->mutex);
@@ -1247,8 +1275,9 @@ static ssize_t fw_logger_write(struct file *file,
        }
 
        mutex_lock(&wl->mutex);
-       ret = wl1271_ps_elp_wakeup(wl);
+       ret = pm_runtime_get_sync(wl->dev);
        if (ret < 0) {
+               pm_runtime_put_noidle(wl->dev);
                count = ret;
                goto out;
        }
@@ -1257,7 +1286,8 @@ static ssize_t fw_logger_write(struct file *file,
 
        ret = wl12xx_cmd_config_fwlog(wl);
 
-       wl1271_ps_elp_sleep(wl);
+       pm_runtime_mark_last_busy(wl->dev);
+       pm_runtime_put_autosuspend(wl->dev);
 
 out:
        mutex_unlock(&wl->mutex);
index 3a51ab116e79c201f65708b7186aa9ee3f2bf4d2..37f785f601c1603e4e28199862f08817715fb640 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/vmalloc.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
+#include <linux/pm_runtime.h>
 
 #include "wlcore.h"
 #include "debug.h"
@@ -43,6 +44,7 @@
 
 #define WL1271_BOOT_RETRIES 3
 #define WL1271_SUSPEND_SLEEP 100
+#define WL1271_WAKEUP_TIMEOUT 500
 
 static char *fwlog_param;
 static int fwlog_mem_blocks = -1;
@@ -153,9 +155,11 @@ static void wl1271_rx_streaming_enable_work(struct work_struct *work)
        if (!wl->conf.rx_streaming.interval)
                goto out;
 
-       ret = wl1271_ps_elp_wakeup(wl);
-       if (ret < 0)
+       ret = pm_runtime_get_sync(wl->dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(wl->dev);
                goto out;
+       }
 
        ret = wl1271_set_rx_streaming(wl, wlvif, true);
        if (ret < 0)
@@ -166,7 +170,8 @@ static void wl1271_rx_streaming_enable_work(struct work_struct *work)
                  jiffies + msecs_to_jiffies(wl->conf.rx_streaming.duration));
 
 out_sleep:
-       wl1271_ps_elp_sleep(wl);
+       pm_runtime_mark_last_busy(wl->dev);
+       pm_runtime_put_autosuspend(wl->dev);
 out:
        mutex_unlock(&wl->mutex);
 }
@@ -183,16 +188,19 @@ static void wl1271_rx_streaming_disable_work(struct work_struct *work)
        if (!test_bit(WLVIF_FLAG_RX_STREAMING_STARTED, &wlvif->flags))
                goto out;
 
-       ret = wl1271_ps_elp_wakeup(wl);
-       if (ret < 0)
+       ret = pm_runtime_get_sync(wl->dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(wl->dev);
                goto out;
+       }
 
        ret = wl1271_set_rx_streaming(wl, wlvif, false);
        if (ret)
                goto out_sleep;
 
 out_sleep:
-       wl1271_ps_elp_sleep(wl);
+       pm_runtime_mark_last_busy(wl->dev);
+       pm_runtime_put_autosuspend(wl->dev);
 out:
        mutex_unlock(&wl->mutex);
 }
@@ -229,9 +237,11 @@ static void wlcore_rc_update_work(struct work_struct *work)
        if (unlikely(wl->state != WLCORE_STATE_ON))
                goto out;
 
-       ret = wl1271_ps_elp_wakeup(wl);
-       if (ret < 0)
+       ret = pm_runtime_get_sync(wl->dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(wl->dev);
                goto out;
+       }
 
        if (ieee80211_vif_is_mesh(vif)) {
                ret = wl1271_acx_set_ht_capabilities(wl, &wlvif->rc_ht_cap,
@@ -243,7 +253,8 @@ static void wlcore_rc_update_work(struct work_struct *work)
        }
 
 out_sleep:
-       wl1271_ps_elp_sleep(wl);
+       pm_runtime_mark_last_busy(wl->dev);
+       pm_runtime_put_autosuspend(wl->dev);
 out:
        mutex_unlock(&wl->mutex);
 }
@@ -539,15 +550,16 @@ static int wlcore_irq_locked(struct wl1271 *wl)
        if (unlikely(wl->state != WLCORE_STATE_ON))
                goto out;
 
-       ret = wl1271_ps_elp_wakeup(wl);
-       if (ret < 0)
+       ret = pm_runtime_get_sync(wl->dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(wl->dev);
                goto out;
+       }
 
        while (!done && loopcount--) {
                /*
                 * In order to avoid a race with the hardirq, clear the flag
-                * before acknowledging the chip. Since the mutex is held,
-                * wl1271_ps_elp_wakeup cannot be called concurrently.
+                * before acknowledging the chip.
                 */
                clear_bit(WL1271_FLAG_IRQ_RUNNING, &wl->flags);
                smp_mb__after_atomic();
@@ -641,7 +653,8 @@ static int wlcore_irq_locked(struct wl1271 *wl)
                        wl1271_debug(DEBUG_IRQ, "WL1271_ACX_INTR_HW_AVAILABLE");
        }
 
-       wl1271_ps_elp_sleep(wl);
+       pm_runtime_mark_last_busy(wl->dev);
+       pm_runtime_put_autosuspend(wl->dev);
 
 out:
        return ret;
@@ -796,8 +809,6 @@ void wl12xx_queue_recovery_work(struct wl1271 *wl)
 
                wl->state = WLCORE_STATE_RESTARTING;
                set_bit(WL1271_FLAG_RECOVERY_IN_PROGRESS, &wl->flags);
-               wl1271_ps_elp_wakeup(wl);
-               wlcore_disable_interrupts_nosync(wl);
                ieee80211_queue_work(wl->hw, &wl->recovery_work);
        }
 }
@@ -819,6 +830,7 @@ size_t wl12xx_copy_fwlog(struct wl1271 *wl, u8 *memblock, size_t maxlen)
 static void wl12xx_read_fwlog_panic(struct wl1271 *wl)
 {
        u32 end_of_log = 0;
+       int error;
 
        if (wl->quirks & WLCORE_QUIRK_FWLOG_NOT_IMPLEMENTED)
                return;
@@ -830,8 +842,11 @@ static void wl12xx_read_fwlog_panic(struct wl1271 *wl)
         * Do not send a stop fwlog command if the fw is hung or if
         * dbgpins are used (due to some fw bug).
         */
-       if (wl1271_ps_elp_wakeup(wl))
+       error = pm_runtime_get_sync(wl->dev);
+       if (error < 0) {
+               pm_runtime_put_noidle(wl->dev);
                return;
+       }
        if (!wl->watchdog_recovery &&
            wl->conf.fwlog.output != WL12XX_FWLOG_OUTPUT_DBG_PINS)
                wl12xx_cmd_stop_fwlog(wl);
@@ -919,12 +934,20 @@ static void wl1271_recovery_work(struct work_struct *work)
                container_of(work, struct wl1271, recovery_work);
        struct wl12xx_vif *wlvif;
        struct ieee80211_vif *vif;
+       int error;
 
        mutex_lock(&wl->mutex);
 
        if (wl->state == WLCORE_STATE_OFF || wl->plt)
                goto out_unlock;
 
+       error = pm_runtime_get_sync(wl->dev);
+       if (error < 0) {
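+               /* carry on regardless: recovery is going to restart the
+                * chip anyway
+                */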
+               wl1271_warning("Enable for recovery failed");
+               pm_runtime_put_noidle(wl->dev);
+       }
+       wlcore_disable_interrupts_nosync(wl);
+
        if (!test_bit(WL1271_FLAG_INTENDED_FW_RECOVERY, &wl->flags)) {
                if (wl->conf.fwlog.output == WL12XX_FWLOG_OUTPUT_HOST)
                        wl12xx_read_fwlog_panic(wl);
@@ -958,6 +981,8 @@ static void wl1271_recovery_work(struct work_struct *work)
        }
 
        wlcore_op_stop_locked(wl);
+       pm_runtime_mark_last_busy(wl->dev);
+       pm_runtime_put_autosuspend(wl->dev);
 
        ieee80211_restart_hw(wl->hw);
 
@@ -978,24 +1003,6 @@ static int wlcore_fw_wakeup(struct wl1271 *wl)
        return wlcore_raw_write32(wl, HW_ACCESS_ELP_CTRL_REG, ELPCTRL_WAKE_UP);
 }
 
-static int wlcore_fw_sleep(struct wl1271 *wl)
-{
-       int ret;
-
-       mutex_lock(&wl->mutex);
-       ret = wlcore_raw_write32(wl, HW_ACCESS_ELP_CTRL_REG, ELPCTRL_SLEEP);
-       if (ret < 0) {
-               wl12xx_queue_recovery_work(wl);
-               goto out;
-       }
-       set_bit(WL1271_FLAG_IN_ELP, &wl->flags);
-out:
-       mutex_unlock(&wl->mutex);
-       mdelay(WL1271_SUSPEND_SLEEP);
-
-       return 0;
-}
-
 static int wl1271_setup(struct wl1271 *wl)
 {
        wl->raw_fw_status = kzalloc(wl->fw_status_len, GFP_KERNEL);
@@ -1184,7 +1191,6 @@ int wl1271_plt_stop(struct wl1271 *wl)
        wl1271_flush_deferred_work(wl);
        cancel_work_sync(&wl->netstack_work);
        cancel_work_sync(&wl->recovery_work);
-       cancel_delayed_work_sync(&wl->elp_work);
        cancel_delayed_work_sync(&wl->tx_watchdog_work);
 
        mutex_lock(&wl->mutex);
@@ -1719,6 +1725,7 @@ static int __maybe_unused wl1271_op_suspend(struct ieee80211_hw *hw,
 {
        struct wl1271 *wl = hw->priv;
        struct wl12xx_vif *wlvif;
+       unsigned long flags;
        int ret;
 
        wl1271_debug(DEBUG_MAC80211, "mac80211 suspend wow=%d", !!wow);
@@ -1734,8 +1741,9 @@ static int __maybe_unused wl1271_op_suspend(struct ieee80211_hw *hw,
 
        mutex_lock(&wl->mutex);
 
-       ret = wl1271_ps_elp_wakeup(wl);
+       ret = pm_runtime_get_sync(wl->dev);
        if (ret < 0) {
+               pm_runtime_put_noidle(wl->dev);
                mutex_unlock(&wl->mutex);
                return ret;
        }
@@ -1765,6 +1773,7 @@ static int __maybe_unused wl1271_op_suspend(struct ieee80211_hw *hw,
                goto out_sleep;
 
 out_sleep:
+       pm_runtime_put_noidle(wl->dev);
        mutex_unlock(&wl->mutex);
 
        if (ret < 0) {
@@ -1775,21 +1784,7 @@ out_sleep:
        /* flush any remaining work */
        wl1271_debug(DEBUG_MAC80211, "flushing remaining works");
 
-       /*
-        * disable and re-enable interrupts in order to flush
-        * the threaded_irq
-        */
-       wlcore_disable_interrupts(wl);
-
-       /*
-        * set suspended flag to avoid triggering a new threaded_irq
-        * work. no need for spinlock as interrupts are disabled.
-        */
-       set_bit(WL1271_FLAG_SUSPENDED, &wl->flags);
-
-       wlcore_enable_interrupts(wl);
        flush_work(&wl->tx_work);
-       flush_delayed_work(&wl->elp_work);
 
        /*
         * Cancel the watchdog even if above tx_flush failed. We will detect
@@ -1798,15 +1793,14 @@ out_sleep:
        cancel_delayed_work(&wl->tx_watchdog_work);
 
        /*
-        * Use an immediate call for allowing the firmware to go into power
-        * save during suspend.
-        * Using a workque for this last write was only hapenning on resume
-        * leaving the firmware with power save disabled during suspend,
-        * while consuming full power during wowlan suspend.
+        * set suspended flag to avoid triggering a new threaded_irq
+        * work.
         */
-       wlcore_fw_sleep(wl);
+       spin_lock_irqsave(&wl->wl_lock, flags);
+       set_bit(WL1271_FLAG_SUSPENDED, &wl->flags);
+       spin_unlock_irqrestore(&wl->wl_lock, flags);
 
-       return 0;
+       return pm_runtime_force_suspend(wl->dev);
 }
 
 static int __maybe_unused wl1271_op_resume(struct ieee80211_hw *hw)
@@ -1821,6 +1815,12 @@ static int __maybe_unused wl1271_op_resume(struct ieee80211_hw *hw)
                     wl->wow_enabled);
        WARN_ON(!wl->wow_enabled);
 
+       ret = pm_runtime_force_resume(wl->dev);
+       if (ret < 0) {
+               wl1271_error("ELP wakeup failure!");
+               goto out_sleep;
+       }
+
        /*
         * re-enable irq_work enqueuing, and call irq_work directly if
         * there is a pending work.
@@ -1857,9 +1857,11 @@ static int __maybe_unused wl1271_op_resume(struct ieee80211_hw *hw)
                goto out_sleep;
        }
 
-       ret = wl1271_ps_elp_wakeup(wl);
-       if (ret < 0)
+       ret = pm_runtime_get_sync(wl->dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(wl->dev);
                goto out;
+       }
 
        wl12xx_for_each_wlvif(wl, wlvif) {
                if (wlcore_is_p2p_mgmt(wlvif))
@@ -1878,7 +1880,8 @@ static int __maybe_unused wl1271_op_resume(struct ieee80211_hw *hw)
                goto out_sleep;
 
 out_sleep:
-       wl1271_ps_elp_sleep(wl);
+       pm_runtime_mark_last_busy(wl->dev);
+       pm_runtime_put_autosuspend(wl->dev);
 
 out:
        wl->wow_enabled = false;
@@ -1945,7 +1948,6 @@ static void wlcore_op_stop_locked(struct wl1271 *wl)
        cancel_delayed_work_sync(&wl->scan_complete_work);
        cancel_work_sync(&wl->netstack_work);
        cancel_work_sync(&wl->tx_work);
-       cancel_delayed_work_sync(&wl->elp_work);
        cancel_delayed_work_sync(&wl->tx_watchdog_work);
 
        /* let's notify MAC80211 about the remaining pending TX frames */
@@ -2060,13 +2062,16 @@ static void wlcore_channel_switch_work(struct work_struct *work)
        vif = wl12xx_wlvif_to_vif(wlvif);
        ieee80211_chswitch_done(vif, false);
 
-       ret = wl1271_ps_elp_wakeup(wl);
-       if (ret < 0)
+       ret = pm_runtime_get_sync(wl->dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(wl->dev);
                goto out;
+       }
 
        wl12xx_cmd_stop_channel_switch(wl, wlvif);
 
-       wl1271_ps_elp_sleep(wl);
+       pm_runtime_mark_last_busy(wl->dev);
+       pm_runtime_put_autosuspend(wl->dev);
 out:
        mutex_unlock(&wl->mutex);
 }
@@ -2128,14 +2133,17 @@ static void wlcore_pending_auth_complete_work(struct work_struct *work)
        if (!time_after(time_spare, wlvif->pending_auth_reply_time))
                goto out;
 
-       ret = wl1271_ps_elp_wakeup(wl);
-       if (ret < 0)
+       ret = pm_runtime_get_sync(wl->dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(wl->dev);
                goto out;
+       }
 
        /* cancel the ROC if active */
        wlcore_update_inconn_sta(wl, wlvif, NULL, false);
 
-       wl1271_ps_elp_sleep(wl);
+       pm_runtime_mark_last_busy(wl->dev);
+       pm_runtime_put_autosuspend(wl->dev);
 out:
        mutex_unlock(&wl->mutex);
 }
@@ -2537,9 +2545,6 @@ static int wl1271_op_add_interface(struct ieee80211_hw *hw,
        wl12xx_get_vif_count(hw, vif, &vif_count);
 
        mutex_lock(&wl->mutex);
-       ret = wl1271_ps_elp_wakeup(wl);
-       if (ret < 0)
-               goto out_unlock;
 
        /*
         * in some very corner case HW recovery scenarios its possible to
@@ -2568,14 +2573,6 @@ static int wl1271_op_add_interface(struct ieee80211_hw *hw,
        if (ret < 0)
                goto out;
 
-       if (wl12xx_need_fw_change(wl, vif_count, true)) {
-               wl12xx_force_active_psm(wl);
-               set_bit(WL1271_FLAG_INTENDED_FW_RECOVERY, &wl->flags);
-               mutex_unlock(&wl->mutex);
-               wl1271_recovery_work(&wl->recovery_work);
-               return 0;
-       }
-
        /*
         * TODO: after the nvs issue will be solved, move this block
         * to start(), and make sure here the driver is ON.
@@ -2592,6 +2589,24 @@ static int wl1271_op_add_interface(struct ieee80211_hw *hw,
                        goto out;
        }
 
+       /*
+        * Call runtime PM only after possible wl12xx_init_fw() above
+        * is done. Otherwise we do not have interrupts enabled.
+        */
+       ret = pm_runtime_get_sync(wl->dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(wl->dev);
+               goto out_unlock;
+       }
+
+       if (wl12xx_need_fw_change(wl, vif_count, true)) {
+               wl12xx_force_active_psm(wl);
+               set_bit(WL1271_FLAG_INTENDED_FW_RECOVERY, &wl->flags);
+               mutex_unlock(&wl->mutex);
+               wl1271_recovery_work(&wl->recovery_work);
+               return 0;
+       }
+
        if (!wlcore_is_p2p_mgmt(wlvif)) {
                ret = wl12xx_cmd_role_enable(wl, vif->addr,
                                             role_type, &wlvif->role_id);
@@ -2622,7 +2637,8 @@ static int wl1271_op_add_interface(struct ieee80211_hw *hw,
        else
                wl->sta_count++;
 out:
-       wl1271_ps_elp_sleep(wl);
+       pm_runtime_mark_last_busy(wl->dev);
+       pm_runtime_put_autosuspend(wl->dev);
 out_unlock:
        mutex_unlock(&wl->mutex);
 
@@ -2677,9 +2693,11 @@ static void __wl1271_op_remove_interface(struct wl1271 *wl,
 
        if (!test_bit(WL1271_FLAG_RECOVERY_IN_PROGRESS, &wl->flags)) {
                /* disable active roles */
-               ret = wl1271_ps_elp_wakeup(wl);
-               if (ret < 0)
+               ret = pm_runtime_get_sync(wl->dev);
+               if (ret < 0) {
+                       pm_runtime_put_noidle(wl->dev);
                        goto deinit;
+               }
 
                if (wlvif->bss_type == BSS_TYPE_STA_BSS ||
                    wlvif->bss_type == BSS_TYPE_IBSS) {
@@ -2697,7 +2715,8 @@ static void __wl1271_op_remove_interface(struct wl1271 *wl,
                                goto deinit;
                }
 
-               wl1271_ps_elp_sleep(wl);
+               pm_runtime_mark_last_busy(wl->dev);
+               pm_runtime_put_autosuspend(wl->dev);
        }
 deinit:
        wl12xx_tx_reset_wlvif(wl, wlvif);
@@ -3121,9 +3140,11 @@ static int wl1271_op_config(struct ieee80211_hw *hw, u32 changed)
        if (unlikely(wl->state != WLCORE_STATE_ON))
                goto out;
 
-       ret = wl1271_ps_elp_wakeup(wl);
-       if (ret < 0)
+       ret = pm_runtime_get_sync(wl->dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(wl->dev);
                goto out;
+       }
 
        /* configure each interface */
        wl12xx_for_each_wlvif(wl, wlvif) {
@@ -3133,7 +3154,8 @@ static int wl1271_op_config(struct ieee80211_hw *hw, u32 changed)
        }
 
 out_sleep:
-       wl1271_ps_elp_sleep(wl);
+       pm_runtime_mark_last_busy(wl->dev);
+       pm_runtime_put_autosuspend(wl->dev);
 
 out:
        mutex_unlock(&wl->mutex);
@@ -3202,9 +3224,11 @@ static void wl1271_op_configure_filter(struct ieee80211_hw *hw,
        if (unlikely(wl->state != WLCORE_STATE_ON))
                goto out;
 
-       ret = wl1271_ps_elp_wakeup(wl);
-       if (ret < 0)
+       ret = pm_runtime_get_sync(wl->dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(wl->dev);
                goto out;
+       }
 
        wl12xx_for_each_wlvif(wl, wlvif) {
                if (wlcore_is_p2p_mgmt(wlvif))
@@ -3247,7 +3271,8 @@ static void wl1271_op_configure_filter(struct ieee80211_hw *hw,
         */
 
 out_sleep:
-       wl1271_ps_elp_sleep(wl);
+       pm_runtime_mark_last_busy(wl->dev);
+       pm_runtime_put_autosuspend(wl->dev);
 
 out:
        mutex_unlock(&wl->mutex);
@@ -3454,13 +3479,16 @@ static int wlcore_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
                goto out_wake_queues;
        }
 
-       ret = wl1271_ps_elp_wakeup(wl);
-       if (ret < 0)
+       ret = pm_runtime_get_sync(wl->dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(wl->dev);
                goto out_wake_queues;
+       }
 
        ret = wlcore_hw_set_key(wl, cmd, vif, sta, key_conf);
 
-       wl1271_ps_elp_sleep(wl);
+       pm_runtime_mark_last_busy(wl->dev);
+       pm_runtime_put_autosuspend(wl->dev);
 
 out_wake_queues:
        if (might_change_spare)
@@ -3600,9 +3628,11 @@ static void wl1271_op_set_default_key_idx(struct ieee80211_hw *hw,
                goto out_unlock;
        }
 
-       ret = wl1271_ps_elp_wakeup(wl);
-       if (ret < 0)
+       ret = pm_runtime_get_sync(wl->dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(wl->dev);
                goto out_unlock;
+       }
 
        wlvif->default_key = key_idx;
 
@@ -3616,7 +3646,8 @@ static void wl1271_op_set_default_key_idx(struct ieee80211_hw *hw,
        }
 
 out_sleep:
-       wl1271_ps_elp_sleep(wl);
+       pm_runtime_mark_last_busy(wl->dev);
+       pm_runtime_put_autosuspend(wl->dev);
 
 out_unlock:
        mutex_unlock(&wl->mutex);
@@ -3634,7 +3665,7 @@ void wlcore_regdomain_config(struct wl1271 *wl)
        if (unlikely(wl->state != WLCORE_STATE_ON))
                goto out;
 
-       ret = wl1271_ps_elp_wakeup(wl);
+       ret = pm_runtime_get_sync(wl->dev);
        if (ret < 0)
                goto out;
 
@@ -3644,7 +3675,8 @@ void wlcore_regdomain_config(struct wl1271 *wl)
                goto out;
        }
 
-       wl1271_ps_elp_sleep(wl);
+       pm_runtime_mark_last_busy(wl->dev);
+       pm_runtime_put_autosuspend(wl->dev);
 out:
        mutex_unlock(&wl->mutex);
 }
@@ -3678,9 +3710,11 @@ static int wl1271_op_hw_scan(struct ieee80211_hw *hw,
                goto out;
        }
 
-       ret = wl1271_ps_elp_wakeup(wl);
-       if (ret < 0)
+       ret = pm_runtime_get_sync(wl->dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(wl->dev);
                goto out;
+       }
 
        /* fail if there is any role in ROC */
        if (find_first_bit(wl->roc_map, WL12XX_MAX_ROLES) < WL12XX_MAX_ROLES) {
@@ -3691,7 +3725,8 @@ static int wl1271_op_hw_scan(struct ieee80211_hw *hw,
 
        ret = wlcore_scan(hw->priv, vif, ssid, len, req);
 out_sleep:
-       wl1271_ps_elp_sleep(wl);
+       pm_runtime_mark_last_busy(wl->dev);
+       pm_runtime_put_autosuspend(wl->dev);
 out:
        mutex_unlock(&wl->mutex);
 
@@ -3718,9 +3753,11 @@ static void wl1271_op_cancel_hw_scan(struct ieee80211_hw *hw,
        if (wl->scan.state == WL1271_SCAN_STATE_IDLE)
                goto out;
 
-       ret = wl1271_ps_elp_wakeup(wl);
-       if (ret < 0)
+       ret = pm_runtime_get_sync(wl->dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(wl->dev);
                goto out;
+       }
 
        if (wl->scan.state != WL1271_SCAN_STATE_DONE) {
                ret = wl->ops->scan_stop(wl, wlvif);
@@ -3741,7 +3778,8 @@ static void wl1271_op_cancel_hw_scan(struct ieee80211_hw *hw,
        ieee80211_scan_completed(wl->hw, &info);
 
 out_sleep:
-       wl1271_ps_elp_sleep(wl);
+       pm_runtime_mark_last_busy(wl->dev);
+       pm_runtime_put_autosuspend(wl->dev);
 out:
        mutex_unlock(&wl->mutex);
 
@@ -3766,9 +3804,11 @@ static int wl1271_op_sched_scan_start(struct ieee80211_hw *hw,
                goto out;
        }
 
-       ret = wl1271_ps_elp_wakeup(wl);
-       if (ret < 0)
+       ret = pm_runtime_get_sync(wl->dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(wl->dev);
                goto out;
+       }
 
        ret = wl->ops->sched_scan_start(wl, wlvif, req, ies);
        if (ret < 0)
@@ -3777,7 +3817,8 @@ static int wl1271_op_sched_scan_start(struct ieee80211_hw *hw,
        wl->sched_vif = wlvif;
 
 out_sleep:
-       wl1271_ps_elp_sleep(wl);
+       pm_runtime_mark_last_busy(wl->dev);
+       pm_runtime_put_autosuspend(wl->dev);
 out:
        mutex_unlock(&wl->mutex);
        return ret;
@@ -3797,13 +3838,16 @@ static int wl1271_op_sched_scan_stop(struct ieee80211_hw *hw,
        if (unlikely(wl->state != WLCORE_STATE_ON))
                goto out;
 
-       ret = wl1271_ps_elp_wakeup(wl);
-       if (ret < 0)
+       ret = pm_runtime_get_sync(wl->dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(wl->dev);
                goto out;
+       }
 
        wl->ops->sched_scan_stop(wl, wlvif);
 
-       wl1271_ps_elp_sleep(wl);
+       pm_runtime_mark_last_busy(wl->dev);
+       pm_runtime_put_autosuspend(wl->dev);
 out:
        mutex_unlock(&wl->mutex);
 
@@ -3822,15 +3866,18 @@ static int wl1271_op_set_frag_threshold(struct ieee80211_hw *hw, u32 value)
                goto out;
        }
 
-       ret = wl1271_ps_elp_wakeup(wl);
-       if (ret < 0)
+       ret = pm_runtime_get_sync(wl->dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(wl->dev);
                goto out;
+       }
 
        ret = wl1271_acx_frag_threshold(wl, value);
        if (ret < 0)
                wl1271_warning("wl1271_op_set_frag_threshold failed: %d", ret);
 
-       wl1271_ps_elp_sleep(wl);
+       pm_runtime_mark_last_busy(wl->dev);
+       pm_runtime_put_autosuspend(wl->dev);
 
 out:
        mutex_unlock(&wl->mutex);
@@ -3851,16 +3898,19 @@ static int wl1271_op_set_rts_threshold(struct ieee80211_hw *hw, u32 value)
                goto out;
        }
 
-       ret = wl1271_ps_elp_wakeup(wl);
-       if (ret < 0)
+       ret = pm_runtime_get_sync(wl->dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(wl->dev);
                goto out;
+       }
 
        wl12xx_for_each_wlvif(wl, wlvif) {
                ret = wl1271_acx_rts_threshold(wl, wlvif, value);
                if (ret < 0)
                        wl1271_warning("set rts threshold failed: %d", ret);
        }
-       wl1271_ps_elp_sleep(wl);
+       pm_runtime_mark_last_busy(wl->dev);
+       pm_runtime_put_autosuspend(wl->dev);
 
 out:
        mutex_unlock(&wl->mutex);
@@ -4607,9 +4657,11 @@ static void wl1271_op_bss_info_changed(struct ieee80211_hw *hw,
        if (unlikely(!test_bit(WLVIF_FLAG_INITIALIZED, &wlvif->flags)))
                goto out;
 
-       ret = wl1271_ps_elp_wakeup(wl);
-       if (ret < 0)
+       ret = pm_runtime_get_sync(wl->dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(wl->dev);
                goto out;
+       }
 
        if ((changed & BSS_CHANGED_TXPOWER) &&
            bss_conf->txpower != wlvif->power_level) {
@@ -4626,7 +4678,8 @@ static void wl1271_op_bss_info_changed(struct ieee80211_hw *hw,
        else
                wl1271_bss_info_changed_sta(wl, vif, bss_conf, changed);
 
-       wl1271_ps_elp_sleep(wl);
+       pm_runtime_mark_last_busy(wl->dev);
+       pm_runtime_put_autosuspend(wl->dev);
 
 out:
        mutex_unlock(&wl->mutex);
@@ -4665,9 +4718,11 @@ static void wlcore_op_change_chanctx(struct ieee80211_hw *hw,
 
        mutex_lock(&wl->mutex);
 
-       ret = wl1271_ps_elp_wakeup(wl);
-       if (ret < 0)
+       ret = pm_runtime_get_sync(wl->dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(wl->dev);
                goto out;
+       }
 
        wl12xx_for_each_wlvif(wl, wlvif) {
                struct ieee80211_vif *vif = wl12xx_wlvif_to_vif(wlvif);
@@ -4690,7 +4745,8 @@ static void wlcore_op_change_chanctx(struct ieee80211_hw *hw,
                }
        }
 
-       wl1271_ps_elp_sleep(wl);
+       pm_runtime_mark_last_busy(wl->dev);
+       pm_runtime_put_autosuspend(wl->dev);
 out:
        mutex_unlock(&wl->mutex);
 }
@@ -4719,9 +4775,11 @@ static int wlcore_op_assign_vif_chanctx(struct ieee80211_hw *hw,
        if (unlikely(!test_bit(WLVIF_FLAG_INITIALIZED, &wlvif->flags)))
                goto out;
 
-       ret = wl1271_ps_elp_wakeup(wl);
-       if (ret < 0)
+       ret = pm_runtime_get_sync(wl->dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(wl->dev);
                goto out;
+       }
 
        wlvif->band = ctx->def.chan->band;
        wlvif->channel = channel;
@@ -4737,7 +4795,8 @@ static int wlcore_op_assign_vif_chanctx(struct ieee80211_hw *hw,
                wlvif->radar_enabled = true;
        }
 
-       wl1271_ps_elp_sleep(wl);
+       pm_runtime_mark_last_busy(wl->dev);
+       pm_runtime_put_autosuspend(wl->dev);
 out:
        mutex_unlock(&wl->mutex);
 
@@ -4768,9 +4827,11 @@ static void wlcore_op_unassign_vif_chanctx(struct ieee80211_hw *hw,
        if (unlikely(!test_bit(WLVIF_FLAG_INITIALIZED, &wlvif->flags)))
                goto out;
 
-       ret = wl1271_ps_elp_wakeup(wl);
-       if (ret < 0)
+       ret = pm_runtime_get_sync(wl->dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(wl->dev);
                goto out;
+       }
 
        if (wlvif->radar_enabled) {
                wl1271_debug(DEBUG_MAC80211, "Stop radar detection");
@@ -4778,7 +4839,8 @@ static void wlcore_op_unassign_vif_chanctx(struct ieee80211_hw *hw,
                wlvif->radar_enabled = false;
        }
 
-       wl1271_ps_elp_sleep(wl);
+       pm_runtime_mark_last_busy(wl->dev);
+       pm_runtime_put_autosuspend(wl->dev);
 out:
        mutex_unlock(&wl->mutex);
 }
@@ -4835,9 +4897,11 @@ wlcore_op_switch_vif_chanctx(struct ieee80211_hw *hw,
 
        mutex_lock(&wl->mutex);
 
-       ret = wl1271_ps_elp_wakeup(wl);
-       if (ret < 0)
+       ret = pm_runtime_get_sync(wl->dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(wl->dev);
                goto out;
+       }
 
        for (i = 0; i < n_vifs; i++) {
                struct wl12xx_vif *wlvif = wl12xx_vif_to_data(vifs[i].vif);
@@ -4847,7 +4911,8 @@ wlcore_op_switch_vif_chanctx(struct ieee80211_hw *hw,
                        goto out_sleep;
        }
 out_sleep:
-       wl1271_ps_elp_sleep(wl);
+       pm_runtime_mark_last_busy(wl->dev);
+       pm_runtime_put_autosuspend(wl->dev);
 out:
        mutex_unlock(&wl->mutex);
 
@@ -4878,9 +4943,11 @@ static int wl1271_op_conf_tx(struct ieee80211_hw *hw,
        if (!test_bit(WLVIF_FLAG_INITIALIZED, &wlvif->flags))
                goto out;
 
-       ret = wl1271_ps_elp_wakeup(wl);
-       if (ret < 0)
+       ret = pm_runtime_get_sync(wl->dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(wl->dev);
                goto out;
+       }
 
        /*
         * the txop is configured in units of 32us by mac80211,
@@ -4899,7 +4966,8 @@ static int wl1271_op_conf_tx(struct ieee80211_hw *hw,
                                 0, 0);
 
 out_sleep:
-       wl1271_ps_elp_sleep(wl);
+       pm_runtime_mark_last_busy(wl->dev);
+       pm_runtime_put_autosuspend(wl->dev);
 
 out:
        mutex_unlock(&wl->mutex);
@@ -4923,16 +4991,19 @@ static u64 wl1271_op_get_tsf(struct ieee80211_hw *hw,
        if (unlikely(wl->state != WLCORE_STATE_ON))
                goto out;
 
-       ret = wl1271_ps_elp_wakeup(wl);
-       if (ret < 0)
+       ret = pm_runtime_get_sync(wl->dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(wl->dev);
                goto out;
+       }
 
        ret = wl12xx_acx_tsf_info(wl, wlvif, &mactime);
        if (ret < 0)
                goto out_sleep;
 
 out_sleep:
-       wl1271_ps_elp_sleep(wl);
+       pm_runtime_mark_last_busy(wl->dev);
+       pm_runtime_put_autosuspend(wl->dev);
 
 out:
        mutex_unlock(&wl->mutex);
@@ -5238,13 +5309,16 @@ static int wl12xx_op_sta_state(struct ieee80211_hw *hw,
                goto out;
        }
 
-       ret = wl1271_ps_elp_wakeup(wl);
-       if (ret < 0)
+       ret = pm_runtime_get_sync(wl->dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(wl->dev);
                goto out;
+       }
 
        ret = wl12xx_update_sta_state(wl, wlvif, sta, old_state, new_state);
 
-       wl1271_ps_elp_sleep(wl);
+       pm_runtime_mark_last_busy(wl->dev);
+       pm_runtime_put_autosuspend(wl->dev);
 out:
        mutex_unlock(&wl->mutex);
        if (new_state < old_state)
@@ -5293,9 +5367,11 @@ static int wl1271_op_ampdu_action(struct ieee80211_hw *hw,
 
        ba_bitmap = &wl->links[hlid].ba_bitmap;
 
-       ret = wl1271_ps_elp_wakeup(wl);
-       if (ret < 0)
+       ret = pm_runtime_get_sync(wl->dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(wl->dev);
                goto out;
+       }
 
        wl1271_debug(DEBUG_MAC80211, "mac80211 ampdu: Rx tid %d action %d",
                     tid, action);
@@ -5368,7 +5444,8 @@ static int wl1271_op_ampdu_action(struct ieee80211_hw *hw,
                ret = -EINVAL;
        }
 
-       wl1271_ps_elp_sleep(wl);
+       pm_runtime_mark_last_busy(wl->dev);
+       pm_runtime_put_autosuspend(wl->dev);
 
 out:
        mutex_unlock(&wl->mutex);
@@ -5402,16 +5479,19 @@ static int wl12xx_set_bitrate_mask(struct ieee80211_hw *hw,
        if (wlvif->bss_type == BSS_TYPE_STA_BSS &&
            !test_bit(WLVIF_FLAG_STA_ASSOCIATED, &wlvif->flags)) {
 
-               ret = wl1271_ps_elp_wakeup(wl);
-               if (ret < 0)
+               ret = pm_runtime_get_sync(wl->dev);
+               if (ret < 0) {
+                       pm_runtime_put_noidle(wl->dev);
                        goto out;
+               }
 
                wl1271_set_band_rate(wl, wlvif);
                wlvif->basic_rate =
                        wl1271_tx_min_rate_get(wl, wlvif->basic_rate_set);
                ret = wl1271_acx_sta_rate_policies(wl, wlvif);
 
-               wl1271_ps_elp_sleep(wl);
+               pm_runtime_mark_last_busy(wl->dev);
+               pm_runtime_put_autosuspend(wl->dev);
        }
 out:
        mutex_unlock(&wl->mutex);
@@ -5441,9 +5521,11 @@ static void wl12xx_op_channel_switch(struct ieee80211_hw *hw,
                goto out;
        }
 
-       ret = wl1271_ps_elp_wakeup(wl);
-       if (ret < 0)
+       ret = pm_runtime_get_sync(wl->dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(wl->dev);
                goto out;
+       }
 
        /* TODO: change mac80211 to pass vif as param */
 
@@ -5465,7 +5547,8 @@ static void wl12xx_op_channel_switch(struct ieee80211_hw *hw,
        }
 
 out_sleep:
-       wl1271_ps_elp_sleep(wl);
+       pm_runtime_mark_last_busy(wl->dev);
+       pm_runtime_put_autosuspend(wl->dev);
 
 out:
        mutex_unlock(&wl->mutex);
@@ -5532,9 +5615,11 @@ static void wlcore_op_channel_switch_beacon(struct ieee80211_hw *hw,
                goto out;
        }
 
-       ret = wl1271_ps_elp_wakeup(wl);
-       if (ret < 0)
+       ret = pm_runtime_get_sync(wl->dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(wl->dev);
                goto out;
+       }
 
        ret = wl->ops->channel_switch(wl, wlvif, &ch_switch);
        if (ret)
@@ -5543,7 +5628,8 @@ static void wlcore_op_channel_switch_beacon(struct ieee80211_hw *hw,
        set_bit(WLVIF_FLAG_CS_PROGRESS, &wlvif->flags);
 
 out_sleep:
-       wl1271_ps_elp_sleep(wl);
+       pm_runtime_mark_last_busy(wl->dev);
+       pm_runtime_put_autosuspend(wl->dev);
 out:
        mutex_unlock(&wl->mutex);
 }
@@ -5584,9 +5670,11 @@ static int wlcore_op_remain_on_channel(struct ieee80211_hw *hw,
                goto out;
        }
 
-       ret = wl1271_ps_elp_wakeup(wl);
-       if (ret < 0)
+       ret = pm_runtime_get_sync(wl->dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(wl->dev);
                goto out;
+       }
 
        ret = wl12xx_start_dev(wl, wlvif, chan->band, channel);
        if (ret < 0)
@@ -5596,7 +5684,8 @@ static int wlcore_op_remain_on_channel(struct ieee80211_hw *hw,
        ieee80211_queue_delayed_work(hw, &wl->roc_complete_work,
                                     msecs_to_jiffies(duration));
 out_sleep:
-       wl1271_ps_elp_sleep(wl);
+       pm_runtime_mark_last_busy(wl->dev);
+       pm_runtime_put_autosuspend(wl->dev);
 out:
        mutex_unlock(&wl->mutex);
        return ret;
@@ -5638,13 +5727,16 @@ static int wlcore_roc_completed(struct wl1271 *wl)
                goto out;
        }
 
-       ret = wl1271_ps_elp_wakeup(wl);
-       if (ret < 0)
+       ret = pm_runtime_get_sync(wl->dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(wl->dev);
                goto out;
+       }
 
        ret = __wlcore_roc_completed(wl);
 
-       wl1271_ps_elp_sleep(wl);
+       pm_runtime_mark_last_busy(wl->dev);
+       pm_runtime_put_autosuspend(wl->dev);
 out:
        mutex_unlock(&wl->mutex);
 
@@ -5719,19 +5811,22 @@ static void wlcore_op_sta_statistics(struct ieee80211_hw *hw,
        if (unlikely(wl->state != WLCORE_STATE_ON))
                goto out;
 
-       ret = wl1271_ps_elp_wakeup(wl);
-       if (ret < 0)
+       ret = pm_runtime_get_sync(wl->dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(wl->dev);
                goto out_sleep;
+       }
 
        ret = wlcore_acx_average_rssi(wl, wlvif, &rssi_dbm);
        if (ret < 0)
                goto out_sleep;
 
-       sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL);
+       sinfo->filled |= BIT_ULL(NL80211_STA_INFO_SIGNAL);
        sinfo->signal = rssi_dbm;
 
 out_sleep:
-       wl1271_ps_elp_sleep(wl);
+       pm_runtime_mark_last_busy(wl->dev);
+       pm_runtime_put_autosuspend(wl->dev);
 
 out:
        mutex_unlock(&wl->mutex);
@@ -6300,7 +6395,6 @@ struct ieee80211_hw *wlcore_alloc_hw(size_t priv_size, u32 aggr_buf_size,
        skb_queue_head_init(&wl->deferred_rx_queue);
        skb_queue_head_init(&wl->deferred_tx_queue);
 
-       INIT_DELAYED_WORK(&wl->elp_work, wl1271_elp_work);
        INIT_WORK(&wl->netstack_work, wl1271_netstack_work);
        INIT_WORK(&wl->tx_work, wl1271_tx_work);
        INIT_WORK(&wl->recovery_work, wl1271_recovery_work);
@@ -6575,6 +6669,99 @@ out:
        complete_all(&wl->nvs_loading_complete);
 }
 
+static int __maybe_unused wlcore_runtime_suspend(struct device *dev)
+{
+       struct wl1271 *wl = dev_get_drvdata(dev);
+       struct wl12xx_vif *wlvif;
+       int error;
+
+       /* We do not enter elp sleep in PLT mode */
+       if (wl->plt)
+               return 0;
+
+       /* Nothing to do if no ELP mode requested */
+       if (wl->sleep_auth != WL1271_PSM_ELP)
+               return 0;
+
+       wl12xx_for_each_wlvif(wl, wlvif) {
+               if (!test_bit(WLVIF_FLAG_IN_PS, &wlvif->flags) &&
+                   test_bit(WLVIF_FLAG_IN_USE, &wlvif->flags))
+                       return -EBUSY;
+       }
+
+       wl1271_debug(DEBUG_PSM, "chip to elp");
+       error = wlcore_raw_write32(wl, HW_ACCESS_ELP_CTRL_REG, ELPCTRL_SLEEP);
+       if (error < 0) {
+               wl12xx_queue_recovery_work(wl);
+
+               return error;
+       }
+
+       set_bit(WL1271_FLAG_IN_ELP, &wl->flags);
+
+       return 0;
+}
+
+static int __maybe_unused wlcore_runtime_resume(struct device *dev)
+{
+       struct wl1271 *wl = dev_get_drvdata(dev);
+       DECLARE_COMPLETION_ONSTACK(compl);
+       unsigned long flags;
+       int ret;
+       unsigned long start_time = jiffies;
+       bool pending = false;
+
+       /* Nothing to do if no ELP mode requested */
+       if (!test_bit(WL1271_FLAG_IN_ELP, &wl->flags))
+               return 0;
+
+       wl1271_debug(DEBUG_PSM, "waking up chip from elp");
+
+       spin_lock_irqsave(&wl->wl_lock, flags);
+       if (test_bit(WL1271_FLAG_IRQ_RUNNING, &wl->flags))
+               pending = true;
+       else
+               wl->elp_compl = &compl;
+       spin_unlock_irqrestore(&wl->wl_lock, flags);
+
+       ret = wlcore_raw_write32(wl, HW_ACCESS_ELP_CTRL_REG, ELPCTRL_WAKE_UP);
+       if (ret < 0) {
+               wl12xx_queue_recovery_work(wl);
+               goto err;
+       }
+
+       if (!pending) {
+               ret = wait_for_completion_timeout(&compl,
+                       msecs_to_jiffies(WL1271_WAKEUP_TIMEOUT));
+               if (ret == 0) {
+                       wl1271_error("ELP wakeup timeout!");
+                       wl12xx_queue_recovery_work(wl);
+
+                       /* Return no error so runtime PM proceeds; recovery is queued */
+                       return 0;
+               }
+       }
+
+       clear_bit(WL1271_FLAG_IN_ELP, &wl->flags);
+
+       wl1271_debug(DEBUG_PSM, "wakeup time: %u ms",
+                    jiffies_to_msecs(jiffies - start_time));
+
+       return 0;
+
+err:
+       spin_lock_irqsave(&wl->wl_lock, flags);
+       wl->elp_compl = NULL;
+       spin_unlock_irqrestore(&wl->wl_lock, flags);
+       return ret;
+}
+
+static const struct dev_pm_ops wlcore_pm_ops = {
+       SET_RUNTIME_PM_OPS(wlcore_runtime_suspend,
+                          wlcore_runtime_resume,
+                          NULL)
+};
+
 int wlcore_probe(struct wl1271 *wl, struct platform_device *pdev)
 {
        struct wlcore_platdev_data *pdev_data = dev_get_platdata(&pdev->dev);
@@ -6602,6 +6789,11 @@ int wlcore_probe(struct wl1271 *wl, struct platform_device *pdev)
                wlcore_nvs_cb(NULL, wl);
        }
 
+       wl->dev->driver->pm = &wlcore_pm_ops;
+       pm_runtime_set_autosuspend_delay(wl->dev, 50);
+       pm_runtime_use_autosuspend(wl->dev);
+       pm_runtime_enable(wl->dev);
+
        return ret;
 }
 EXPORT_SYMBOL_GPL(wlcore_probe);
@@ -6610,6 +6802,13 @@ int wlcore_remove(struct platform_device *pdev)
 {
        struct wlcore_platdev_data *pdev_data = dev_get_platdata(&pdev->dev);
        struct wl1271 *wl = platform_get_drvdata(pdev);
+       int error;
+
+       error = pm_runtime_get_sync(wl->dev);
+       if (error < 0)
+               dev_warn(wl->dev, "PM runtime failed: %i\n", error);
+
+       wl->dev->driver->pm = NULL;
 
        if (pdev_data->family && pdev_data->family->nvs_name)
                wait_for_completion(&wl->nvs_loading_complete);
@@ -6621,6 +6820,11 @@ int wlcore_remove(struct platform_device *pdev)
                disable_irq_wake(wl->irq);
        }
        wl1271_unregister_hw(wl);
+
+       pm_runtime_put_sync(wl->dev);
+       pm_runtime_dont_use_autosuspend(wl->dev);
+       pm_runtime_disable(wl->dev);
+
        free_irq(wl->irq, wl);
        wlcore_free_hw(wl);
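
The bulk of the main.c conversion above is mechanical: every
wl1271_ps_elp_wakeup()/wl1271_ps_elp_sleep() pair becomes a runtime PM
get/put pair, holding the device awake for the duration of the firmware
operation. A minimal sketch of the resulting shape, using the names from the
hunks above (the wrapper name wlcore_do_fw_op() is made up for illustration):

    #include <linux/pm_runtime.h>

    /* Sketch of the pattern every converted mac80211 op now follows. */
    static int wlcore_do_fw_op(struct wl1271 *wl)
    {
            int ret;

            mutex_lock(&wl->mutex);

            ret = pm_runtime_get_sync(wl->dev);
            if (ret < 0) {
                    /*
                     * get_sync() bumps the usage count even on failure,
                     * so the error path must balance it with put_noidle().
                     */
                    pm_runtime_put_noidle(wl->dev);
                    goto out;
            }

            /* ... issue firmware commands here ... */

            pm_runtime_mark_last_busy(wl->dev);
            pm_runtime_put_autosuspend(wl->dev);
    out:
            mutex_unlock(&wl->mutex);
            return ret;
    }

With the 50 ms autosuspend delay set in wlcore_probe(), the final put does
not suspend the chip immediately; wlcore_runtime_suspend() runs only once the
device has been idle that long, which replaces the old ELP_ENTRY_DELAY timer.
The dev_pm_ops are assigned to wl->dev->driver->pm at probe time, presumably
because the platform driver struct is owned by the wl12xx/wl18xx bus glue
rather than by wlcore itself.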
 
index b36133b739cb383e36af616afd890e528feeca4c..9de843d1984b0ada713c029aca93a360cd9fd8b0 100644 (file)
 #include "tx.h"
 #include "debug.h"
 
-#define WL1271_WAKEUP_TIMEOUT 500
-
-#define ELP_ENTRY_DELAY  30
-#define ELP_ENTRY_DELAY_FORCE_PS  5
-
-void wl1271_elp_work(struct work_struct *work)
-{
-       struct delayed_work *dwork;
-       struct wl1271 *wl;
-       struct wl12xx_vif *wlvif;
-       int ret;
-
-       dwork = to_delayed_work(work);
-       wl = container_of(dwork, struct wl1271, elp_work);
-
-       wl1271_debug(DEBUG_PSM, "elp work");
-
-       mutex_lock(&wl->mutex);
-
-       if (unlikely(wl->state != WLCORE_STATE_ON))
-               goto out;
-
-       /* our work might have been already cancelled */
-       if (unlikely(!test_bit(WL1271_FLAG_ELP_REQUESTED, &wl->flags)))
-               goto out;
-
-       if (test_bit(WL1271_FLAG_IN_ELP, &wl->flags))
-               goto out;
-
-       wl12xx_for_each_wlvif(wl, wlvif) {
-               if (!test_bit(WLVIF_FLAG_IN_PS, &wlvif->flags) &&
-                   test_bit(WLVIF_FLAG_IN_USE, &wlvif->flags))
-                       goto out;
-       }
-
-       wl1271_debug(DEBUG_PSM, "chip to elp");
-       ret = wlcore_raw_write32(wl, HW_ACCESS_ELP_CTRL_REG, ELPCTRL_SLEEP);
-       if (ret < 0) {
-               wl12xx_queue_recovery_work(wl);
-               goto out;
-       }
-
-       set_bit(WL1271_FLAG_IN_ELP, &wl->flags);
-
-out:
-       mutex_unlock(&wl->mutex);
-}
-
-/* Routines to toggle sleep mode while in ELP */
-void wl1271_ps_elp_sleep(struct wl1271 *wl)
-{
-       struct wl12xx_vif *wlvif;
-       u32 timeout;
-
-       /* We do not enter elp sleep in PLT mode */
-       if (wl->plt)
-               return;
-
-       if (wl->sleep_auth != WL1271_PSM_ELP)
-               return;
-
-       /* we shouldn't get consecutive sleep requests */
-       if (WARN_ON(test_and_set_bit(WL1271_FLAG_ELP_REQUESTED, &wl->flags)))
-               return;
-
-       wl12xx_for_each_wlvif(wl, wlvif) {
-               if (!test_bit(WLVIF_FLAG_IN_PS, &wlvif->flags) &&
-                   test_bit(WLVIF_FLAG_IN_USE, &wlvif->flags))
-                       return;
-       }
-
-       timeout = wl->conf.conn.forced_ps ?
-                       ELP_ENTRY_DELAY_FORCE_PS : ELP_ENTRY_DELAY;
-       ieee80211_queue_delayed_work(wl->hw, &wl->elp_work,
-                                    msecs_to_jiffies(timeout));
-}
-EXPORT_SYMBOL_GPL(wl1271_ps_elp_sleep);
-
-int wl1271_ps_elp_wakeup(struct wl1271 *wl)
-{
-       DECLARE_COMPLETION_ONSTACK(compl);
-       unsigned long flags;
-       int ret;
-       unsigned long start_time = jiffies;
-       bool pending = false;
-
-       /*
-        * we might try to wake up even if we didn't go to sleep
-        * before (e.g. on boot)
-        */
-       if (!test_and_clear_bit(WL1271_FLAG_ELP_REQUESTED, &wl->flags))
-               return 0;
-
-       /* don't cancel_sync as it might contend for a mutex and deadlock */
-       cancel_delayed_work(&wl->elp_work);
-
-       if (!test_bit(WL1271_FLAG_IN_ELP, &wl->flags))
-               return 0;
-
-       wl1271_debug(DEBUG_PSM, "waking up chip from elp");
-
-       /*
-        * The spinlock is required here to synchronize both the work and
-        * the completion variable in one entity.
-        */
-       spin_lock_irqsave(&wl->wl_lock, flags);
-       if (test_bit(WL1271_FLAG_IRQ_RUNNING, &wl->flags))
-               pending = true;
-       else
-               wl->elp_compl = &compl;
-       spin_unlock_irqrestore(&wl->wl_lock, flags);
-
-       ret = wlcore_raw_write32(wl, HW_ACCESS_ELP_CTRL_REG, ELPCTRL_WAKE_UP);
-       if (ret < 0) {
-               wl12xx_queue_recovery_work(wl);
-               goto err;
-       }
-
-       if (!pending) {
-               ret = wait_for_completion_timeout(
-                       &compl, msecs_to_jiffies(WL1271_WAKEUP_TIMEOUT));
-               if (ret == 0) {
-                       wl1271_error("ELP wakeup timeout!");
-                       wl12xx_queue_recovery_work(wl);
-                       ret = -ETIMEDOUT;
-                       goto err;
-               }
-       }
-
-       clear_bit(WL1271_FLAG_IN_ELP, &wl->flags);
-
-       wl1271_debug(DEBUG_PSM, "wakeup time: %u ms",
-                    jiffies_to_msecs(jiffies - start_time));
-       goto out;
-
-err:
-       spin_lock_irqsave(&wl->wl_lock, flags);
-       wl->elp_compl = NULL;
-       spin_unlock_irqrestore(&wl->wl_lock, flags);
-       return ret;
-
-out:
-       return 0;
-}
-EXPORT_SYMBOL_GPL(wl1271_ps_elp_wakeup);
-
 int wl1271_ps_set_mode(struct wl1271 *wl, struct wl12xx_vif *wlvif,
                       enum wl1271_cmd_ps_mode mode)
 {
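
The removed wl1271_ps_elp_wakeup() above and the new wlcore_runtime_resume()
in main.c share the same handshake: the waker publishes wl->elp_compl under
wl->wl_lock, and the interrupt path (not part of this section) is expected to
complete it once the chip reports it is awake. A sketch of the assumed
IRQ-side counterpart:

    /* Assumed IRQ-side counterpart; not shown in this diff. */
    static void wlcore_irq_signal_wakeup(struct wl1271 *wl)
    {
            unsigned long flags;

            spin_lock_irqsave(&wl->wl_lock, flags);
            if (wl->elp_compl) {
                    complete(wl->elp_compl);
                    wl->elp_compl = NULL;
            }
            spin_unlock_irqrestore(&wl->wl_lock, flags);
    }

One behavioural difference survives the move: the old wakeup path returned
-ETIMEDOUT on timeout, while wlcore_runtime_resume() returns 0 after queueing
recovery, since an error returned from a ->runtime_resume() callback would
leave the device stuck in a runtime PM error state.
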
index de4f9da8ed26b77e13c20437b8e16263192af8f1..411727587f95eba35e32ed69c58715afdc93c918 100644 (file)
@@ -29,9 +29,6 @@
 
 int wl1271_ps_set_mode(struct wl1271 *wl, struct wl12xx_vif *wlvif,
                       enum wl1271_cmd_ps_mode mode);
-void wl1271_ps_elp_sleep(struct wl1271 *wl);
-int wl1271_ps_elp_wakeup(struct wl1271 *wl);
-void wl1271_elp_work(struct work_struct *work);
 void wl12xx_ps_link_start(struct wl1271 *wl, struct wl12xx_vif *wlvif,
                          u8 hlid, bool clean_queues);
 void wl12xx_ps_link_end(struct wl1271 *wl, struct wl12xx_vif *wlvif, u8 hlid);
index 5612f5916b4efac96def81dadc7f3408b7133d70..764e723e4ef97f723a2e127a7fd2714f878d75a4 100644 (file)
  */
 
 #include <linux/ieee80211.h>
+#include <linux/pm_runtime.h>
 
 #include "wlcore.h"
 #include "debug.h"
 #include "cmd.h"
 #include "scan.h"
 #include "acx.h"
-#include "ps.h"
 #include "tx.h"
 
 void wl1271_scan_complete_work(struct work_struct *work)
@@ -67,17 +67,17 @@ void wl1271_scan_complete_work(struct work_struct *work)
        wl->scan.req = NULL;
        wl->scan_wlvif = NULL;
 
-       ret = wl1271_ps_elp_wakeup(wl);
-       if (ret < 0)
+       ret = pm_runtime_get_sync(wl->dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(wl->dev);
                goto out;
+       }
 
        if (test_bit(WLVIF_FLAG_STA_ASSOCIATED, &wlvif->flags)) {
                /* restore hardware connection monitoring template */
                wl1271_cmd_build_ap_probe_req(wl, wlvif, wlvif->probereq);
        }
 
-       wl1271_ps_elp_sleep(wl);
-
        if (wl->scan.failed) {
                wl1271_info("Scan completed due to error.");
                wl12xx_queue_recovery_work(wl);
@@ -85,6 +85,9 @@ void wl1271_scan_complete_work(struct work_struct *work)
 
        wlcore_cmd_regdomain_config_locked(wl);
 
+       pm_runtime_mark_last_busy(wl->dev);
+       pm_runtime_put_autosuspend(wl->dev);
+
        ieee80211_scan_completed(wl->hw, &info);
 
 out:
index d31eb775e023daa57398e73052d222ebab995b87..7425ba9471d0741c7586b39b1a8b141666759883 100644 (file)
  *
  */
 
+#include <linux/pm_runtime.h>
+
+#include "acx.h"
 #include "wlcore.h"
 #include "debug.h"
-#include "ps.h"
 #include "sysfs.h"
 
 static ssize_t wl1271_sysfs_show_bt_coex_state(struct device *dev,
@@ -68,12 +70,15 @@ static ssize_t wl1271_sysfs_store_bt_coex_state(struct device *dev,
        if (unlikely(wl->state != WLCORE_STATE_ON))
                goto out;
 
-       ret = wl1271_ps_elp_wakeup(wl);
-       if (ret < 0)
+       ret = pm_runtime_get_sync(wl->dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(wl->dev);
                goto out;
+       }
 
        wl1271_acx_sg_enable(wl, wl->sg_enabled);
-       wl1271_ps_elp_sleep(wl);
+       pm_runtime_mark_last_busy(wl->dev);
+       pm_runtime_put_autosuspend(wl->dev);
 
  out:
        mutex_unlock(&wl->mutex);
index 009ec07c4cec14f057b39fe122aa3a6acb943eb8..dcb2c8b0feb63ae4b1ecb35a0d43ab1fdddf30b9 100644 (file)
  */
 #include "testmode.h"
 
+#include <linux/pm_runtime.h>
 #include <linux/slab.h>
 #include <net/genetlink.h>
 
 #include "wlcore.h"
 #include "debug.h"
 #include "acx.h"
-#include "ps.h"
 #include "io.h"
 
 #define WL1271_TM_MAX_DATA_LENGTH 1024
@@ -97,9 +97,11 @@ static int wl1271_tm_cmd_test(struct wl1271 *wl, struct nlattr *tb[])
                goto out;
        }
 
-       ret = wl1271_ps_elp_wakeup(wl);
-       if (ret < 0)
+       ret = pm_runtime_get_sync(wl->dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(wl->dev);
                goto out;
+       }
 
        ret = wl1271_cmd_test(wl, buf, buf_len, answer);
        if (ret < 0) {
@@ -141,7 +143,8 @@ static int wl1271_tm_cmd_test(struct wl1271 *wl, struct nlattr *tb[])
        }
 
 out_sleep:
-       wl1271_ps_elp_sleep(wl);
+       pm_runtime_mark_last_busy(wl->dev);
+       pm_runtime_put_autosuspend(wl->dev);
 out:
        mutex_unlock(&wl->mutex);
 
@@ -169,9 +172,11 @@ static int wl1271_tm_cmd_interrogate(struct wl1271 *wl, struct nlattr *tb[])
                goto out;
        }
 
-       ret = wl1271_ps_elp_wakeup(wl);
-       if (ret < 0)
+       ret = pm_runtime_get_sync(wl->dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(wl->dev);
                goto out;
+       }
 
        cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
        if (!cmd) {
@@ -205,7 +210,8 @@ static int wl1271_tm_cmd_interrogate(struct wl1271 *wl, struct nlattr *tb[])
 out_free:
        kfree(cmd);
 out_sleep:
-       wl1271_ps_elp_sleep(wl);
+       pm_runtime_mark_last_busy(wl->dev);
+       pm_runtime_put_autosuspend(wl->dev);
 out:
        mutex_unlock(&wl->mutex);
 
index 00e9b4624dcf452c960fe6bdf32e00a02db67f73..b6e19c2d66b0a80c61b789ccf3f574cad300a955 100644 (file)
@@ -24,6 +24,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/etherdevice.h>
+#include <linux/pm_runtime.h>
 #include <linux/spinlock.h>
 
 #include "wlcore.h"
@@ -868,9 +869,11 @@ void wl1271_tx_work(struct work_struct *work)
        int ret;
 
        mutex_lock(&wl->mutex);
-       ret = wl1271_ps_elp_wakeup(wl);
-       if (ret < 0)
+       ret = pm_runtime_get_sync(wl->dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(wl->dev);
                goto out;
+       }
 
        ret = wlcore_tx_work_locked(wl);
        if (ret < 0) {
@@ -878,7 +881,8 @@ void wl1271_tx_work(struct work_struct *work)
                goto out;
        }
 
-       wl1271_ps_elp_sleep(wl);
+       pm_runtime_mark_last_busy(wl->dev);
+       pm_runtime_put_autosuspend(wl->dev);
 out:
        mutex_unlock(&wl->mutex);
 }
index 5c0bcb1fe1a1f2e2468d629afe951cc4826e3719..dbe78d8491effa32a3356d8a4cd238de5ae79d88 100644 (file)
@@ -8,12 +8,13 @@
  * version 2 as published by the Free Software Foundation.
  */
 
+#include <linux/pm_runtime.h>
+
 #include <net/mac80211.h>
 #include <net/netlink.h>
 
 #include "wlcore.h"
 #include "debug.h"
-#include "ps.h"
 #include "hw_ops.h"
 #include "vendor_cmd.h"
 
@@ -55,14 +56,17 @@ wlcore_vendor_cmd_smart_config_start(struct wiphy *wiphy,
                goto out;
        }
 
-       ret = wl1271_ps_elp_wakeup(wl);
-       if (ret < 0)
+       ret = pm_runtime_get_sync(wl->dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(wl->dev);
                goto out;
+       }
 
        ret = wlcore_smart_config_start(wl,
                        nla_get_u32(tb[WLCORE_VENDOR_ATTR_GROUP_ID]));
 
-       wl1271_ps_elp_sleep(wl);
+       pm_runtime_mark_last_busy(wl->dev);
+       pm_runtime_put_autosuspend(wl->dev);
 out:
        mutex_unlock(&wl->mutex);
 
@@ -87,13 +91,16 @@ wlcore_vendor_cmd_smart_config_stop(struct wiphy *wiphy,
                goto out;
        }
 
-       ret = wl1271_ps_elp_wakeup(wl);
-       if (ret < 0)
+       ret = pm_runtime_get_sync(wl->dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(wl->dev);
                goto out;
+       }
 
        ret = wlcore_smart_config_stop(wl);
 
-       wl1271_ps_elp_sleep(wl);
+       pm_runtime_mark_last_busy(wl->dev);
+       pm_runtime_put_autosuspend(wl->dev);
 out:
        mutex_unlock(&wl->mutex);
 
@@ -131,16 +138,19 @@ wlcore_vendor_cmd_smart_config_set_group_key(struct wiphy *wiphy,
                goto out;
        }
 
-       ret = wl1271_ps_elp_wakeup(wl);
-       if (ret < 0)
+       ret = pm_runtime_get_sync(wl->dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(wl->dev);
                goto out;
+       }
 
        ret = wlcore_smart_config_set_group_key(wl,
                        nla_get_u32(tb[WLCORE_VENDOR_ATTR_GROUP_ID]),
                        nla_len(tb[WLCORE_VENDOR_ATTR_GROUP_KEY]),
                        nla_data(tb[WLCORE_VENDOR_ATTR_GROUP_KEY]));
 
-       wl1271_ps_elp_sleep(wl);
+       pm_runtime_mark_last_busy(wl->dev);
+       pm_runtime_put_autosuspend(wl->dev);
 out:
        mutex_unlock(&wl->mutex);
 
index 95fbedc8ea3429a77ae3094b6990fbd01f3305eb..d4b1f66ef45701871e76c0030adbbac2296414ff 100644 (file)
@@ -348,7 +348,6 @@ struct wl1271 {
        enum nl80211_band band;
 
        struct completion *elp_compl;
-       struct delayed_work elp_work;
 
        /* in dBm */
        int power_level;
index e840985385fcef4379f683b209688e754e611333..32ec121ccac281555788ddbbe04eec3ab4b82fea 100644 (file)
@@ -233,7 +233,6 @@ enum wl12xx_flags {
        WL1271_FLAG_TX_QUEUE_STOPPED,
        WL1271_FLAG_TX_PENDING,
        WL1271_FLAG_IN_ELP,
-       WL1271_FLAG_ELP_REQUESTED,
        WL1271_FLAG_IRQ_RUNNING,
        WL1271_FLAG_FW_TX_BUSY,
        WL1271_FLAG_DUMMY_PACKET_PENDING,
index 07b94eda96041070ba01063683dcdcf7295e8de5..dd6a86b899eb471bf13a1350ea10205e855cce34 100644 (file)
@@ -1341,7 +1341,7 @@ int zd_chip_control_leds(struct zd_chip *chip, enum led_status status)
        case ZD_LED_SCANNING:
                ioreqs[0].value = FW_LINK_OFF;
                ioreqs[1].value = v[1] & ~other_led;
-               if (get_seconds() % 3 == 0) {
+               if ((u32)ktime_get_seconds() % 3 == 0) {
                        ioreqs[1].value &= ~chip->link_led;
                } else {
                        ioreqs[1].value |= chip->link_led;
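
The zd_chip.c hunk is part of the y2038 cleanup: get_seconds() returned the
wall-clock time as an unsigned long, which is 32 bits on 32-bit
architectures, while ktime_get_seconds() returns a time64_t from the
monotonic clock. For a three-second LED blink cadence only the modulo
matters, so switching clocks and truncating to u32 is harmless here. The
idiom, as a standalone sketch:

    #include <linux/timekeeping.h>

    static bool blink_phase_on(void)
    {
            /* Monotonic seconds; truncation to u32 is fine for % 3. */
            return ((u32)ktime_get_seconds() % 3) == 0;
    }
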
index c30bf118c67d81beced03f28279b11a838e02ad2..c2cda3acd4af37a2ca4130283010e7faba8d3af1 100644 (file)
@@ -371,25 +371,27 @@ static inline void handle_regs_int_override(struct urb *urb)
 {
        struct zd_usb *usb = urb->context;
        struct zd_usb_interrupt *intr = &usb->intr;
+       unsigned long flags;
 
-       spin_lock(&intr->lock);
+       spin_lock_irqsave(&intr->lock, flags);
        if (atomic_read(&intr->read_regs_enabled)) {
                atomic_set(&intr->read_regs_enabled, 0);
                intr->read_regs_int_overridden = 1;
                complete(&intr->read_regs.completion);
        }
-       spin_unlock(&intr->lock);
+       spin_unlock_irqrestore(&intr->lock, flags);
 }
 
 static inline void handle_regs_int(struct urb *urb)
 {
        struct zd_usb *usb = urb->context;
        struct zd_usb_interrupt *intr = &usb->intr;
+       unsigned long flags;
        int len;
        u16 int_num;
 
        ZD_ASSERT(in_interrupt());
-       spin_lock(&intr->lock);
+       spin_lock_irqsave(&intr->lock, flags);
 
        int_num = le16_to_cpu(*(__le16 *)(urb->transfer_buffer+2));
        if (int_num == CR_INTERRUPT) {
@@ -425,7 +427,7 @@ static inline void handle_regs_int(struct urb *urb)
        }
 
 out:
-       spin_unlock(&intr->lock);
+       spin_unlock_irqrestore(&intr->lock, flags);
 
        /* CR_INTERRUPT might override read_reg too. */
        if (int_num == CR_INTERRUPT && atomic_read(&intr->read_regs_enabled))
@@ -665,6 +667,7 @@ static void rx_urb_complete(struct urb *urb)
        struct zd_usb_rx *rx;
        const u8 *buffer;
        unsigned int length;
+       unsigned long flags;
 
        switch (urb->status) {
        case 0:
@@ -693,14 +696,14 @@ static void rx_urb_complete(struct urb *urb)
                /* If there is an old first fragment, we don't care. */
                dev_dbg_f(urb_dev(urb), "*** first fragment ***\n");
                ZD_ASSERT(length <= ARRAY_SIZE(rx->fragment));
-               spin_lock(&rx->lock);
+               spin_lock_irqsave(&rx->lock, flags);
                memcpy(rx->fragment, buffer, length);
                rx->fragment_length = length;
-               spin_unlock(&rx->lock);
+               spin_unlock_irqrestore(&rx->lock, flags);
                goto resubmit;
        }
 
-       spin_lock(&rx->lock);
+       spin_lock_irqsave(&rx->lock, flags);
        if (rx->fragment_length > 0) {
                /* We are on a second fragment, we believe */
                ZD_ASSERT(length + rx->fragment_length <=
@@ -710,9 +713,9 @@ static void rx_urb_complete(struct urb *urb)
                handle_rx_packet(usb, rx->fragment,
                                 rx->fragment_length + length);
                rx->fragment_length = 0;
-               spin_unlock(&rx->lock);
+               spin_unlock_irqrestore(&rx->lock, flags);
        } else {
-               spin_unlock(&rx->lock);
+               spin_unlock_irqrestore(&rx->lock, flags);
                handle_rx_packet(usb, buffer, length);
        }
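
The zd_usb.c hunks harden the URB completion handlers: depending on the host
controller driver, completions may run in hard-IRQ context or in softirq/task
context, so a bare spin_lock() there risks deadlock against another path that
takes the same lock with interrupts enabled. spin_lock_irqsave() is correct
from either context. A sketch of the shape (the handler name is
hypothetical):

    #include <linux/usb.h>

    static void example_int_urb_complete(struct urb *urb)
    {
            struct zd_usb *usb = urb->context;
            unsigned long flags;

            /* irqsave works whether we were entered with IRQs on or off */
            spin_lock_irqsave(&usb->intr.lock, flags);
            /* ... update state shared with other interrupt paths ... */
            spin_unlock_irqrestore(&usb->intr.lock, flags);
    }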
 
index 78ebe494fef02b8d31505262f8c551e27aee7dc5..92274c2372008a57ba12ca960bafa84cd2eac7b3 100644 (file)
@@ -148,14 +148,14 @@ void xenvif_wake_queue(struct xenvif_queue *queue)
 }
 
 static u16 xenvif_select_queue(struct net_device *dev, struct sk_buff *skb,
-                              void *accel_priv,
+                              struct net_device *sb_dev,
                               select_queue_fallback_t fallback)
 {
        struct xenvif *vif = netdev_priv(dev);
        unsigned int size = vif->hash.size;
 
        if (vif->hash.alg == XEN_NETIF_CTRL_HASH_ALGORITHM_NONE)
-               return fallback(dev, skb) % dev->real_num_tx_queues;
+               return fallback(dev, skb, NULL) % dev->real_num_tx_queues;
 
        xenvif_set_skb_hash(vif, skb);
 
index a27daa23c9dc9f4f5b52c5c2da335527f07a1366..3621e05a7494c43a94084fa79a6959fa428f64ec 100644 (file)
@@ -1603,9 +1603,9 @@ static void xenvif_ctrl_action(struct xenvif *vif)
 static bool xenvif_ctrl_work_todo(struct xenvif *vif)
 {
        if (likely(RING_HAS_UNCONSUMED_REQUESTS(&vif->ctrl)))
-               return 1;
+               return true;
 
-       return 0;
+       return false;
 }
 
 irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data)
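
The xen-netback select_queue hunk above and the xen-netfront one below track
a tree-wide change to the ndo_select_queue() signature: the unused accel_priv
pointer is replaced by a struct net_device *sb_dev (the subordinate device
used for L2 forwarding offload), and the fallback helper gains it as a third
argument. The small netback.c hunk above simply makes the bool-returning
xenvif_ctrl_work_todo() return true/false instead of 1/0. The shapes these
hunks convert to:

    /* Callback and fallback signatures as used in the hunks here. */
    typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
                                           struct sk_buff *skb,
                                           struct net_device *sb_dev);

    u16 (*ndo_select_queue)(struct net_device *dev,
                            struct sk_buff *skb,
                            struct net_device *sb_dev,
                            select_queue_fallback_t fallback);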
index 2d8812dd1534ae5ad2a9b6a6725861b2da3473d9..799cba4624a5e07a15b3b19151ea2094c5ff9a33 100644 (file)
@@ -546,7 +546,8 @@ static int xennet_count_skb_slots(struct sk_buff *skb)
 }
 
 static u16 xennet_select_queue(struct net_device *dev, struct sk_buff *skb,
-                              void *accel_priv, select_queue_fallback_t fallback)
+                              struct net_device *sb_dev,
+                              select_queue_fallback_t fallback)
 {
        unsigned int num_queues = dev->real_num_tx_queues;
        u32 hash;
@@ -1610,7 +1611,7 @@ static int xennet_init_queue(struct netfront_queue *queue)
        timer_setup(&queue->rx_refill_timer, rx_refill_timeout, 0);
 
        snprintf(queue->name, sizeof(queue->name), "%s-q%u",
-                queue->info->netdev->name, queue->id);
+                queue->info->xbdev->nodename, queue->id);
 
        /* Initialise tx_skbs as a free chain containing every entry. */
        queue->tx_skb_freelist = 0;
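
The final xen-netfront hunk fixes the per-queue IRQ names:
xennet_init_queue() runs before register_netdev(), when netdev->name is still
the unexpanded "eth%d" template, so the xenbus node name is used instead:

    snprintf(queue->name, sizeof(queue->name), "%s-q%u",
             queue->info->xbdev->nodename, queue->id);
    /* yields e.g. "device/vif/0-q0" rather than "eth%d-q0" */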
index d963baf8e53a22b53125a60b8c5c4dba1b1d5a33..e92391d6d1bd6a52de298fc53dedf5902016b005 100644 (file)
@@ -367,14 +367,23 @@ struct phy_device *of_phy_get_and_connect(struct net_device *dev,
        phy_interface_t iface;
        struct device_node *phy_np;
        struct phy_device *phy;
+       int ret;
 
        iface = of_get_phy_mode(np);
        if (iface < 0)
                return NULL;
-
-       phy_np = of_parse_phandle(np, "phy-handle", 0);
-       if (!phy_np)
-               return NULL;
+       if (of_phy_is_fixed_link(np)) {
+               ret = of_phy_register_fixed_link(np);
+               if (ret < 0) {
+                       netdev_err(dev, "broken fixed-link specification\n");
+                       return NULL;
+               }
+               phy_np = of_node_get(np);
+       } else {
+               phy_np = of_parse_phandle(np, "phy-handle", 0);
+               if (!phy_np)
+                       return NULL;
+       }
 
        phy = of_phy_connect(dev, phy_np, hndlr, 0, iface);
 
index 474c988d2e95eb72dbe91b16e24cbd75bf6a9dfe..d137c480db46b58df2660a740377e4c4e3437709 100644 (file)
@@ -43,7 +43,7 @@ config PTP_1588_CLOCK_DTE
 
 config PTP_1588_CLOCK_QORIQ
        tristate "Freescale QorIQ 1588 timer as PTP clock"
-       depends on GIANFAR
+       depends on GIANFAR || FSL_DPAA_ETH
        depends on PTP_1588_CLOCK
        default y
        help
index e8652c148c5223d24c67089539b3bb4e861e42d5..a14c317b5a3873cc0d7e77e54df78beb2762a686 100644 (file)
 /* Caller must hold qoriq_ptp->lock. */
 static u64 tmr_cnt_read(struct qoriq_ptp *qoriq_ptp)
 {
+       struct qoriq_ptp_registers *regs = &qoriq_ptp->regs;
        u64 ns;
        u32 lo, hi;
 
-       lo = qoriq_read(&qoriq_ptp->regs->tmr_cnt_l);
-       hi = qoriq_read(&qoriq_ptp->regs->tmr_cnt_h);
+       lo = qoriq_read(&regs->ctrl_regs->tmr_cnt_l);
+       hi = qoriq_read(&regs->ctrl_regs->tmr_cnt_h);
        ns = ((u64) hi) << 32;
        ns |= lo;
        return ns;
@@ -52,16 +53,18 @@ static u64 tmr_cnt_read(struct qoriq_ptp *qoriq_ptp)
 /* Caller must hold qoriq_ptp->lock. */
 static void tmr_cnt_write(struct qoriq_ptp *qoriq_ptp, u64 ns)
 {
+       struct qoriq_ptp_registers *regs = &qoriq_ptp->regs;
        u32 hi = ns >> 32;
        u32 lo = ns & 0xffffffff;
 
-       qoriq_write(&qoriq_ptp->regs->tmr_cnt_l, lo);
-       qoriq_write(&qoriq_ptp->regs->tmr_cnt_h, hi);
+       qoriq_write(&regs->ctrl_regs->tmr_cnt_l, lo);
+       qoriq_write(&regs->ctrl_regs->tmr_cnt_h, hi);
 }
 
 /* Caller must hold qoriq_ptp->lock. */
 static void set_alarm(struct qoriq_ptp *qoriq_ptp)
 {
+       struct qoriq_ptp_registers *regs = &qoriq_ptp->regs;
        u64 ns;
        u32 lo, hi;
 
@@ -70,16 +73,18 @@ static void set_alarm(struct qoriq_ptp *qoriq_ptp)
        ns -= qoriq_ptp->tclk_period;
        hi = ns >> 32;
        lo = ns & 0xffffffff;
-       qoriq_write(&qoriq_ptp->regs->tmr_alarm1_l, lo);
-       qoriq_write(&qoriq_ptp->regs->tmr_alarm1_h, hi);
+       qoriq_write(&regs->alarm_regs->tmr_alarm1_l, lo);
+       qoriq_write(&regs->alarm_regs->tmr_alarm1_h, hi);
 }
 
 /* Caller must hold qoriq_ptp->lock. */
 static void set_fipers(struct qoriq_ptp *qoriq_ptp)
 {
+       struct qoriq_ptp_registers *regs = &qoriq_ptp->regs;
+
        set_alarm(qoriq_ptp);
-       qoriq_write(&qoriq_ptp->regs->tmr_fiper1, qoriq_ptp->tmr_fiper1);
-       qoriq_write(&qoriq_ptp->regs->tmr_fiper2, qoriq_ptp->tmr_fiper2);
+       qoriq_write(&regs->fiper_regs->tmr_fiper1, qoriq_ptp->tmr_fiper1);
+       qoriq_write(&regs->fiper_regs->tmr_fiper2, qoriq_ptp->tmr_fiper2);
 }
 
 /*
@@ -89,16 +94,17 @@ static void set_fipers(struct qoriq_ptp *qoriq_ptp)
 static irqreturn_t isr(int irq, void *priv)
 {
        struct qoriq_ptp *qoriq_ptp = priv;
+       struct qoriq_ptp_registers *regs = &qoriq_ptp->regs;
        struct ptp_clock_event event;
        u64 ns;
        u32 ack = 0, lo, hi, mask, val;
 
-       val = qoriq_read(&qoriq_ptp->regs->tmr_tevent);
+       val = qoriq_read(&regs->ctrl_regs->tmr_tevent);
 
        if (val & ETS1) {
                ack |= ETS1;
-               hi = qoriq_read(&qoriq_ptp->regs->tmr_etts1_h);
-               lo = qoriq_read(&qoriq_ptp->regs->tmr_etts1_l);
+               hi = qoriq_read(&regs->etts_regs->tmr_etts1_h);
+               lo = qoriq_read(&regs->etts_regs->tmr_etts1_l);
                event.type = PTP_CLOCK_EXTTS;
                event.index = 0;
                event.timestamp = ((u64) hi) << 32;
@@ -108,8 +114,8 @@ static irqreturn_t isr(int irq, void *priv)
 
        if (val & ETS2) {
                ack |= ETS2;
-               hi = qoriq_read(&qoriq_ptp->regs->tmr_etts2_h);
-               lo = qoriq_read(&qoriq_ptp->regs->tmr_etts2_l);
+               hi = qoriq_read(&regs->etts_regs->tmr_etts2_h);
+               lo = qoriq_read(&regs->etts_regs->tmr_etts2_l);
                event.type = PTP_CLOCK_EXTTS;
                event.index = 1;
                event.timestamp = ((u64) hi) << 32;
@@ -130,16 +136,16 @@ static irqreturn_t isr(int irq, void *priv)
                        hi = ns >> 32;
                        lo = ns & 0xffffffff;
                        spin_lock(&qoriq_ptp->lock);
-                       qoriq_write(&qoriq_ptp->regs->tmr_alarm2_l, lo);
-                       qoriq_write(&qoriq_ptp->regs->tmr_alarm2_h, hi);
+                       qoriq_write(&regs->alarm_regs->tmr_alarm2_l, lo);
+                       qoriq_write(&regs->alarm_regs->tmr_alarm2_h, hi);
                        spin_unlock(&qoriq_ptp->lock);
                        qoriq_ptp->alarm_value = ns;
                } else {
-                       qoriq_write(&qoriq_ptp->regs->tmr_tevent, ALM2);
+                       qoriq_write(&regs->ctrl_regs->tmr_tevent, ALM2);
                        spin_lock(&qoriq_ptp->lock);
-                       mask = qoriq_read(&qoriq_ptp->regs->tmr_temask);
+                       mask = qoriq_read(&regs->ctrl_regs->tmr_temask);
                        mask &= ~ALM2EN;
-                       qoriq_write(&qoriq_ptp->regs->tmr_temask, mask);
+                       qoriq_write(&regs->ctrl_regs->tmr_temask, mask);
                        spin_unlock(&qoriq_ptp->lock);
                        qoriq_ptp->alarm_value = 0;
                        qoriq_ptp->alarm_interval = 0;
@@ -153,7 +159,7 @@ static irqreturn_t isr(int irq, void *priv)
        }
 
        if (ack) {
-               qoriq_write(&qoriq_ptp->regs->tmr_tevent, ack);
+               qoriq_write(&regs->ctrl_regs->tmr_tevent, ack);
                return IRQ_HANDLED;
        } else
                return IRQ_NONE;
@@ -169,6 +175,7 @@ static int ptp_qoriq_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
        u32 tmr_add;
        int neg_adj = 0;
        struct qoriq_ptp *qoriq_ptp = container_of(ptp, struct qoriq_ptp, caps);
+       struct qoriq_ptp_registers *regs = &qoriq_ptp->regs;
 
        if (scaled_ppm < 0) {
                neg_adj = 1;
@@ -186,7 +193,7 @@ static int ptp_qoriq_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
 
        tmr_add = neg_adj ? tmr_add - diff : tmr_add + diff;
 
-       qoriq_write(&qoriq_ptp->regs->tmr_add, tmr_add);
+       qoriq_write(&regs->ctrl_regs->tmr_add, tmr_add);
 
        return 0;
 }
@@ -250,6 +257,7 @@ static int ptp_qoriq_enable(struct ptp_clock_info *ptp,
                              struct ptp_clock_request *rq, int on)
 {
        struct qoriq_ptp *qoriq_ptp = container_of(ptp, struct qoriq_ptp, caps);
+       struct qoriq_ptp_registers *regs = &qoriq_ptp->regs;
        unsigned long flags;
        u32 bit, mask;
 
@@ -266,23 +274,23 @@ static int ptp_qoriq_enable(struct ptp_clock_info *ptp,
                        return -EINVAL;
                }
                spin_lock_irqsave(&qoriq_ptp->lock, flags);
-               mask = qoriq_read(&qoriq_ptp->regs->tmr_temask);
+               mask = qoriq_read(&regs->ctrl_regs->tmr_temask);
                if (on)
                        mask |= bit;
                else
                        mask &= ~bit;
-               qoriq_write(&qoriq_ptp->regs->tmr_temask, mask);
+               qoriq_write(&regs->ctrl_regs->tmr_temask, mask);
                spin_unlock_irqrestore(&qoriq_ptp->lock, flags);
                return 0;
 
        case PTP_CLK_REQ_PPS:
                spin_lock_irqsave(&qoriq_ptp->lock, flags);
-               mask = qoriq_read(&qoriq_ptp->regs->tmr_temask);
+               mask = qoriq_read(&regs->ctrl_regs->tmr_temask);
                if (on)
                        mask |= PP1EN;
                else
                        mask &= ~PP1EN;
-               qoriq_write(&qoriq_ptp->regs->tmr_temask, mask);
+               qoriq_write(&regs->ctrl_regs->tmr_temask, mask);
                spin_unlock_irqrestore(&qoriq_ptp->lock, flags);
                return 0;
 
@@ -313,10 +321,12 @@ static int qoriq_ptp_probe(struct platform_device *dev)
 {
        struct device_node *node = dev->dev.of_node;
        struct qoriq_ptp *qoriq_ptp;
+       struct qoriq_ptp_registers *regs;
        struct timespec64 now;
        int err = -ENOMEM;
        u32 tmr_ctrl;
        unsigned long flags;
+       void __iomem *base;
 
        qoriq_ptp = kzalloc(sizeof(*qoriq_ptp), GFP_KERNEL);
        if (!qoriq_ptp)
@@ -351,7 +361,7 @@ static int qoriq_ptp_probe(struct platform_device *dev)
                pr_err("irq not in device tree\n");
                goto no_node;
        }
-       if (request_irq(qoriq_ptp->irq, isr, 0, DRIVER, qoriq_ptp)) {
+       if (request_irq(qoriq_ptp->irq, isr, IRQF_SHARED, DRIVER, qoriq_ptp)) {
                pr_err("request_irq failed\n");
                goto no_node;
        }
@@ -368,12 +378,27 @@ static int qoriq_ptp_probe(struct platform_device *dev)
 
        spin_lock_init(&qoriq_ptp->lock);
 
-       qoriq_ptp->regs = ioremap(qoriq_ptp->rsrc->start,
-                               resource_size(qoriq_ptp->rsrc));
-       if (!qoriq_ptp->regs) {
+       base = ioremap(qoriq_ptp->rsrc->start,
+                      resource_size(qoriq_ptp->rsrc));
+       if (!base) {
                pr_err("ioremap ptp registers failed\n");
                goto no_ioremap;
        }
+
+       qoriq_ptp->base = base;
+
+       if (of_device_is_compatible(node, "fsl,fman-ptp-timer")) {
+               qoriq_ptp->regs.ctrl_regs = base + FMAN_CTRL_REGS_OFFSET;
+               qoriq_ptp->regs.alarm_regs = base + FMAN_ALARM_REGS_OFFSET;
+               qoriq_ptp->regs.fiper_regs = base + FMAN_FIPER_REGS_OFFSET;
+               qoriq_ptp->regs.etts_regs = base + FMAN_ETTS_REGS_OFFSET;
+       } else {
+               qoriq_ptp->regs.ctrl_regs = base + CTRL_REGS_OFFSET;
+               qoriq_ptp->regs.alarm_regs = base + ALARM_REGS_OFFSET;
+               qoriq_ptp->regs.fiper_regs = base + FIPER_REGS_OFFSET;
+               qoriq_ptp->regs.etts_regs = base + ETTS_REGS_OFFSET;
+       }
+
        ktime_get_real_ts64(&now);
        ptp_qoriq_settime(&qoriq_ptp->caps, &now);
 
@@ -383,13 +408,14 @@ static int qoriq_ptp_probe(struct platform_device *dev)
 
        spin_lock_irqsave(&qoriq_ptp->lock, flags);
 
-       qoriq_write(&qoriq_ptp->regs->tmr_ctrl,   tmr_ctrl);
-       qoriq_write(&qoriq_ptp->regs->tmr_add,    qoriq_ptp->tmr_add);
-       qoriq_write(&qoriq_ptp->regs->tmr_prsc,   qoriq_ptp->tmr_prsc);
-       qoriq_write(&qoriq_ptp->regs->tmr_fiper1, qoriq_ptp->tmr_fiper1);
-       qoriq_write(&qoriq_ptp->regs->tmr_fiper2, qoriq_ptp->tmr_fiper2);
+       regs = &qoriq_ptp->regs;
+       qoriq_write(&regs->ctrl_regs->tmr_ctrl,   tmr_ctrl);
+       qoriq_write(&regs->ctrl_regs->tmr_add,    qoriq_ptp->tmr_add);
+       qoriq_write(&regs->ctrl_regs->tmr_prsc,   qoriq_ptp->tmr_prsc);
+       qoriq_write(&regs->fiper_regs->tmr_fiper1, qoriq_ptp->tmr_fiper1);
+       qoriq_write(&regs->fiper_regs->tmr_fiper2, qoriq_ptp->tmr_fiper2);
        set_alarm(qoriq_ptp);
-       qoriq_write(&qoriq_ptp->regs->tmr_ctrl,   tmr_ctrl|FIPERST|RTPE|TE|FRD);
+       qoriq_write(&regs->ctrl_regs->tmr_ctrl,   tmr_ctrl|FIPERST|RTPE|TE|FRD);
 
        spin_unlock_irqrestore(&qoriq_ptp->lock, flags);
 
@@ -405,7 +431,7 @@ static int qoriq_ptp_probe(struct platform_device *dev)
        return 0;
 
 no_clock:
-       iounmap(qoriq_ptp->regs);
+       iounmap(qoriq_ptp->base);
 no_ioremap:
        release_resource(qoriq_ptp->rsrc);
 no_resource:
@@ -419,12 +445,13 @@ no_memory:
 static int qoriq_ptp_remove(struct platform_device *dev)
 {
        struct qoriq_ptp *qoriq_ptp = platform_get_drvdata(dev);
+       struct qoriq_ptp_registers *regs = &qoriq_ptp->regs;
 
-       qoriq_write(&qoriq_ptp->regs->tmr_temask, 0);
-       qoriq_write(&qoriq_ptp->regs->tmr_ctrl,   0);
+       qoriq_write(&regs->ctrl_regs->tmr_temask, 0);
+       qoriq_write(&regs->ctrl_regs->tmr_ctrl,   0);
 
        ptp_clock_unregister(qoriq_ptp->clock);
-       iounmap(qoriq_ptp->regs);
+       iounmap(qoriq_ptp->base);
        release_resource(qoriq_ptp->rsrc);
        free_irq(qoriq_ptp->irq, qoriq_ptp);
        kfree(qoriq_ptp);
@@ -434,6 +461,7 @@ static int qoriq_ptp_remove(struct platform_device *dev)
 
 static const struct of_device_id match_table[] = {
        { .compatible = "fsl,etsec-ptp" },
+       { .compatible = "fsl,fman-ptp-timer" },
        {},
 };
 MODULE_DEVICE_TABLE(of, match_table);
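
The ptp_qoriq rework stops treating the timer registers as one flat struct:
qoriq_ptp->regs becomes a set of per-block __iomem pointers computed from the
mapped base, with different offsets for the eTSEC timer and the newly
supported "fsl,fman-ptp-timer". The struct and offset definitions live in a
header outside this section; their presumable shape, for orientation only:

    /* Presumed layout of the split register map (the real definitions are
     * in the ptp_qoriq header, which is not part of this section). */
    struct qoriq_ptp_registers {
            struct ctrl_regs __iomem  *ctrl_regs;
            struct alarm_regs __iomem *alarm_regs;
            struct fiper_regs __iomem *fiper_regs;
            struct etts_regs __iomem  *etts_regs;
    };

The IRQ is also now requested with IRQF_SHARED, presumably because the FMan
timer can share its interrupt line with other blocks.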
index c7e484f706543ebdc2ce63084cf1d45a2146869e..7c5a25ddf8321e9111a307d39002a237945f2856 100644 (file)
@@ -95,4 +95,14 @@ config CCWGROUP
        tristate
        default (LCS || CTCM || QETH)
 
+config ISM
+       tristate "Support for ISM vPCI Adapter"
+       depends on PCI && SMC
+       default n
+       help
+         Select this option if you want to use the Internal Shared Memory
+         vPCI Adapter.
+
+         To compile as a module choose M. The module name is ism.
+         If unsure, choose N.
 endmenu
index 513b7ae64980ef1da15bfcfa04f2689a6723300b..f2d6bbe57a6fcf97e70c5adb5449c3caf3bbb530 100644 (file)
@@ -15,3 +15,6 @@ qeth_l2-y += qeth_l2_main.o qeth_l2_sys.o
 obj-$(CONFIG_QETH_L2) += qeth_l2.o
 qeth_l3-y += qeth_l3_main.o qeth_l3_sys.o
 obj-$(CONFIG_QETH_L3) += qeth_l3.o
+
+ism-y := ism_drv.o
+obj-$(CONFIG_ISM) += ism.o
diff --git a/drivers/s390/net/ism.h b/drivers/s390/net/ism.h
new file mode 100644 (file)
index 0000000..0aab908
--- /dev/null
@@ -0,0 +1,221 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef S390_ISM_H
+#define S390_ISM_H
+
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <net/smc.h>
+
+#define UTIL_STR_LEN   16
+
+/*
+ * Do not use the first word of the DMB bits to ensure 8 byte aligned access.
+ */
+#define ISM_DMB_WORD_OFFSET    1
+#define ISM_DMB_BIT_OFFSET     (ISM_DMB_WORD_OFFSET * 32)
+#define ISM_NR_DMBS            1920
+
+#define ISM_REG_SBA    0x1
+#define ISM_REG_IEQ    0x2
+#define ISM_READ_GID   0x3
+#define ISM_ADD_VLAN_ID        0x4
+#define ISM_DEL_VLAN_ID        0x5
+#define ISM_SET_VLAN   0x6
+#define ISM_RESET_VLAN 0x7
+#define ISM_QUERY_INFO 0x8
+#define ISM_QUERY_RGID 0x9
+#define ISM_REG_DMB    0xA
+#define ISM_UNREG_DMB  0xB
+#define ISM_SIGNAL_IEQ 0xE
+#define ISM_UNREG_SBA  0x11
+#define ISM_UNREG_IEQ  0x12
+
+#define ISM_ERROR      0xFFFF
+
+struct ism_req_hdr {
+       u32 cmd;
+       u16 : 16;
+       u16 len;
+};
+
+struct ism_resp_hdr {
+       u32 cmd;
+       u16 ret;
+       u16 len;
+};
+
+union ism_reg_sba {
+       struct {
+               struct ism_req_hdr hdr;
+               u64 sba;
+       } request;
+       struct {
+               struct ism_resp_hdr hdr;
+       } response;
+} __aligned(16);
+
+union ism_reg_ieq {
+       struct {
+               struct ism_req_hdr hdr;
+               u64 ieq;
+               u64 len;
+       } request;
+       struct {
+               struct ism_resp_hdr hdr;
+       } response;
+} __aligned(16);
+
+union ism_read_gid {
+       struct {
+               struct ism_req_hdr hdr;
+       } request;
+       struct {
+               struct ism_resp_hdr hdr;
+               u64 gid;
+       } response;
+} __aligned(16);
+
+union ism_qi {
+       struct {
+               struct ism_req_hdr hdr;
+       } request;
+       struct {
+               struct ism_resp_hdr hdr;
+               u32 version;
+               u32 max_len;
+               u64 ism_state;
+               u64 my_gid;
+               u64 sba;
+               u64 ieq;
+               u32 ieq_len;
+               u32 : 32;
+               u32 dmbs_owned;
+               u32 dmbs_used;
+               u32 vlan_required;
+               u32 vlan_nr_ids;
+               u16 vlan_id[64];
+       } response;
+} __aligned(64);
+
+union ism_query_rgid {
+       struct {
+               struct ism_req_hdr hdr;
+               u64 rgid;
+               u32 vlan_valid;
+               u32 vlan_id;
+       } request;
+       struct {
+               struct ism_resp_hdr hdr;
+       } response;
+} __aligned(16);
+
+union ism_reg_dmb {
+       struct {
+               struct ism_req_hdr hdr;
+               u64 dmb;
+               u32 dmb_len;
+               u32 sba_idx;
+               u32 vlan_valid;
+               u32 vlan_id;
+               u64 rgid;
+       } request;
+       struct {
+               struct ism_resp_hdr hdr;
+               u64 dmb_tok;
+       } response;
+} __aligned(32);
+
+union ism_sig_ieq {
+       struct {
+               struct ism_req_hdr hdr;
+               u64 rgid;
+               u32 trigger_irq;
+               u32 event_code;
+               u64 info;
+       } request;
+       struct {
+               struct ism_resp_hdr hdr;
+       } response;
+} __aligned(32);
+
+union ism_unreg_dmb {
+       struct {
+               struct ism_req_hdr hdr;
+               u64 dmb_tok;
+       } request;
+       struct {
+               struct ism_resp_hdr hdr;
+       } response;
+} __aligned(16);
+
+union ism_cmd_simple {
+       struct {
+               struct ism_req_hdr hdr;
+       } request;
+       struct {
+               struct ism_resp_hdr hdr;
+       } response;
+} __aligned(8);
+
+union ism_set_vlan_id {
+       struct {
+               struct ism_req_hdr hdr;
+               u64 vlan_id;
+       } request;
+       struct {
+               struct ism_resp_hdr hdr;
+       } response;
+} __aligned(16);
+
+struct ism_eq_header {
+       u64 idx;
+       u64 ieq_len;
+       u64 entry_len;
+       u64 : 64;
+};
+
+struct ism_eq {
+       struct ism_eq_header header;
+       struct smcd_event entry[15];
+};
+
+struct ism_sba {
+       u32 s : 1;      /* summary bit */
+       u32 e : 1;      /* event bit */
+       u32 : 30;
+       u32 dmb_bits[ISM_NR_DMBS / 32];
+       u32 reserved[3];
+       u16 dmbe_mask[ISM_NR_DMBS];
+};
+
+struct ism_dev {
+       spinlock_t lock;
+       struct pci_dev *pdev;
+       struct smcd_dev *smcd;
+
+       void __iomem *ctl;
+
+       struct ism_sba *sba;
+       dma_addr_t sba_dma_addr;
+       DECLARE_BITMAP(sba_bitmap, ISM_NR_DMBS);
+
+       struct ism_eq *ieq;
+       dma_addr_t ieq_dma_addr;
+
+       int ieq_idx;
+};
+
+#define ISM_CREATE_REQ(dmb, idx, sf, offset)           \
+       ((dmb) | (idx) << 24 | (sf) << 23 | (offset))
+
+static inline int __ism_move(struct ism_dev *ism, u64 dmb_req, void *data,
+                            unsigned int size)
+{
+       struct zpci_dev *zdev = to_zpci(ism->pdev);
+       u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, size);
+
+       return zpci_write_block(req, data, dmb_req);
+}
+
+#endif /* S390_ISM_H */
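
The request word built by ISM_CREATE_REQ above packs four values into the
single u64 that __ism_move() hands to zpci_write_block(). A standalone
sketch of that layout, assuming the DMB token leaves the low bits free for
the index, signal flag and offset fields (this header alone does not
guarantee that):

    /* hypothetical decomposition of the ISM_CREATE_REQ bit layout */
    static u64 ism_create_req_sketch(u64 dmb_tok, u32 idx, u32 sf, u32 offset)
    {
            /* token | element index at bit 24 | signal flag at bit 23 | offset */
            return dmb_tok | ((u64)idx << 24) | ((u64)sf << 23) | offset;
    }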
diff --git a/drivers/s390/net/ism_drv.c b/drivers/s390/net/ism_drv.c
new file mode 100644 (file)
index 0000000..c063189
--- /dev/null
@@ -0,0 +1,623 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ISM driver for s390.
+ *
+ * Copyright IBM Corp. 2018
+ */
+#define KMSG_COMPONENT "ism"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/interrupt.h>
+#include <linux/device.h>
+#include <linux/pci.h>
+#include <linux/err.h>
+#include <net/smc.h>
+
+#include <asm/debug.h>
+
+#include "ism.h"
+
+MODULE_DESCRIPTION("ISM driver for s390");
+MODULE_LICENSE("GPL");
+
+#define PCI_DEVICE_ID_IBM_ISM 0x04ED
+#define DRV_NAME "ism"
+
+static const struct pci_device_id ism_device_table[] = {
+       { PCI_VDEVICE(IBM, PCI_DEVICE_ID_IBM_ISM), 0 },
+       { 0, }
+};
+MODULE_DEVICE_TABLE(pci, ism_device_table);
+
+static debug_info_t *ism_debug_info;
+
+static int ism_cmd(struct ism_dev *ism, void *cmd)
+{
+       struct ism_req_hdr *req = cmd;
+       struct ism_resp_hdr *resp = cmd;
+
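+       /* Mirror the request into the control area: the payload behind the
+        * header first, then the command header itself. */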
+       memcpy_toio(ism->ctl + sizeof(*req), req + 1, req->len - sizeof(*req));
+       memcpy_toio(ism->ctl, req, sizeof(*req));
+
+       WRITE_ONCE(resp->ret, ISM_ERROR);
+
+       memcpy_fromio(resp, ism->ctl, sizeof(*resp));
+       if (resp->ret) {
+               debug_text_event(ism_debug_info, 0, "cmd failure");
+               debug_event(ism_debug_info, 0, resp, sizeof(*resp));
+               goto out;
+       }
+       memcpy_fromio(resp + 1, ism->ctl + sizeof(*resp),
+                     resp->len - sizeof(*resp));
+out:
+       return resp->ret;
+}
+
+static int ism_cmd_simple(struct ism_dev *ism, u32 cmd_code)
+{
+       union ism_cmd_simple cmd;
+
+       memset(&cmd, 0, sizeof(cmd));
+       cmd.request.hdr.cmd = cmd_code;
+       cmd.request.hdr.len = sizeof(cmd.request);
+
+       return ism_cmd(ism, &cmd);
+}
+
+static int query_info(struct ism_dev *ism)
+{
+       union ism_qi cmd;
+
+       memset(&cmd, 0, sizeof(cmd));
+       cmd.request.hdr.cmd = ISM_QUERY_INFO;
+       cmd.request.hdr.len = sizeof(cmd.request);
+
+       if (ism_cmd(ism, &cmd))
+               goto out;
+
+       debug_text_event(ism_debug_info, 3, "query info");
+       debug_event(ism_debug_info, 3, &cmd.response, sizeof(cmd.response));
+out:
+       return 0;
+}
+
+static int register_sba(struct ism_dev *ism)
+{
+       union ism_reg_sba cmd;
+       dma_addr_t dma_handle;
+       struct ism_sba *sba;
+
+       sba = dma_zalloc_coherent(&ism->pdev->dev, PAGE_SIZE,
+                                 &dma_handle, GFP_KERNEL);
+       if (!sba)
+               return -ENOMEM;
+
+       memset(&cmd, 0, sizeof(cmd));
+       cmd.request.hdr.cmd = ISM_REG_SBA;
+       cmd.request.hdr.len = sizeof(cmd.request);
+       cmd.request.sba = dma_handle;
+
+       if (ism_cmd(ism, &cmd)) {
+               dma_free_coherent(&ism->pdev->dev, PAGE_SIZE, sba, dma_handle);
+               return -EIO;
+       }
+
+       ism->sba = sba;
+       ism->sba_dma_addr = dma_handle;
+
+       return 0;
+}
+
+static int register_ieq(struct ism_dev *ism)
+{
+       union ism_reg_ieq cmd;
+       dma_addr_t dma_handle;
+       struct ism_eq *ieq;
+
+       ieq = dma_zalloc_coherent(&ism->pdev->dev, PAGE_SIZE,
+                                 &dma_handle, GFP_KERNEL);
+       if (!ieq)
+               return -ENOMEM;
+
+       memset(&cmd, 0, sizeof(cmd));
+       cmd.request.hdr.cmd = ISM_REG_IEQ;
+       cmd.request.hdr.len = sizeof(cmd.request);
+       cmd.request.ieq = dma_handle;
+       cmd.request.len = sizeof(*ieq);
+
+       if (ism_cmd(ism, &cmd)) {
+               dma_free_coherent(&ism->pdev->dev, PAGE_SIZE, ieq, dma_handle);
+               return -EIO;
+       }
+
+       ism->ieq = ieq;
+       ism->ieq_idx = -1;
+       ism->ieq_dma_addr = dma_handle;
+
+       return 0;
+}
+
+static int unregister_sba(struct ism_dev *ism)
+{
+       if (!ism->sba)
+               return 0;
+
+       if (ism_cmd_simple(ism, ISM_UNREG_SBA))
+               return -EIO;
+
+       dma_free_coherent(&ism->pdev->dev, PAGE_SIZE,
+                         ism->sba, ism->sba_dma_addr);
+
+       ism->sba = NULL;
+       ism->sba_dma_addr = 0;
+
+       return 0;
+}
+
+static int unregister_ieq(struct ism_dev *ism)
+{
+       if (!ism->ieq)
+               return 0;
+
+       if (ism_cmd_simple(ism, ISM_UNREG_IEQ))
+               return -EIO;
+
+       dma_free_coherent(&ism->pdev->dev, PAGE_SIZE,
+                         ism->ieq, ism->ieq_dma_addr);
+
+       ism->ieq = NULL;
+       ism->ieq_dma_addr = 0;
+
+       return 0;
+}
+
+static int ism_read_local_gid(struct ism_dev *ism)
+{
+       union ism_read_gid cmd;
+       int ret;
+
+       memset(&cmd, 0, sizeof(cmd));
+       cmd.request.hdr.cmd = ISM_READ_GID;
+       cmd.request.hdr.len = sizeof(cmd.request);
+
+       ret = ism_cmd(ism, &cmd);
+       if (ret)
+               goto out;
+
+       ism->smcd->local_gid = cmd.response.gid;
+out:
+       return ret;
+}
+
+static int ism_query_rgid(struct smcd_dev *smcd, u64 rgid, u32 vid_valid,
+                         u32 vid)
+{
+       struct ism_dev *ism = smcd->priv;
+       union ism_query_rgid cmd;
+
+       memset(&cmd, 0, sizeof(cmd));
+       cmd.request.hdr.cmd = ISM_QUERY_RGID;
+       cmd.request.hdr.len = sizeof(cmd.request);
+
+       cmd.request.rgid = rgid;
+       cmd.request.vlan_valid = vid_valid;
+       cmd.request.vlan_id = vid;
+
+       return ism_cmd(ism, &cmd);
+}
+
+static void ism_free_dmb(struct ism_dev *ism, struct smcd_dmb *dmb)
+{
+       clear_bit(dmb->sba_idx, ism->sba_bitmap);
+       dma_free_coherent(&ism->pdev->dev, dmb->dmb_len,
+                         dmb->cpu_addr, dmb->dma_addr);
+}
+
+static int ism_alloc_dmb(struct ism_dev *ism, struct smcd_dmb *dmb)
+{
+       unsigned long bit;
+
+       if (PAGE_ALIGN(dmb->dmb_len) > dma_get_max_seg_size(&ism->pdev->dev))
+               return -EINVAL;
+
+       if (!dmb->sba_idx) {
+               bit = find_next_zero_bit(ism->sba_bitmap, ISM_NR_DMBS,
+                                        ISM_DMB_BIT_OFFSET);
+               if (bit == ISM_NR_DMBS)
+                       return -ENOMEM;
+
+               dmb->sba_idx = bit;
+       }
+       if (dmb->sba_idx < ISM_DMB_BIT_OFFSET ||
+           test_and_set_bit(dmb->sba_idx, ism->sba_bitmap))
+               return -EINVAL;
+
+       dmb->cpu_addr = dma_zalloc_coherent(&ism->pdev->dev, dmb->dmb_len,
+                                           &dmb->dma_addr, GFP_KERNEL |
+                                           __GFP_NOWARN | __GFP_NOMEMALLOC |
+                                           __GFP_COMP | __GFP_NORETRY);
+       if (!dmb->cpu_addr)
+               clear_bit(dmb->sba_idx, ism->sba_bitmap);
+
+       return dmb->cpu_addr ? 0 : -ENOMEM;
+}
+
+static int ism_register_dmb(struct smcd_dev *smcd, struct smcd_dmb *dmb)
+{
+       struct ism_dev *ism = smcd->priv;
+       union ism_reg_dmb cmd;
+       int ret;
+
+       ret = ism_alloc_dmb(ism, dmb);
+       if (ret)
+               goto out;
+
+       memset(&cmd, 0, sizeof(cmd));
+       cmd.request.hdr.cmd = ISM_REG_DMB;
+       cmd.request.hdr.len = sizeof(cmd.request);
+
+       cmd.request.dmb = dmb->dma_addr;
+       cmd.request.dmb_len = dmb->dmb_len;
+       cmd.request.sba_idx = dmb->sba_idx;
+       cmd.request.vlan_valid = dmb->vlan_valid;
+       cmd.request.vlan_id = dmb->vlan_id;
+       cmd.request.rgid = dmb->rgid;
+
+       ret = ism_cmd(ism, &cmd);
+       if (ret) {
+               ism_free_dmb(ism, dmb);
+               goto out;
+       }
+       dmb->dmb_tok = cmd.response.dmb_tok;
+out:
+       return ret;
+}
+
+static int ism_unregister_dmb(struct smcd_dev *smcd, struct smcd_dmb *dmb)
+{
+       struct ism_dev *ism = smcd->priv;
+       union ism_unreg_dmb cmd;
+       int ret;
+
+       memset(&cmd, 0, sizeof(cmd));
+       cmd.request.hdr.cmd = ISM_UNREG_DMB;
+       cmd.request.hdr.len = sizeof(cmd.request);
+
+       cmd.request.dmb_tok = dmb->dmb_tok;
+
+       ret = ism_cmd(ism, &cmd);
+       if (ret)
+               goto out;
+
+       ism_free_dmb(ism, dmb);
+out:
+       return ret;
+}
+
+static int ism_add_vlan_id(struct smcd_dev *smcd, u64 vlan_id)
+{
+       struct ism_dev *ism = smcd->priv;
+       union ism_set_vlan_id cmd;
+
+       memset(&cmd, 0, sizeof(cmd));
+       cmd.request.hdr.cmd = ISM_ADD_VLAN_ID;
+       cmd.request.hdr.len = sizeof(cmd.request);
+
+       cmd.request.vlan_id = vlan_id;
+
+       return ism_cmd(ism, &cmd);
+}
+
+static int ism_del_vlan_id(struct smcd_dev *smcd, u64 vlan_id)
+{
+       struct ism_dev *ism = smcd->priv;
+       union ism_set_vlan_id cmd;
+
+       memset(&cmd, 0, sizeof(cmd));
+       cmd.request.hdr.cmd = ISM_DEL_VLAN_ID;
+       cmd.request.hdr.len = sizeof(cmd.request);
+
+       cmd.request.vlan_id = vlan_id;
+
+       return ism_cmd(ism, &cmd);
+}
+
+static int ism_set_vlan_required(struct smcd_dev *smcd)
+{
+       return ism_cmd_simple(smcd->priv, ISM_SET_VLAN);
+}
+
+static int ism_reset_vlan_required(struct smcd_dev *smcd)
+{
+       return ism_cmd_simple(smcd->priv, ISM_RESET_VLAN);
+}
+
+static int ism_signal_ieq(struct smcd_dev *smcd, u64 rgid, u32 trigger_irq,
+                         u32 event_code, u64 info)
+{
+       struct ism_dev *ism = smcd->priv;
+       union ism_sig_ieq cmd;
+
+       memset(&cmd, 0, sizeof(cmd));
+       cmd.request.hdr.cmd = ISM_SIGNAL_IEQ;
+       cmd.request.hdr.len = sizeof(cmd.request);
+
+       cmd.request.rgid = rgid;
+       cmd.request.trigger_irq = trigger_irq;
+       cmd.request.event_code = event_code;
+       cmd.request.info = info;
+
+       return ism_cmd(ism, &cmd);
+}
+
+static unsigned int max_bytes(unsigned int start, unsigned int len,
+                             unsigned int boundary)
+{
+       return min(boundary - (start & (boundary - 1)), len);
+}
+
+static int ism_move(struct smcd_dev *smcd, u64 dmb_tok, unsigned int idx,
+                   bool sf, unsigned int offset, void *data, unsigned int size)
+{
+       struct ism_dev *ism = smcd->priv;
+       unsigned int bytes;
+       u64 dmb_req;
+       int ret;
+
+       while (size) {
+               bytes = max_bytes(offset, size, PAGE_SIZE);
+               dmb_req = ISM_CREATE_REQ(dmb_tok, idx, size == bytes ? sf : 0,
+                                        offset);
+
+               ret = __ism_move(ism, dmb_req, data, bytes);
+               if (ret)
+                       return ret;
+
+               size -= bytes;
+               data += bytes;
+               offset += bytes;
+       }
+
+       return 0;
+}
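
ism_move() never lets a single zpci_write_block() cross a page boundary in
the target DMB, and only the final chunk carries the caller's signal flag.
A worked example of the max_bytes() arithmetic, assuming a 4 KiB PAGE_SIZE
purely for illustration:

    /*
     * max_bytes(0x0f00, 0x300, 0x1000) == 0x100: only 256 bytes remain
     * before the boundary, so the first chunk stops there; the loop then
     * continues at offset 0x1000 with the remaining 0x200 bytes, which
     * fit in one chunk and carry the signal flag.
     */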
+
+static void ism_handle_event(struct ism_dev *ism)
+{
+       struct smcd_event *entry;
+
+       while ((ism->ieq_idx + 1) != READ_ONCE(ism->ieq->header.idx)) {
+               if (++(ism->ieq_idx) == ARRAY_SIZE(ism->ieq->entry))
+                       ism->ieq_idx = 0;
+
+               entry = &ism->ieq->entry[ism->ieq_idx];
+               debug_event(ism_debug_info, 2, entry, sizeof(*entry));
+               smcd_handle_event(ism->smcd, entry);
+       }
+}
+
+static irqreturn_t ism_handle_irq(int irq, void *data)
+{
+       struct ism_dev *ism = data;
+       unsigned long bit, end;
+       unsigned long *bv;
+
+       bv = (void *) &ism->sba->dmb_bits[ISM_DMB_WORD_OFFSET];
+       end = sizeof(ism->sba->dmb_bits) * BITS_PER_BYTE - ISM_DMB_BIT_OFFSET;
+
+       spin_lock(&ism->lock);
+       ism->sba->s = 0;
+       barrier();
+       for (bit = 0;;) {
+               bit = find_next_bit_inv(bv, end, bit);
+               if (bit >= end)
+                       break;
+
+               clear_bit_inv(bit, bv);
+               barrier();
+               smcd_handle_irq(ism->smcd, bit + ISM_DMB_BIT_OFFSET);
+               ism->sba->dmbe_mask[bit + ISM_DMB_BIT_OFFSET] = 0;
+       }
+
+       if (ism->sba->e) {
+               ism->sba->e = 0;
+               barrier();
+               ism_handle_event(ism);
+       }
+       spin_unlock(&ism->lock);
+       return IRQ_HANDLED;
+}
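
The interrupt handler scans dmb_bits starting at word 1 of the SBA (the
ISM_DMB_WORD_OFFSET reservation from ism.h), so bit 0 of the scanned region
corresponds to DMB number 32. find_next_bit_inv() and clear_bit_inv() are
the s390 MSB-first bitmap primitives; a simplified model of the same scan
in ordinary LSB-first bit order:

    /* sketch only: the real handler uses the s390 *_inv bit helpers */
    static void scan_dmb_bits_sketch(struct ism_dev *ism)
    {
            unsigned long *bv = (void *)&ism->sba->dmb_bits[ISM_DMB_WORD_OFFSET];
            unsigned long end = sizeof(ism->sba->dmb_bits) * BITS_PER_BYTE -
                                ISM_DMB_BIT_OFFSET;
            unsigned long bit;

            for_each_set_bit(bit, bv, end) {
                    clear_bit(bit, bv);
                    /* DMB number = scanned bit + the reserved first word */
                    smcd_handle_irq(ism->smcd, bit + ISM_DMB_BIT_OFFSET);
            }
    }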
+
+static const struct smcd_ops ism_ops = {
+       .query_remote_gid = ism_query_rgid,
+       .register_dmb = ism_register_dmb,
+       .unregister_dmb = ism_unregister_dmb,
+       .add_vlan_id = ism_add_vlan_id,
+       .del_vlan_id = ism_del_vlan_id,
+       .set_vlan_required = ism_set_vlan_required,
+       .reset_vlan_required = ism_reset_vlan_required,
+       .signal_event = ism_signal_ieq,
+       .move_data = ism_move,
+};
+
+static int ism_dev_init(struct ism_dev *ism)
+{
+       struct pci_dev *pdev = ism->pdev;
+       int ret;
+
+       ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI);
+       if (ret <= 0)
+               goto out;
+
+       ret = request_irq(pci_irq_vector(pdev, 0), ism_handle_irq, 0,
+                         pci_name(pdev), ism);
+       if (ret)
+               goto free_vectors;
+
+       ret = register_sba(ism);
+       if (ret)
+               goto free_irq;
+
+       ret = register_ieq(ism);
+       if (ret)
+               goto unreg_sba;
+
+       ret = ism_read_local_gid(ism);
+       if (ret)
+               goto unreg_ieq;
+
+       ret = smcd_register_dev(ism->smcd);
+       if (ret)
+               goto unreg_ieq;
+
+       query_info(ism);
+       return 0;
+
+unreg_ieq:
+       unregister_ieq(ism);
+unreg_sba:
+       unregister_sba(ism);
+free_irq:
+       free_irq(pci_irq_vector(pdev, 0), ism);
+free_vectors:
+       pci_free_irq_vectors(pdev);
+out:
+       return ret;
+}
+
+static int ism_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+       struct ism_dev *ism;
+       int ret;
+
+       ism = kzalloc(sizeof(*ism), GFP_KERNEL);
+       if (!ism)
+               return -ENOMEM;
+
+       spin_lock_init(&ism->lock);
+       dev_set_drvdata(&pdev->dev, ism);
+       ism->pdev = pdev;
+
+       ret = pci_enable_device_mem(pdev);
+       if (ret)
+               goto err;
+
+       ret = pci_request_mem_regions(pdev, DRV_NAME);
+       if (ret)
+               goto err_disable;
+
+       ism->ctl = pci_iomap(pdev, 2, 0);
+       if (!ism->ctl)
+               goto err_resource;
+
+       ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+       if (ret)
+               goto err_unmap;
+
+       pci_set_dma_seg_boundary(pdev, SZ_1M - 1);
+       pci_set_dma_max_seg_size(pdev, SZ_1M);
+       pci_set_master(pdev);
+
+       ism->smcd = smcd_alloc_dev(&pdev->dev, dev_name(&pdev->dev), &ism_ops,
+                                  ISM_NR_DMBS);
+       if (!ism->smcd)
+               goto err_unmap;
+
+       ism->smcd->priv = ism;
+       ret = ism_dev_init(ism);
+       if (ret)
+               goto err_free;
+
+       return 0;
+
+err_free:
+       smcd_free_dev(ism->smcd);
+err_unmap:
+       pci_iounmap(pdev, ism->ctl);
+err_resource:
+       pci_release_mem_regions(pdev);
+err_disable:
+       pci_disable_device(pdev);
+err:
+       kfree(ism);
+       dev_set_drvdata(&pdev->dev, NULL);
+       return ret;
+}
+
+static void ism_dev_exit(struct ism_dev *ism)
+{
+       struct pci_dev *pdev = ism->pdev;
+
+       smcd_unregister_dev(ism->smcd);
+       unregister_ieq(ism);
+       unregister_sba(ism);
+       free_irq(pci_irq_vector(pdev, 0), ism);
+       pci_free_irq_vectors(pdev);
+}
+
+static void ism_remove(struct pci_dev *pdev)
+{
+       struct ism_dev *ism = dev_get_drvdata(&pdev->dev);
+
+       ism_dev_exit(ism);
+
+       smcd_free_dev(ism->smcd);
+       pci_iounmap(pdev, ism->ctl);
+       pci_release_mem_regions(pdev);
+       pci_disable_device(pdev);
+       dev_set_drvdata(&pdev->dev, NULL);
+       kfree(ism);
+}
+
+static int ism_suspend(struct device *dev)
+{
+       struct ism_dev *ism = dev_get_drvdata(dev);
+
+       ism_dev_exit(ism);
+       return 0;
+}
+
+static int ism_resume(struct device *dev)
+{
+       struct ism_dev *ism = dev_get_drvdata(dev);
+
+       return ism_dev_init(ism);
+}
+
+static SIMPLE_DEV_PM_OPS(ism_pm_ops, ism_suspend, ism_resume);
+
+static struct pci_driver ism_driver = {
+       .name     = DRV_NAME,
+       .id_table = ism_device_table,
+       .probe    = ism_probe,
+       .remove   = ism_remove,
+       .driver   = {
+               .pm = &ism_pm_ops,
+       },
+};
+
+static int __init ism_init(void)
+{
+       int ret;
+
+       ism_debug_info = debug_register("ism", 2, 1, 16);
+       if (!ism_debug_info)
+               return -ENODEV;
+
+       debug_register_view(ism_debug_info, &debug_hex_ascii_view);
+       ret = pci_register_driver(&ism_driver);
+       if (ret)
+               debug_unregister(ism_debug_info);
+
+       return ret;
+}
+
+static void __exit ism_exit(void)
+{
+       pci_unregister_driver(&ism_driver);
+       debug_unregister(ism_debug_info);
+}
+
+module_init(ism_init);
+module_exit(ism_exit);
index a246a618f9a497047e4a81614f38da1eb295ef0b..605ec47067734cf0233a0fdf9908b6d49b163939 100644 (file)
@@ -104,6 +104,7 @@ struct qeth_dbf_info {
 struct qeth_perf_stats {
        unsigned int bufs_rec;
        unsigned int bufs_sent;
+       unsigned int buf_elements_sent;
 
        unsigned int skbs_sent_pack;
        unsigned int bufs_sent_pack;
@@ -137,7 +138,6 @@ struct qeth_perf_stats {
        unsigned int large_send_bytes;
        unsigned int large_send_cnt;
        unsigned int sg_skbs_sent;
-       unsigned int sg_frags_sent;
        /* initial values when measuring starts */
        unsigned long initial_rx_packets;
        unsigned long initial_tx_packets;
@@ -465,7 +465,6 @@ struct qeth_qdio_out_buffer {
        struct sk_buff_head skb_list;
        int is_header[QDIO_MAX_ELEMENTS_PER_BUFFER];
 
-       struct qaob *aob;
        struct qeth_qdio_out_q *q;
        struct qeth_qdio_out_buffer *next_pending;
 };
@@ -659,12 +658,8 @@ struct qeth_card_info {
        char mcl_level[QETH_MCL_LENGTH + 1];
        int guestlan;
        int mac_bits;
-       int portno;
        enum qeth_card_types type;
        enum qeth_link_types link_type;
-       int is_multicast_different;
-       int initial_mtu;
-       int max_mtu;
        int broadcast_capable;
        int unique_id;
        bool layer_enforced;
@@ -935,6 +930,19 @@ static inline int qeth_send_simple_setassparms_v6(struct qeth_card *card,
                                                 data, QETH_PROT_IPV6);
 }
 
+int qeth_get_priority_queue(struct qeth_card *card, struct sk_buff *skb,
+                           int ipv);
+static inline struct qeth_qdio_out_q *qeth_get_tx_queue(struct qeth_card *card,
+                                                       struct sk_buff *skb,
+                                                       int ipv, int cast_type)
+{
+       if (IS_IQD(card) && cast_type != RTN_UNICAST)
+               return card->qdio.out_qs[card->qdio.no_out_queues - 1];
+       if (!card->qdio.do_prio_queueing)
+               return card->qdio.out_qs[card->qdio.default_out_queue];
+       return card->qdio.out_qs[qeth_get_priority_queue(card, skb, ipv)];
+}
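
The new qeth_get_tx_queue() helper centralizes the queue selection policy;
summarized for a card with the full set of four outbound queues:

    /* illustration only, assuming card->qdio.no_out_queues == 4:
     *
     *   IQD and cast_type != RTN_UNICAST  ->  out_qs[3] (last queue)
     *   priority queueing disabled        ->  out_qs[default_out_queue]
     *   otherwise                         ->  out_qs[qeth_get_priority_queue()]
     */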
+
 extern struct qeth_discipline qeth_l2_discipline;
 extern struct qeth_discipline qeth_l3_discipline;
 extern const struct attribute_group *qeth_generic_attr_groups[];
@@ -955,6 +963,7 @@ extern struct qeth_card_list_struct qeth_core_card_list;
 extern struct kmem_cache *qeth_core_header_cache;
 extern struct qeth_dbf_info qeth_dbf[QETH_DBF_INFOS];
 
+struct net_device *qeth_clone_netdev(struct net_device *orig);
 void qeth_set_recovery_task(struct qeth_card *);
 void qeth_clear_recovery_task(struct qeth_card *);
 void qeth_set_allowed_threads(struct qeth_card *, unsigned long , int);
@@ -972,7 +981,6 @@ int qeth_send_ipa_cmd(struct qeth_card *, struct qeth_cmd_buffer *,
                  void *);
 struct qeth_cmd_buffer *qeth_get_ipacmd_buffer(struct qeth_card *,
                        enum qeth_ipa_cmds, enum qeth_prot_versions);
-int qeth_query_setadapterparms(struct qeth_card *);
 struct sk_buff *qeth_core_get_next_skb(struct qeth_card *,
                struct qeth_qdio_buffer *, struct qdio_buffer_element **, int *,
                struct qeth_hdr **);
@@ -985,7 +993,6 @@ void qeth_clear_cmd_buffers(struct qeth_channel *);
 void qeth_clear_qdio_buffers(struct qeth_card *);
 void qeth_setadp_promisc_mode(struct qeth_card *);
 struct net_device_stats *qeth_get_stats(struct net_device *);
-int qeth_change_mtu(struct net_device *, int);
 int qeth_setadpparms_change_macaddr(struct qeth_card *);
 void qeth_tx_timeout(struct net_device *);
 void qeth_prepare_control_data(struct qeth_card *, int,
@@ -998,11 +1005,6 @@ int qeth_query_switch_attributes(struct qeth_card *card,
 int qeth_send_control_data(struct qeth_card *, int, struct qeth_cmd_buffer *,
        int (*reply_cb)(struct qeth_card *, struct qeth_reply*, unsigned long),
        void *reply_param);
-int qeth_bridgeport_query_ports(struct qeth_card *card,
-       enum qeth_sbp_roles *role, enum qeth_sbp_states *state);
-int qeth_bridgeport_setrole(struct qeth_card *card, enum qeth_sbp_roles role);
-int qeth_bridgeport_an_set(struct qeth_card *card, int enable);
-int qeth_get_priority_queue(struct qeth_card *, struct sk_buff *, int, int);
 int qeth_get_elements_no(struct qeth_card *card, struct sk_buff *skb,
                         int extra_elems, int data_offset);
 int qeth_get_elements_for_frags(struct sk_buff *);
@@ -1026,7 +1028,6 @@ int qeth_set_access_ctrl_online(struct qeth_card *card, int fallback);
 int qeth_hdr_chk_and_bounce(struct sk_buff *, struct qeth_hdr **, int);
 int qeth_configure_cq(struct qeth_card *, enum qeth_cq);
 int qeth_hw_trap(struct qeth_card *, enum qeth_diags_trap_action);
-int qeth_query_ipassists(struct qeth_card *, enum qeth_prot_versions prot);
 void qeth_trace_features(struct qeth_card *);
 void qeth_close_dev(struct qeth_card *);
 int qeth_send_setassparms(struct qeth_card *, struct qeth_cmd_buffer *, __u16,
@@ -1046,7 +1047,9 @@ netdev_features_t qeth_features_check(struct sk_buff *skb,
                                      struct net_device *dev,
                                      netdev_features_t features);
 int qeth_vm_request_mac(struct qeth_card *card);
-int qeth_push_hdr(struct sk_buff *skb, struct qeth_hdr **hdr, unsigned int len);
+int qeth_add_hw_header(struct qeth_card *card, struct sk_buff *skb,
+                      struct qeth_hdr **hdr, unsigned int hdr_len,
+                      unsigned int proto_len, unsigned int *elements);
 
 /* exports for OSN */
 int qeth_osn_assist(struct net_device *, void *, int);
index d01ac29fd986d82b84b7215c5268c37e94aaadaa..d09a7110b3813f8fe335880f7e8223b898cbbf37 100644 (file)
@@ -473,7 +473,6 @@ static void qeth_cleanup_handled_pending(struct qeth_qdio_out_q *q, int bidx,
        if (forced_cleanup && (atomic_read(&(q->bufs[bidx]->state)) ==
                                        QETH_QDIO_BUF_HANDLED_DELAYED)) {
                /* for recovery situations */
-               q->bufs[bidx]->aob = q->bufstates[bidx].aob;
                qeth_init_qdio_out_buf(q, bidx);
                QETH_CARD_TEXT(q->card, 2, "clprecov");
        }
@@ -510,7 +509,6 @@ static void qeth_qdio_handle_aob(struct qeth_card *card,
        }
        qeth_notify_skbs(buffer->q, buffer, notification);
 
-       buffer->aob = NULL;
        /* Free dangling allocations. The attached skbs are handled by
         * qeth_cleanup_handled_pending().
         */
@@ -655,8 +653,7 @@ static struct qeth_ipa_cmd *qeth_check_ipa_data(struct qeth_card *card,
                                                cmd->hdr.return_code, card);
                                }
                                card->lan_online = 0;
-                               if (card->dev && netif_carrier_ok(card->dev))
-                                       netif_carrier_off(card->dev);
+                               netif_carrier_off(card->dev);
                                return NULL;
                        case IPA_CMD_STARTLAN:
                                dev_info(&card->gdev->dev,
@@ -1267,8 +1264,7 @@ static void qeth_release_skbs(struct qeth_qdio_out_buffer *buf)
 }
 
 static void qeth_clear_output_buffer(struct qeth_qdio_out_q *queue,
-               struct qeth_qdio_out_buffer *buf,
-               enum qeth_qdio_buffer_states newbufstate)
+                                    struct qeth_qdio_out_buffer *buf)
 {
        int i;
 
@@ -1276,23 +1272,19 @@ static void qeth_clear_output_buffer(struct qeth_qdio_out_q *queue,
        if (buf->buffer->element[0].sflags & SBAL_SFLAGS0_PCI_REQ)
                atomic_dec(&queue->set_pci_flags_count);
 
-       if (newbufstate == QETH_QDIO_BUF_EMPTY) {
-               qeth_release_skbs(buf);
-       }
+       qeth_release_skbs(buf);
+
        for (i = 0; i < QETH_MAX_BUFFER_ELEMENTS(queue->card); ++i) {
                if (buf->buffer->element[i].addr && buf->is_header[i])
                        kmem_cache_free(qeth_core_header_cache,
                                buf->buffer->element[i].addr);
                buf->is_header[i] = 0;
-               buf->buffer->element[i].length = 0;
-               buf->buffer->element[i].addr = NULL;
-               buf->buffer->element[i].eflags = 0;
-               buf->buffer->element[i].sflags = 0;
        }
-       buf->buffer->element[15].eflags = 0;
-       buf->buffer->element[15].sflags = 0;
+
+       qeth_scrub_qdio_buffer(buf->buffer,
+                              QETH_MAX_BUFFER_ELEMENTS(queue->card));
        buf->next_element_to_fill = 0;
-       atomic_set(&buf->state, newbufstate);
+       atomic_set(&buf->state, QETH_QDIO_BUF_EMPTY);
 }
 
 static void qeth_clear_outq_buffers(struct qeth_qdio_out_q *q, int free)
@@ -1303,7 +1295,7 @@ static void qeth_clear_outq_buffers(struct qeth_qdio_out_q *q, int free)
                if (!q->bufs[j])
                        continue;
                qeth_cleanup_handled_pending(q, j, 1);
-               qeth_clear_output_buffer(q, q->bufs[j], QETH_QDIO_BUF_EMPTY);
+               qeth_clear_output_buffer(q, q->bufs[j]);
                if (free) {
                        kmem_cache_free(qeth_qdio_outbuf_cache, q->bufs[j]);
                        q->bufs[j] = NULL;
@@ -1544,8 +1536,6 @@ static void qeth_determine_card_type(struct qeth_card *card)
        card->qdio.default_out_queue = QETH_DEFAULT_QUEUE;
        card->info.type = CARD_RDEV(card)->id.driver_info;
        card->qdio.no_out_queues = QETH_MAX_QUEUES;
-       if (card->info.type == QETH_CARD_TYPE_IQD)
-               card->info.is_multicast_different = 0x0103;
        qeth_update_from_chp_desc(card);
 }
 
@@ -1930,7 +1920,7 @@ static int qeth_idx_activate_channel(struct qeth_channel *channel,
                memcpy(QETH_TRANSPORT_HEADER_SEQ_NO(iob->data),
                       &card->seqno.trans_hdr, QETH_SEQ_NO_LENGTH);
        }
-       tmp = ((__u8)card->info.portno) | 0x80;
+       tmp = ((u8)card->dev->dev_port) | 0x80;
        memcpy(QETH_IDX_ACT_PNO(iob->data), &tmp, 1);
        memcpy(QETH_IDX_ACT_ISSUER_RM_TOKEN(iob->data),
               &card->token.issuer_rm_w, QETH_MPC_TOKEN_LENGTH);
@@ -2288,19 +2278,42 @@ static int qeth_cm_setup(struct qeth_card *card)
 
 }
 
-static int qeth_get_initial_mtu_for_card(struct qeth_card *card)
+static int qeth_update_max_mtu(struct qeth_card *card, unsigned int max_mtu)
 {
-       switch (card->info.type) {
-       case QETH_CARD_TYPE_IQD:
-               return card->info.max_mtu;
-       case QETH_CARD_TYPE_OSD:
-       case QETH_CARD_TYPE_OSX:
-               if (!card->options.layer2)
-                       return ETH_DATA_LEN - 8; /* L3: allow for LLC + SNAP */
-               /* fall through */
-       default:
-               return ETH_DATA_LEN;
+       struct net_device *dev = card->dev;
+       unsigned int new_mtu;
+
+       if (!max_mtu) {
+               /* IQD needs accurate max MTU to set up its RX buffers: */
+               if (IS_IQD(card))
+                       return -EINVAL;
+               /* tolerate quirky HW: */
+               max_mtu = ETH_MAX_MTU;
+       }
+
+       rtnl_lock();
+       if (IS_IQD(card)) {
+               /* move any device with default MTU to new max MTU: */
+               new_mtu = (dev->mtu == dev->max_mtu) ? max_mtu : dev->mtu;
+
+               /* adjust RX buffer size to new max MTU: */
+               card->qdio.in_buf_size = max_mtu + 2 * PAGE_SIZE;
+               if (dev->max_mtu && dev->max_mtu != max_mtu)
+                       qeth_free_qdio_buffers(card);
+       } else {
+               if (dev->mtu)
+                       new_mtu = dev->mtu;
+               /* default MTUs for first setup: */
+               else if (card->options.layer2)
+                       new_mtu = ETH_DATA_LEN;
+               else
+                       new_mtu = ETH_DATA_LEN - 8; /* allow for LLC + SNAP */
        }
+
+       dev->max_mtu = max_mtu;
+       dev->mtu = min(new_mtu, max_mtu);
+       rtnl_unlock();
+       return 0;
 }
 
 static int qeth_get_mtu_outof_framesize(int framesize)
@@ -2319,21 +2332,6 @@ static int qeth_get_mtu_outof_framesize(int framesize)
        }
 }
 
-static int qeth_mtu_is_valid(struct qeth_card *card, int mtu)
-{
-       switch (card->info.type) {
-       case QETH_CARD_TYPE_OSD:
-       case QETH_CARD_TYPE_OSM:
-       case QETH_CARD_TYPE_OSX:
-       case QETH_CARD_TYPE_IQD:
-               return ((mtu >= 576) &&
-                       (mtu <= card->info.max_mtu));
-       case QETH_CARD_TYPE_OSN:
-       default:
-               return 1;
-       }
-}
-
 static int qeth_ulp_enable_cb(struct qeth_card *card, struct qeth_reply *reply,
                unsigned long data)
 {
@@ -2352,29 +2350,10 @@ static int qeth_ulp_enable_cb(struct qeth_card *card, struct qeth_reply *reply,
        if (card->info.type == QETH_CARD_TYPE_IQD) {
                memcpy(&framesize, QETH_ULP_ENABLE_RESP_MAX_MTU(iob->data), 2);
                mtu = qeth_get_mtu_outof_framesize(framesize);
-               if (!mtu) {
-                       iob->rc = -EINVAL;
-                       QETH_DBF_TEXT_(SETUP, 2, "  rc%d", iob->rc);
-                       return 0;
-               }
-               if (card->info.initial_mtu && (card->info.initial_mtu != mtu)) {
-                       /* frame size has changed */
-                       if (card->dev &&
-                           ((card->dev->mtu == card->info.initial_mtu) ||
-                            (card->dev->mtu > mtu)))
-                               card->dev->mtu = mtu;
-                       qeth_free_qdio_buffers(card);
-               }
-               card->info.initial_mtu = mtu;
-               card->info.max_mtu = mtu;
-               card->qdio.in_buf_size = mtu + 2 * PAGE_SIZE;
        } else {
-               card->info.max_mtu = *(__u16 *)QETH_ULP_ENABLE_RESP_MAX_MTU(
-                       iob->data);
-               card->info.initial_mtu = min(card->info.max_mtu,
-                                       qeth_get_initial_mtu_for_card(card));
-               card->qdio.in_buf_size = QETH_IN_BUF_SIZE_DEFAULT;
+               mtu = *(__u16 *)QETH_ULP_ENABLE_RESP_MAX_MTU(iob->data);
        }
+       *(u16 *)reply->param = mtu;
 
        memcpy(&len, QETH_ULP_ENABLE_RESP_DIFINFO_LEN(iob->data), 2);
        if (len >= QETH_MPC_DIFINFO_LEN_INDICATES_LINK_TYPE) {
@@ -2393,6 +2372,7 @@ static int qeth_ulp_enable(struct qeth_card *card)
        int rc;
        char prot_type;
        struct qeth_cmd_buffer *iob;
+       u16 max_mtu;
 
        /*FIXME: trace view callbacks*/
        QETH_DBF_TEXT(SETUP, 2, "ulpenabl");
@@ -2400,8 +2380,7 @@ static int qeth_ulp_enable(struct qeth_card *card)
        iob = qeth_wait_for_buffer(&card->write);
        memcpy(iob->data, ULP_ENABLE, ULP_ENABLE_SIZE);
 
-       *(QETH_ULP_ENABLE_LINKNUM(iob->data)) =
-               (__u8) card->info.portno;
+       *(QETH_ULP_ENABLE_LINKNUM(iob->data)) = (u8) card->dev->dev_port;
        if (card->options.layer2)
                if (card->info.type == QETH_CARD_TYPE_OSN)
                        prot_type = QETH_PROT_OSN2;
@@ -2416,9 +2395,10 @@ static int qeth_ulp_enable(struct qeth_card *card)
        memcpy(QETH_ULP_ENABLE_FILTER_TOKEN(iob->data),
               &card->token.ulp_filter_w, QETH_MPC_TOKEN_LENGTH);
        rc = qeth_send_control_data(card, ULP_ENABLE_SIZE, iob,
-                                   qeth_ulp_enable_cb, NULL);
-       return rc;
-
+                                   qeth_ulp_enable_cb, &max_mtu);
+       if (rc)
+               return rc;
+       return qeth_update_max_mtu(card, max_mtu);
 }
 
 static int qeth_ulp_setup_cb(struct qeth_card *card, struct qeth_reply *reply,
@@ -2473,32 +2453,20 @@ static int qeth_ulp_setup(struct qeth_card *card)
 
 static int qeth_init_qdio_out_buf(struct qeth_qdio_out_q *q, int bidx)
 {
-       int rc;
        struct qeth_qdio_out_buffer *newbuf;
 
-       rc = 0;
        newbuf = kmem_cache_zalloc(qeth_qdio_outbuf_cache, GFP_ATOMIC);
-       if (!newbuf) {
-               rc = -ENOMEM;
-               goto out;
-       }
+       if (!newbuf)
+               return -ENOMEM;
+
        newbuf->buffer = q->qdio_bufs[bidx];
        skb_queue_head_init(&newbuf->skb_list);
        lockdep_set_class(&newbuf->skb_list.lock, &qdio_out_skb_queue_key);
        newbuf->q = q;
-       newbuf->aob = NULL;
        newbuf->next_pending = q->bufs[bidx];
        atomic_set(&newbuf->state, QETH_QDIO_BUF_EMPTY);
        q->bufs[bidx] = newbuf;
-       if (q->bufstates) {
-               q->bufstates[bidx].user = newbuf;
-               QETH_CARD_TEXT_(q->card, 2, "nbs%d", bidx);
-               QETH_CARD_TEXT_(q->card, 2, "%lx", (long) newbuf);
-               QETH_CARD_TEXT_(q->card, 2, "%lx",
-                               (long) newbuf->next_pending);
-       }
-out:
-       return rc;
+       return 0;
 }
 
 static void qeth_free_qdio_out_buf(struct qeth_qdio_out_q *q)
@@ -2908,8 +2876,7 @@ int qeth_init_qdio_queues(struct qeth_card *card)
                                   QDIO_MAX_BUFFERS_PER_Q);
                for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; ++j) {
                        qeth_clear_output_buffer(card->qdio.out_qs[i],
-                                       card->qdio.out_qs[i]->bufs[j],
-                                       QETH_QDIO_BUF_EMPTY);
+                                                card->qdio.out_qs[i]->bufs[j]);
                }
                card->qdio.out_qs[i]->card = card;
                card->qdio.out_qs[i]->next_buf_to_fill = 0;
@@ -2942,7 +2909,7 @@ static void qeth_fill_ipacmd_header(struct qeth_card *card,
        cmd->hdr.initiator = IPA_CMD_INITIATOR_HOST;
        /* cmd->hdr.seqno is set by qeth_send_control_data() */
        cmd->hdr.adapter_type = qeth_get_ipa_adp_type(card->info.link_type);
-       cmd->hdr.rel_adapter_no = (__u8) card->info.portno;
+       cmd->hdr.rel_adapter_no = (u8) card->dev->dev_port;
        if (card->options.layer2)
                cmd->hdr.prim_version_no = 2;
        else
@@ -3076,7 +3043,7 @@ static struct qeth_cmd_buffer *qeth_get_adapter_cmd(struct qeth_card *card,
        return iob;
 }
 
-int qeth_query_setadapterparms(struct qeth_card *card)
+static int qeth_query_setadapterparms(struct qeth_card *card)
 {
        int rc;
        struct qeth_cmd_buffer *iob;
@@ -3089,7 +3056,6 @@ int qeth_query_setadapterparms(struct qeth_card *card)
        rc = qeth_send_ipa_cmd(card, iob, qeth_query_setadapterparms_cb, NULL);
        return rc;
 }
-EXPORT_SYMBOL_GPL(qeth_query_setadapterparms);
 
 static int qeth_query_ipassists_cb(struct qeth_card *card,
                struct qeth_reply *reply, unsigned long data)
@@ -3129,7 +3095,8 @@ static int qeth_query_ipassists_cb(struct qeth_card *card,
        return 0;
 }
 
-int qeth_query_ipassists(struct qeth_card *card, enum qeth_prot_versions prot)
+static int qeth_query_ipassists(struct qeth_card *card,
+                               enum qeth_prot_versions prot)
 {
        int rc;
        struct qeth_cmd_buffer *iob;
@@ -3141,7 +3108,6 @@ int qeth_query_ipassists(struct qeth_card *card, enum qeth_prot_versions prot)
        rc = qeth_send_ipa_cmd(card, iob, qeth_query_ipassists_cb, NULL);
        return rc;
 }
-EXPORT_SYMBOL_GPL(qeth_query_ipassists);
 
 static int qeth_query_switch_attributes_cb(struct qeth_card *card,
                                struct qeth_reply *reply, unsigned long data)
@@ -3180,7 +3146,6 @@ int qeth_query_switch_attributes(struct qeth_card *card,
        return qeth_send_ipa_cmd(card, iob,
                                qeth_query_switch_attributes_cb, sw_info);
 }
-EXPORT_SYMBOL_GPL(qeth_query_switch_attributes);
 
 static int qeth_query_setdiagass_cb(struct qeth_card *card,
                struct qeth_reply *reply, unsigned long data)
@@ -3530,13 +3495,14 @@ static void qeth_flush_buffers(struct qeth_qdio_out_q *queue, int index,
        qdio_flags = QDIO_FLAG_SYNC_OUTPUT;
        if (atomic_read(&queue->set_pci_flags_count))
                qdio_flags |= QDIO_FLAG_PCI_OUT;
+       atomic_add(count, &queue->used_buffers);
+
        rc = do_QDIO(CARD_DDEV(queue->card), qdio_flags,
                     queue->queue_no, index, count);
        if (queue->card->options.performance_stats)
                queue->card->perf_stats.outbound_do_qdio_time +=
                        qeth_get_micros() -
                        queue->card->perf_stats.outbound_do_qdio_start_time;
-       atomic_add(count, &queue->used_buffers);
        if (rc) {
                queue->card->stats.tx_errors += count;
                /* ignore temporary SIGA errors without busy condition */
@@ -3601,7 +3567,7 @@ static void qeth_qdio_start_poll(struct ccw_device *ccwdev, int queue,
 {
        struct qeth_card *card = (struct qeth_card *)card_ptr;
 
-       if (card->dev && (card->dev->flags & IFF_UP))
+       if (card->dev->flags & IFF_UP)
                napi_schedule(&card->napi);
 }
 
@@ -3634,10 +3600,10 @@ out:
 }
 EXPORT_SYMBOL_GPL(qeth_configure_cq);
 
-
-static void qeth_qdio_cq_handler(struct qeth_card *card,
-               unsigned int qdio_err,
-               unsigned int queue, int first_element, int count) {
+static void qeth_qdio_cq_handler(struct qeth_card *card, unsigned int qdio_err,
+                                unsigned int queue, int first_element,
+                                int count)
+{
        struct qeth_qdio_q *cq = card->qdio.c_q;
        int i;
        int rc;
@@ -3663,25 +3629,17 @@ static void qeth_qdio_cq_handler(struct qeth_card *card,
        for (i = first_element; i < first_element + count; ++i) {
                int bidx = i % QDIO_MAX_BUFFERS_PER_Q;
                struct qdio_buffer *buffer = cq->qdio_bufs[bidx];
-               int e;
+               int e = 0;
 
-               e = 0;
                while ((e < QDIO_MAX_ELEMENTS_PER_BUFFER) &&
                       buffer->element[e].addr) {
                        unsigned long phys_aob_addr;
 
                        phys_aob_addr = (unsigned long) buffer->element[e].addr;
                        qeth_qdio_handle_aob(card, phys_aob_addr);
-                       buffer->element[e].addr = NULL;
-                       buffer->element[e].eflags = 0;
-                       buffer->element[e].sflags = 0;
-                       buffer->element[e].length = 0;
-
                        ++e;
                }
-
-               buffer->element[15].eflags = 0;
-               buffer->element[15].sflags = 0;
+               qeth_scrub_qdio_buffer(buffer, QDIO_MAX_ELEMENTS_PER_BUFFER);
        }
        rc = do_QDIO(CARD_DDEV(card), QDIO_FLAG_SYNC_INPUT, queue,
                    card->qdio.c_q->next_buf_to_init,
@@ -3760,11 +3718,7 @@ static void qeth_qdio_output_handler(struct ccw_device *ccwdev,
                                qeth_notify_skbs(queue, buffer,
                                                 TX_NOTIFY_PENDING);
                        }
-                       buffer->aob = queue->bufstates[bidx].aob;
                        QETH_CARD_TEXT_(queue->card, 5, "pel%d", bidx);
-                       QETH_CARD_TEXT(queue->card, 5, "aob");
-                       QETH_CARD_TEXT_(queue->card, 5, "%lx",
-                                       virt_to_phys(buffer->aob));
 
                        /* prepare the queue slot for re-use: */
                        qeth_scrub_qdio_buffer(buffer->buffer,
@@ -3782,8 +3736,7 @@ static void qeth_qdio_output_handler(struct ccw_device *ccwdev,
                                qeth_notify_skbs(queue, buffer, n);
                        }
 
-                       qeth_clear_output_buffer(queue, buffer,
-                                               QETH_QDIO_BUF_EMPTY);
+                       qeth_clear_output_buffer(queue, buffer);
                }
                qeth_cleanup_handled_pending(queue, bidx, 0);
        }
@@ -3810,15 +3763,11 @@ static inline int qeth_cut_iqd_prio(struct qeth_card *card, int queue_num)
  * Note: Function assumes that we have 4 outbound queues.
  */
 int qeth_get_priority_queue(struct qeth_card *card, struct sk_buff *skb,
-                       int ipv, int cast_type)
+                           int ipv)
 {
        __be16 *tci;
        u8 tos;
 
-       if (cast_type && card->info.is_multicast_different)
-               return card->info.is_multicast_different &
-                       (card->qdio.no_out_queues - 1);
-
        switch (card->qdio.do_prio_queueing) {
        case QETH_PRIO_Q_ING_TOS:
        case QETH_PRIO_Q_ING_PREC:
@@ -3882,6 +3831,17 @@ int qeth_get_elements_for_frags(struct sk_buff *skb)
 }
 EXPORT_SYMBOL_GPL(qeth_get_elements_for_frags);
 
+static unsigned int qeth_count_elements(struct sk_buff *skb, int data_offset)
+{
+       unsigned int elements = qeth_get_elements_for_frags(skb);
+       addr_t end = (addr_t)skb->data + skb_headlen(skb);
+       addr_t start = (addr_t)skb->data + data_offset;
+
+       if (start != end)
+               elements += qeth_get_elements_for_range(start, end);
+       return elements;
+}
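
qeth_count_elements() is split out so that qeth_add_hw_header() further
down can reuse it. Each buffer element can only address memory within one
page, so a byte range costs one element per page it touches. A minimal
model of that cost, assuming qeth_get_elements_for_range() is the usual
pages-touched calculation (its body is not part of this patch):

    /* model: single-page buffer elements needed for the range [start, end) */
    static unsigned int elements_for_range_sketch(addr_t start, addr_t end)
    {
            if (start == end)
                    return 0;
            return (end - 1) / PAGE_SIZE - start / PAGE_SIZE + 1;
    }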
+
 /**
  * qeth_get_elements_no() -    find number of SBALEs for skb data, inc. frags.
  * @card:                      qeth card structure, to check max. elems.
@@ -3897,12 +3857,7 @@ EXPORT_SYMBOL_GPL(qeth_get_elements_for_frags);
 int qeth_get_elements_no(struct qeth_card *card,
                     struct sk_buff *skb, int extra_elems, int data_offset)
 {
-       addr_t end = (addr_t)skb->data + skb_headlen(skb);
-       int elements = qeth_get_elements_for_frags(skb);
-       addr_t start = (addr_t)skb->data + data_offset;
-
-       if (start != end)
-               elements += qeth_get_elements_for_range(start, end);
+       int elements = qeth_count_elements(skb, data_offset);
 
        if ((elements + extra_elems) > QETH_MAX_BUFFER_ELEMENTS(card)) {
                QETH_DBF_MESSAGE(2, "Invalid size of IP packet "
@@ -3936,32 +3891,87 @@ int qeth_hdr_chk_and_bounce(struct sk_buff *skb, struct qeth_hdr **hdr, int len)
 EXPORT_SYMBOL_GPL(qeth_hdr_chk_and_bounce);
 
 /**
- * qeth_push_hdr() - push a qeth_hdr onto an skb.
- * @skb: skb that the qeth_hdr should be pushed onto.
+ * qeth_add_hw_header() - add a HW header to an skb.
+ * @skb: skb that the HW header should be added to.
  * @hdr: double pointer to a qeth_hdr. When returning with >= 0,
  *      it contains a valid pointer to a qeth_hdr.
- * @len: length of the hdr that needs to be pushed on.
+ * @hdr_len: length of the HW header.
+ * @proto_len: length of protocol headers that need to be in same page as the
+ *            HW header.
  *
  * Returns the pushed length. If the header can't be pushed on
 * (e.g. because it would cross a page boundary), it is allocated from
  * the cache instead and 0 is returned.
+ * The number of needed buffer elements is returned in @elements.
  * Error to create the hdr is indicated by returning with < 0.
  */
-int qeth_push_hdr(struct sk_buff *skb, struct qeth_hdr **hdr, unsigned int len)
-{
-       if (skb_headroom(skb) >= len &&
-           qeth_get_elements_for_range((addr_t)skb->data - len,
-                                       (addr_t)skb->data) == 1) {
-               *hdr = skb_push(skb, len);
-               return len;
+int qeth_add_hw_header(struct qeth_card *card, struct sk_buff *skb,
+                      struct qeth_hdr **hdr, unsigned int hdr_len,
+                      unsigned int proto_len, unsigned int *elements)
+{
+       const unsigned int max_elements = QETH_MAX_BUFFER_ELEMENTS(card);
+       const unsigned int contiguous = proto_len ? proto_len : 1;
+       unsigned int __elements;
+       addr_t start, end;
+       bool push_ok;
+       int rc;
+
+check_layout:
+       start = (addr_t)skb->data - hdr_len;
+       end = (addr_t)skb->data;
+
+       if (qeth_get_elements_for_range(start, end + contiguous) == 1) {
+               /* Push HW header into same page as first protocol header. */
+               push_ok = true;
+               __elements = qeth_count_elements(skb, 0);
+       } else if (!proto_len && qeth_get_elements_for_range(start, end) == 1) {
+               /* Push HW header into a new page. */
+               push_ok = true;
+               __elements = 1 + qeth_count_elements(skb, 0);
+       } else {
+               /* Use header cache, copy protocol headers up. */
+               push_ok = false;
+               __elements = 1 + qeth_count_elements(skb, proto_len);
+       }
+
+       /* Compress skb to fit into one IO buffer: */
+       if (__elements > max_elements) {
+               if (!skb_is_nonlinear(skb)) {
+                       /* Drop it, no easy way of shrinking it further. */
+                       QETH_DBF_MESSAGE(2, "Dropped an oversized skb (Max Elements=%u / Actual=%u / Length=%u).\n",
+                                        max_elements, __elements, skb->len);
+                       return -E2BIG;
+               }
+
+               rc = skb_linearize(skb);
+               if (card->options.performance_stats) {
+                       if (rc)
+                               card->perf_stats.tx_linfail++;
+                       else
+                               card->perf_stats.tx_lin++;
+               }
+               if (rc)
+                       return rc;
+
+               /* Linearization changed the layout, re-evaluate: */
+               goto check_layout;
+       }
+
+       *elements = __elements;
+       /* Add the header: */
+       if (push_ok) {
+               *hdr = skb_push(skb, hdr_len);
+               return hdr_len;
        }
        /* fall back */
        *hdr = kmem_cache_alloc(qeth_core_header_cache, GFP_ATOMIC);
        if (!*hdr)
                return -ENOMEM;
+       /* Copy protocol headers behind HW header: */
+       skb_copy_from_linear_data(skb, ((char *)*hdr) + hdr_len, proto_len);
        return 0;
 }
-EXPORT_SYMBOL_GPL(qeth_push_hdr);
+EXPORT_SYMBOL_GPL(qeth_add_hw_header);
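
A hypothetical caller sketch, derived only from the kernel-doc above: the
return value distinguishes a header pushed into the skb's headroom from one
allocated out of the header cache, and the element count comes back through
@elements.

    /* everything here except qeth_add_hw_header() itself is made up */
    static int example_xmit_setup(struct qeth_card *card, struct sk_buff *skb)
    {
            unsigned int elements;
            struct qeth_hdr *hdr;
            int push_len;

            push_len = qeth_add_hw_header(card, skb, &hdr, sizeof(*hdr), 0,
                                          &elements);
            if (push_len < 0)
                    return push_len;        /* -E2BIG, -ENOMEM, ... */

            /* push_len > 0: hdr sits in the skb headroom and travels with it.
             * push_len == 0: hdr came from qeth_core_header_cache; the IO
             * buffer must track it (cf. the is_header[] handling in
             * qeth_clear_output_buffer() above) so it is freed on completion.
             */
            return 0;
    }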
 
 static void __qeth_fill_buffer(struct sk_buff *skb,
                               struct qeth_qdio_out_buffer *buf,
@@ -4241,24 +4251,6 @@ void qeth_setadp_promisc_mode(struct qeth_card *card)
 }
 EXPORT_SYMBOL_GPL(qeth_setadp_promisc_mode);
 
-int qeth_change_mtu(struct net_device *dev, int new_mtu)
-{
-       struct qeth_card *card;
-       char dbf_text[15];
-
-       card = dev->ml_priv;
-
-       QETH_CARD_TEXT(card, 4, "chgmtu");
-       sprintf(dbf_text, "%8x", new_mtu);
-       QETH_CARD_TEXT(card, 4, dbf_text);
-
-       if (!qeth_mtu_is_valid(card, new_mtu))
-               return -EINVAL;
-       dev->mtu = new_mtu;
-       return 0;
-}
-EXPORT_SYMBOL_GPL(qeth_change_mtu);
-
 struct net_device_stats *qeth_get_stats(struct net_device *dev)
 {
        struct qeth_card *card;
@@ -4834,9 +4826,6 @@ int qeth_vm_request_mac(struct qeth_card *card)
 
        QETH_DBF_TEXT(SETUP, 2, "vmreqmac");
 
-       if (!card->dev)
-               return -ENODEV;
-
        request = kzalloc(sizeof(*request), GFP_KERNEL | GFP_DMA);
        response = kzalloc(sizeof(*response), GFP_KERNEL | GFP_DMA);
        if (!request || !response) {
@@ -5716,6 +5705,53 @@ static void qeth_clear_dbf_list(void)
        mutex_unlock(&qeth_dbf_list_mutex);
 }
 
+static struct net_device *qeth_alloc_netdev(struct qeth_card *card)
+{
+       struct net_device *dev;
+
+       switch (card->info.type) {
+       case QETH_CARD_TYPE_IQD:
+               dev = alloc_netdev(0, "hsi%d", NET_NAME_UNKNOWN, ether_setup);
+               break;
+       case QETH_CARD_TYPE_OSN:
+               dev = alloc_netdev(0, "osn%d", NET_NAME_UNKNOWN, ether_setup);
+               break;
+       default:
+               dev = alloc_etherdev(0);
+       }
+
+       if (!dev)
+               return NULL;
+
+       dev->ml_priv = card;
+       dev->watchdog_timeo = QETH_TX_TIMEOUT;
+       dev->min_mtu = IS_OSN(card) ? 64 : 576;
+       /* initialized when device first goes online: */
+       dev->max_mtu = 0;
+       dev->mtu = 0;
+       SET_NETDEV_DEV(dev, &card->gdev->dev);
+       netif_carrier_off(dev);
+
+       if (!IS_OSN(card)) {
+               dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+               dev->hw_features |= NETIF_F_SG;
+               dev->vlan_features |= NETIF_F_SG;
+       }
+
+       return dev;
+}
+
+struct net_device *qeth_clone_netdev(struct net_device *orig)
+{
+       struct net_device *clone = qeth_alloc_netdev(orig->ml_priv);
+
+       if (!clone)
+               return NULL;
+
+       clone->dev_port = orig->dev_port;
+       return clone;
+}
+
 static int qeth_core_probe_device(struct ccwgroup_device *gdev)
 {
        struct qeth_card *card;
@@ -5765,6 +5801,10 @@ static int qeth_core_probe_device(struct ccwgroup_device *gdev)
                goto err_card;
        }
 
+       card->dev = qeth_alloc_netdev(card);
+       if (!card->dev)
+               goto err_card;
+
        qeth_determine_capabilities(card);
        enforced_disc = qeth_enforce_discipline(card);
        switch (enforced_disc) {
@@ -5775,7 +5815,7 @@ static int qeth_core_probe_device(struct ccwgroup_device *gdev)
                card->info.layer_enforced = true;
                rc = qeth_core_load_discipline(card, enforced_disc);
                if (rc)
-                       goto err_card;
+                       goto err_load;
 
                gdev->dev.type = (card->info.type != QETH_CARD_TYPE_OSN)
                                        ? card->discipline->devtype
@@ -5793,6 +5833,8 @@ static int qeth_core_probe_device(struct ccwgroup_device *gdev)
 
 err_disc:
        qeth_core_free_discipline(card);
+err_load:
+       free_netdev(card->dev);
 err_card:
        qeth_core_free_card(card);
 err_dev:
@@ -5815,10 +5857,10 @@ static void qeth_core_remove_device(struct ccwgroup_device *gdev)
        write_lock_irqsave(&qeth_core_card_list.rwlock, flags);
        list_del(&card->list);
        write_unlock_irqrestore(&qeth_core_card_list.rwlock, flags);
+       free_netdev(card->dev);
        qeth_core_free_card(card);
        dev_set_drvdata(&gdev->dev, NULL);
        put_device(&gdev->dev);
-       return;
 }
 
 static int qeth_core_set_online(struct ccwgroup_device *gdev)
@@ -5887,31 +5929,13 @@ static int qeth_core_restore(struct ccwgroup_device *gdev)
        return 0;
 }
 
-static struct ccwgroup_driver qeth_core_ccwgroup_driver = {
-       .driver = {
-               .owner = THIS_MODULE,
-               .name = "qeth",
-       },
-       .ccw_driver = &qeth_ccw_driver,
-       .setup = qeth_core_probe_device,
-       .remove = qeth_core_remove_device,
-       .set_online = qeth_core_set_online,
-       .set_offline = qeth_core_set_offline,
-       .shutdown = qeth_core_shutdown,
-       .prepare = NULL,
-       .complete = NULL,
-       .freeze = qeth_core_freeze,
-       .thaw = qeth_core_thaw,
-       .restore = qeth_core_restore,
-};
-
 static ssize_t group_store(struct device_driver *ddrv, const char *buf,
                           size_t count)
 {
        int err;
 
-       err = ccwgroup_create_dev(qeth_core_root_dev,
-                                 &qeth_core_ccwgroup_driver, 3, buf);
+       err = ccwgroup_create_dev(qeth_core_root_dev, to_ccwgroupdrv(ddrv), 3,
+                                 buf);
 
        return err ? err : count;
 }
@@ -5929,6 +5953,25 @@ static const struct attribute_group *qeth_drv_attr_groups[] = {
        NULL,
 };
 
+static struct ccwgroup_driver qeth_core_ccwgroup_driver = {
+       .driver = {
+               .groups = qeth_drv_attr_groups,
+               .owner = THIS_MODULE,
+               .name = "qeth",
+       },
+       .ccw_driver = &qeth_ccw_driver,
+       .setup = qeth_core_probe_device,
+       .remove = qeth_core_remove_device,
+       .set_online = qeth_core_set_online,
+       .set_offline = qeth_core_set_offline,
+       .shutdown = qeth_core_shutdown,
+       .prepare = NULL,
+       .complete = NULL,
+       .freeze = qeth_core_freeze,
+       .thaw = qeth_core_thaw,
+       .restore = qeth_core_restore,
+};
+
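
[Illustrative aside] Moving the ccwgroup driver definition below qeth_drv_attr_groups[] lets .driver.groups be wired statically, replacing the runtime assignment that qeth_core_init() used to perform (see the deletion further down). A sketch of static attribute-group wiring, with invented stand-in types for struct device_driver and struct attribute_group:

#include <stdio.h>

struct attr_group { const char *name; };

static const struct attr_group *demo_groups[] = {
        &(struct attr_group){ .name = "qeth-like" },
        NULL,
};

struct demo_driver {
        const char *name;
        const struct attr_group **groups;       /* consumed at register time */
};

/* Static wiring: no window where the driver exists without its groups. */
static struct demo_driver drv = {
        .name   = "demo",
        .groups = demo_groups,
};

int main(void)
{
        printf("%s: first group \"%s\"\n", drv.name, drv.groups[0]->name);
        return 0;
}
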
 int qeth_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 {
        struct qeth_card *card = dev->ml_priv;
@@ -5995,7 +6038,7 @@ static struct {
        {"tx skbs packing"},
        {"tx buffers packing"},
        {"tx sg skbs"},
-       {"tx sg frags"},
+       {"tx buffer elements"},
 /* 10 */{"rx sg skbs"},
        {"rx sg frags"},
        {"rx sg page allocs"},
@@ -6054,7 +6097,7 @@ void qeth_core_get_ethtool_stats(struct net_device *dev,
        data[6] = card->perf_stats.skbs_sent_pack;
        data[7] = card->perf_stats.bufs_sent_pack;
        data[8] = card->perf_stats.sg_skbs_sent;
-       data[9] = card->perf_stats.sg_frags_sent;
+       data[9] = card->perf_stats.buf_elements_sent;
        data[10] = card->perf_stats.sg_skbs_rx;
        data[11] = card->perf_stats.sg_frags_rx;
        data[12] = card->perf_stats.sg_alloc_page_rx;
@@ -6620,7 +6663,6 @@ static int __init qeth_core_init(void)
        rc = ccw_driver_register(&qeth_ccw_driver);
        if (rc)
                goto ccw_err;
-       qeth_core_ccwgroup_driver.driver.groups = qeth_drv_attr_groups;
        rc = ccwgroup_driver_register(&qeth_core_ccwgroup_driver);
        if (rc)
                goto ccwgroup_err;
index 878e62f3516915081c7a4f834724a67afd8485f4..cf5ad94e960a0b86ca1d70ff7e69edabc8ba6cb3 100644 (file)
@@ -64,6 +64,9 @@ enum qeth_card_types {
        QETH_CARD_TYPE_OSX     = 2,
 };
 
+#define IS_IQD(card)   ((card)->info.type == QETH_CARD_TYPE_IQD)
+#define IS_OSN(card)   ((card)->info.type == QETH_CARD_TYPE_OSN)
+
 #define QETH_MPC_DIFINFO_LEN_INDICATES_LINK_TYPE 0x18
 /* only the first two bytes are looked at in qeth_get_cardname_short */
 enum qeth_link_types {
index c3f18afb368b1118a1ca56c89bf8115f84db088f..25d0be25bcb35b2242042cf58bf3441785019f53 100644 (file)
@@ -112,7 +112,7 @@ static ssize_t qeth_dev_portno_show(struct device *dev,
        if (!card)
                return -EINVAL;
 
-       return sprintf(buf, "%i\n", card->info.portno);
+       return sprintf(buf, "%i\n", card->dev->dev_port);
 }
 
 static ssize_t qeth_dev_portno_store(struct device *dev,
@@ -143,9 +143,7 @@ static ssize_t qeth_dev_portno_store(struct device *dev,
                rc = -EINVAL;
                goto out;
        }
-       card->info.portno = portno;
-       if (card->dev)
-               card->dev->dev_port = portno;
+       card->dev->dev_port = portno;
 out:
        mutex_unlock(&card->conf_mutex);
        return rc ? rc : count;
@@ -388,6 +386,7 @@ static ssize_t qeth_dev_layer2_store(struct device *dev,
                struct device_attribute *attr, const char *buf, size_t count)
 {
        struct qeth_card *card = dev_get_drvdata(dev);
+       struct net_device *ndev;
        char *tmp;
        int i, rc = 0;
        enum qeth_discipline_id newdis;
@@ -424,8 +423,19 @@ static ssize_t qeth_dev_layer2_store(struct device *dev,
 
        card->info.mac_bits = 0;
        if (card->discipline) {
+               /* start with a new, pristine netdevice: */
+               ndev = qeth_clone_netdev(card->dev);
+               if (!ndev) {
+                       rc = -ENOMEM;
+                       goto out;
+               }
+
                card->discipline->remove(card->gdev);
                qeth_core_free_discipline(card);
+               card->options.layer2 = -1;
+
+               free_netdev(card->dev);
+               card->dev = ndev;
        }
 
        rc = qeth_core_load_discipline(card, newdis);
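
[Illustrative aside] When the discipline changes, the store handler above now builds the replacement netdevice first and only then tears down the old one, so an allocation failure leaves the device usable. The same allocate-then-swap pattern in miniature (all names hypothetical):

#include <stdio.h>
#include <stdlib.h>

struct obj { int port; };

static struct obj *clone_pristine(const struct obj *old)
{
        struct obj *fresh = calloc(1, sizeof(*fresh));

        if (!fresh)
                return NULL;
        fresh->port = old->port;        /* keep user-visible settings only */
        return fresh;
}

static int switch_mode(struct obj **cur)
{
        struct obj *fresh = clone_pristine(*cur);

        if (!fresh)
                return -12;             /* old object untouched on -ENOMEM */

        /* ...tear down the old mode here, while both objects exist... */
        free(*cur);
        *cur = fresh;                   /* commit the swap only on success */
        return 0;
}

int main(void)
{
        struct obj *o = calloc(1, sizeof(*o));
        int rc;

        if (!o)
                return 1;
        o->port = 2;
        rc = switch_mode(&o);
        printf("switch_mode: %d, port still %d\n", rc, o->port);
        free(o);
        return 0;
}
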
index f2130051ca11a9c609110661170fd13424f0ab37..ddc615b431a8c826c29a46cbde29bfa47b08caa4 100644 (file)
@@ -14,6 +14,11 @@ extern const struct attribute_group *qeth_l2_attr_groups[];
 int qeth_l2_create_device_attributes(struct device *);
 void qeth_l2_remove_device_attributes(struct device *);
 void qeth_l2_setup_bridgeport_attrs(struct qeth_card *card);
+int qeth_bridgeport_query_ports(struct qeth_card *card,
+                               enum qeth_sbp_roles *role,
+                               enum qeth_sbp_states *state);
+int qeth_bridgeport_setrole(struct qeth_card *card, enum qeth_sbp_roles role);
+int qeth_bridgeport_an_set(struct qeth_card *card, int enable);
 
 int qeth_l2_vnicc_set_state(struct qeth_card *card, u32 vnicc, bool state);
 int qeth_l2_vnicc_get_state(struct qeth_card *card, u32 vnicc, bool *state);
index 2487f0aeb165c1afae905540d1ff547f7fab4f54..c1829a4b955dfe8fcd2c108ac2c4598f713167ad 100644 (file)
@@ -26,7 +26,6 @@
 
 static int qeth_l2_set_offline(struct ccwgroup_device *);
 static int qeth_l2_stop(struct net_device *);
-static void qeth_l2_set_rx_mode(struct net_device *);
 static void qeth_bridgeport_query_support(struct qeth_card *card);
 static void qeth_bridge_state_change(struct qeth_card *card,
                                        struct qeth_ipa_cmd *cmd);
@@ -186,12 +185,12 @@ static void qeth_l2_del_all_macs(struct qeth_card *card)
 static int qeth_l2_get_cast_type(struct qeth_card *card, struct sk_buff *skb)
 {
        if (card->info.type == QETH_CARD_TYPE_OSN)
-               return RTN_UNSPEC;
+               return RTN_UNICAST;
        if (is_broadcast_ether_addr(skb->data))
                return RTN_BROADCAST;
        if (is_multicast_ether_addr(skb->data))
                return RTN_MULTICAST;
-       return RTN_UNSPEC;
+       return RTN_UNICAST;
 }
 
 static void qeth_l2_fill_header(struct qeth_hdr *hdr, struct sk_buff *skb,
@@ -344,7 +343,6 @@ static int qeth_l2_vlan_rx_kill_vid(struct net_device *dev,
                rc = qeth_l2_send_setdelvlan(card, vid, IPA_CMD_DELVLAN);
                kfree(tmpid);
        }
-       qeth_l2_set_rx_mode(card->dev);
        return rc;
 }
 
@@ -643,97 +641,58 @@ static void qeth_l2_set_rx_mode(struct net_device *dev)
                qeth_promisc_to_bridge(card);
 }
 
-static int qeth_l2_xmit_iqd(struct qeth_card *card, struct sk_buff *skb,
-                           struct qeth_qdio_out_q *queue, int cast_type)
+static int qeth_l2_xmit(struct qeth_card *card, struct sk_buff *skb,
+                       struct qeth_qdio_out_q *queue, int cast_type, int ipv)
 {
-       unsigned int data_offset = ETH_HLEN;
-       struct qeth_hdr *hdr;
-       int rc;
-
-       hdr = kmem_cache_alloc(qeth_core_header_cache, GFP_ATOMIC);
-       if (!hdr)
-               return -ENOMEM;
-       qeth_l2_fill_header(hdr, skb, cast_type, skb->len);
-       skb_copy_from_linear_data(skb, ((char *)hdr) + sizeof(*hdr),
-                                 data_offset);
-
-       if (!qeth_get_elements_no(card, skb, 1, data_offset)) {
-               rc = -E2BIG;
-               goto out;
-       }
-       rc = qeth_do_send_packet_fast(queue, skb, hdr, data_offset,
-                                     sizeof(*hdr) + data_offset);
-out:
-       if (rc)
-               kmem_cache_free(qeth_core_header_cache, hdr);
-       return rc;
-}
-
-static int qeth_l2_xmit_osa(struct qeth_card *card, struct sk_buff *skb,
-                           struct qeth_qdio_out_q *queue, int cast_type,
-                           int ipv)
-{
-       int push_len = sizeof(struct qeth_hdr);
-       unsigned int elements, nr_frags;
-       unsigned int hdr_elements = 0;
+       const unsigned int proto_len = IS_IQD(card) ? ETH_HLEN : 0;
+       const unsigned int hw_hdr_len = sizeof(struct qeth_hdr);
+       unsigned int frame_len = skb->len;
+       unsigned int data_offset = 0;
        struct qeth_hdr *hdr = NULL;
        unsigned int hd_len = 0;
-       int rc;
-
-       /* fix hardware limitation: as long as we do not have sbal
-        * chaining we can not send long frag lists
-        */
-       if (!qeth_get_elements_no(card, skb, 0, 0)) {
-               rc = skb_linearize(skb);
-
-               if (card->options.performance_stats) {
-                       if (rc)
-                               card->perf_stats.tx_linfail++;
-                       else
-                               card->perf_stats.tx_lin++;
-               }
-               if (rc)
-                       return rc;
-       }
-       nr_frags = skb_shinfo(skb)->nr_frags;
+       unsigned int elements;
+       int push_len, rc;
+       bool is_sg;
 
-       rc = skb_cow_head(skb, push_len);
+       rc = skb_cow_head(skb, hw_hdr_len);
        if (rc)
                return rc;
-       push_len = qeth_push_hdr(skb, &hdr, push_len);
+
+       push_len = qeth_add_hw_header(card, skb, &hdr, hw_hdr_len, proto_len,
+                                     &elements);
        if (push_len < 0)
                return push_len;
        if (!push_len) {
-               /* hdr was allocated from cache */
-               hd_len = sizeof(*hdr);
-               hdr_elements = 1;
+               /* HW header needs its own buffer element. */
+               hd_len = hw_hdr_len + proto_len;
+               data_offset = proto_len;
        }
-       qeth_l2_fill_header(hdr, skb, cast_type, skb->len - push_len);
+       qeth_l2_fill_header(hdr, skb, cast_type, frame_len);
        if (skb->ip_summed == CHECKSUM_PARTIAL) {
                qeth_tx_csum(skb, &hdr->hdr.l2.flags[1], ipv);
                if (card->options.performance_stats)
                        card->perf_stats.tx_csum++;
        }
 
-       elements = qeth_get_elements_no(card, skb, hdr_elements, 0);
-       if (!elements) {
-               rc = -E2BIG;
-               goto out;
+       is_sg = skb_is_nonlinear(skb);
+       if (IS_IQD(card)) {
+               rc = qeth_do_send_packet_fast(queue, skb, hdr, data_offset,
+                                             hd_len);
+       } else {
+               /* TODO: drop skb_orphan() once TX completion is fast enough */
+               skb_orphan(skb);
+               rc = qeth_do_send_packet(card, queue, skb, hdr, data_offset,
+                                        hd_len, elements);
        }
-       elements += hdr_elements;
 
-       /* TODO: remove the skb_orphan() once TX completion is fast enough */
-       skb_orphan(skb);
-       rc = qeth_do_send_packet(card, queue, skb, hdr, 0, hd_len, elements);
-out:
        if (!rc) {
-               if (card->options.performance_stats && nr_frags) {
-                       card->perf_stats.sg_skbs_sent++;
-                       /* nr_frags + skb->data */
-                       card->perf_stats.sg_frags_sent += nr_frags + 1;
+               if (card->options.performance_stats) {
+                       card->perf_stats.buf_elements_sent += elements;
+                       if (is_sg)
+                               card->perf_stats.sg_skbs_sent++;
                }
        } else {
-               if (hd_len)
+               if (!push_len)
                        kmem_cache_free(qeth_core_header_cache, hdr);
                if (rc == -EBUSY)
                        /* roll back to ETH header */
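
[Illustrative aside] The unified qeth_l2_xmit() handles two header placements: qeth_add_hw_header() either pushes the HW header into existing skb headroom (push_len > 0) or hands back a header allocated from qeth_core_header_cache that occupies its own buffer element (push_len == 0, with hd_len set). A plain-buffer sketch of the inline-vs-detached decision; the buffer layout and names are invented for the demo:

#include <stdio.h>
#include <string.h>

#define HDR_LEN 4       /* stands in for sizeof(struct qeth_hdr) */

struct buf {
        unsigned char data[64];
        int offset;     /* start of payload within data[]; the "headroom" */
};

/* Returns HDR_LEN if the header was pushed inline (qeth's push_len > 0),
 * or 0 if the caller must send a detached header element instead. */
static int add_hw_header(struct buf *b, const unsigned char *hdr)
{
        if (b->offset >= HDR_LEN) {     /* enough headroom: push inline */
                b->offset -= HDR_LEN;
                memcpy(b->data + b->offset, hdr, HDR_LEN);
                return HDR_LEN;
        }
        return 0;                       /* header travels separately */
}

int main(void)
{
        const unsigned char hdr[HDR_LEN] = { 1, 2, 3, 4 };
        struct buf roomy = { .offset = 8 };
        struct buf tight = { .offset = 0 };

        printf("roomy: push_len=%d\n", add_hw_header(&roomy, hdr));
        printf("tight: push_len=%d\n", add_hw_header(&tight, hdr));
        return 0;
}

This is also why the error path frees the header only when !push_len: in the inline case the header lives inside the skb and needs no separate free.
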
@@ -770,34 +729,23 @@ static netdev_tx_t qeth_l2_hard_start_xmit(struct sk_buff *skb,
        int tx_bytes = skb->len;
        int rc;
 
-       if (card->qdio.do_prio_queueing || (cast_type &&
-                                       card->info.is_multicast_different))
-               queue = card->qdio.out_qs[qeth_get_priority_queue(card, skb,
-                                       ipv, cast_type)];
-       else
-               queue = card->qdio.out_qs[card->qdio.default_out_queue];
-
        if ((card->state != CARD_STATE_UP) || !card->lan_online) {
                card->stats.tx_carrier_errors++;
                goto tx_drop;
        }
 
+       queue = qeth_get_tx_queue(card, skb, ipv, cast_type);
+
        if (card->options.performance_stats) {
                card->perf_stats.outbound_cnt++;
                card->perf_stats.outbound_start_time = qeth_get_micros();
        }
        netif_stop_queue(dev);
 
-       switch (card->info.type) {
-       case QETH_CARD_TYPE_OSN:
+       if (IS_OSN(card))
                rc = qeth_l2_xmit_osn(card, skb, queue);
-               break;
-       case QETH_CARD_TYPE_IQD:
-               rc = qeth_l2_xmit_iqd(card, skb, queue, cast_type);
-               break;
-       default:
-               rc = qeth_l2_xmit_osa(card, skb, queue, cast_type, ipv);
-       }
+       else
+               rc = qeth_l2_xmit(card, skb, queue, cast_type, ipv);
 
        if (!rc) {
                card->stats.tx_packets++;
@@ -906,13 +854,7 @@ static void qeth_l2_remove_device(struct ccwgroup_device *cgdev)
 
        if (cgdev->state == CCWGROUP_ONLINE)
                qeth_l2_set_offline(cgdev);
-
-       if (card->dev) {
-               unregister_netdev(card->dev);
-               free_netdev(card->dev);
-               card->dev = NULL;
-       }
-       return;
+       unregister_netdev(card->dev);
 }
 
 static const struct ethtool_ops qeth_l2_ethtool_ops = {
@@ -941,7 +883,6 @@ static const struct net_device_ops qeth_l2_netdev_ops = {
        .ndo_set_rx_mode        = qeth_l2_set_rx_mode,
        .ndo_do_ioctl           = qeth_do_ioctl,
        .ndo_set_mac_address    = qeth_l2_set_mac_address,
-       .ndo_change_mtu         = qeth_change_mtu,
        .ndo_vlan_rx_add_vid    = qeth_l2_vlan_rx_add_vid,
        .ndo_vlan_rx_kill_vid   = qeth_l2_vlan_rx_kill_vid,
        .ndo_tx_timeout         = qeth_tx_timeout,
@@ -951,35 +892,19 @@ static const struct net_device_ops qeth_l2_netdev_ops = {
 
 static int qeth_l2_setup_netdev(struct qeth_card *card)
 {
-       switch (card->info.type) {
-       case QETH_CARD_TYPE_IQD:
-               card->dev = alloc_netdev(0, "hsi%d", NET_NAME_UNKNOWN,
-                                        ether_setup);
-               break;
-       case QETH_CARD_TYPE_OSN:
-               card->dev = alloc_netdev(0, "osn%d", NET_NAME_UNKNOWN,
-                                        ether_setup);
-               break;
-       default:
-               card->dev = alloc_etherdev(0);
-       }
+       int rc;
 
-       if (!card->dev)
-               return -ENODEV;
+       if (card->dev->netdev_ops)
+               return 0;
 
-       card->dev->ml_priv = card;
        card->dev->priv_flags |= IFF_UNICAST_FLT;
-       card->dev->watchdog_timeo = QETH_TX_TIMEOUT;
-       card->dev->mtu = card->info.initial_mtu;
-       card->dev->min_mtu = 64;
-       card->dev->max_mtu = ETH_MAX_MTU;
-       card->dev->dev_port = card->info.portno;
        card->dev->netdev_ops = &qeth_l2_netdev_ops;
        if (card->info.type == QETH_CARD_TYPE_OSN) {
                card->dev->ethtool_ops = &qeth_l2_osn_ops;
                card->dev->flags |= IFF_NOARP;
        } else {
                card->dev->ethtool_ops = &qeth_l2_ethtool_ops;
+               card->dev->needed_headroom = sizeof(struct qeth_hdr);
        }
 
        if (card->info.type == QETH_CARD_TYPE_OSM)
@@ -987,14 +912,6 @@ static int qeth_l2_setup_netdev(struct qeth_card *card)
        else
                card->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
 
-       if (card->info.type != QETH_CARD_TYPE_OSN &&
-           card->info.type != QETH_CARD_TYPE_IQD) {
-               card->dev->priv_flags &= ~IFF_TX_SKB_SHARING;
-               card->dev->needed_headroom = sizeof(struct qeth_hdr);
-               card->dev->hw_features |= NETIF_F_SG;
-               card->dev->vlan_features |= NETIF_F_SG;
-       }
-
        if (card->info.type == QETH_CARD_TYPE_OSD && !card->info.guestlan) {
                card->dev->features |= NETIF_F_SG;
                /* OSA 3S and earlier has no RX/TX support */
@@ -1013,12 +930,12 @@ static int qeth_l2_setup_netdev(struct qeth_card *card)
                card->dev->vlan_features |= NETIF_F_RXCSUM;
        }
 
-       card->info.broadcast_capable = 1;
        qeth_l2_request_initial_mac(card);
-       SET_NETDEV_DEV(card->dev, &card->gdev->dev);
        netif_napi_add(card->dev, &card->napi, qeth_poll, QETH_NAPI_WEIGHT);
-       netif_carrier_off(card->dev);
-       return register_netdev(card->dev);
+       rc = register_netdev(card->dev);
+       if (rc)
+               card->dev->netdev_ops = NULL;
+       return rc;
 }
 
 static int qeth_l2_start_ipassists(struct qeth_card *card)
@@ -1064,10 +981,9 @@ static int __qeth_l2_set_online(struct ccwgroup_device *gdev, int recovery_mode)
                dev_info(&card->gdev->dev,
                "The device represents a Bridge Capable Port\n");
 
-       if (!card->dev && qeth_l2_setup_netdev(card)) {
-               rc = -ENODEV;
+       rc = qeth_l2_setup_netdev(card);
+       if (rc)
                goto out_remove;
-       }
 
        if (card->info.type != QETH_CARD_TYPE_OSN &&
            !qeth_l2_send_setmac(card, card->dev->dev_addr))
@@ -1125,13 +1041,12 @@ static int __qeth_l2_set_online(struct ccwgroup_device *gdev, int recovery_mode)
                if (recovery_mode &&
                    card->info.type != QETH_CARD_TYPE_OSN) {
                        __qeth_l2_open(card->dev);
+                       qeth_l2_set_rx_mode(card->dev);
                } else {
                        rtnl_lock();
                        dev_open(card->dev);
                        rtnl_unlock();
                }
-               /* this also sets saved unicast addresses */
-               qeth_l2_set_rx_mode(card->dev);
        }
        /* let user_space know that device is online */
        kobject_uevent(&gdev->dev.kobj, KOBJ_CHANGE);
@@ -1171,8 +1086,7 @@ static int __qeth_l2_set_offline(struct ccwgroup_device *cgdev,
        QETH_DBF_TEXT(SETUP, 3, "setoffl");
        QETH_DBF_HEX(SETUP, 3, &card, sizeof(void *));
 
-       if (card->dev && netif_carrier_ok(card->dev))
-               netif_carrier_off(card->dev);
+       netif_carrier_off(card->dev);
        recover_flag = card->state;
        if ((!recovery_mode && card->info.hwtrap) || card->info.hwtrap == 2) {
                qeth_hw_trap(card, QETH_DIAGS_TRAP_DISARM);
@@ -1245,8 +1159,7 @@ static int qeth_l2_pm_suspend(struct ccwgroup_device *gdev)
 {
        struct qeth_card *card = dev_get_drvdata(&gdev->dev);
 
-       if (card->dev)
-               netif_device_detach(card->dev);
+       netif_device_detach(card->dev);
        qeth_set_allowed_threads(card, 0, 1);
        wait_event(card->wait_q, qeth_threads_running(card, 0xffffffff) == 0);
        if (gdev->state == CCWGROUP_OFFLINE)
@@ -1279,8 +1192,7 @@ static int qeth_l2_pm_resume(struct ccwgroup_device *gdev)
                rc = __qeth_l2_set_online(card->gdev, 0);
 out:
        qeth_set_allowed_threads(card, 0xffffffff, 0);
-       if (card->dev)
-               netif_device_attach(card->dev);
+       netif_device_attach(card->dev);
        if (rc)
                dev_warn(&card->gdev->dev, "The qeth device driver "
                        "failed to recover an error on the device\n");
@@ -1877,7 +1789,6 @@ int qeth_bridgeport_query_ports(struct qeth_card *card,
                return rc;
        return qeth_bridgeport_makerc(card, &cbctl, IPA_SBP_QUERY_BRIDGE_PORTS);
 }
-EXPORT_SYMBOL_GPL(qeth_bridgeport_query_ports);
 
 static int qeth_bridgeport_set_cb(struct qeth_card *card,
        struct qeth_reply *reply, unsigned long data)
@@ -2025,7 +1936,6 @@ int qeth_bridgeport_an_set(struct qeth_card *card, int enable)
                rc = qdio_pnso_brinfo(schid, 0, &response, NULL, NULL);
        return qeth_anset_makerc(card, rc, response);
 }
-EXPORT_SYMBOL_GPL(qeth_bridgeport_an_set);
 
 static bool qeth_bridgeport_is_in_use(struct qeth_card *card)
 {
index 5905dc63e2569baf761611ad25bf3b91786a3235..1833e7505aca45fd5659437677c34494538fe2e2 100644 (file)
@@ -1978,17 +1978,17 @@ static int qeth_l3_get_cast_type(struct sk_buff *skb)
                    (cast_type == RTN_MULTICAST) ||
                    (cast_type == RTN_ANYCAST))
                        return cast_type;
-               return RTN_UNSPEC;
+               return RTN_UNICAST;
        }
        rcu_read_unlock();
 
        /* no neighbour (eg AF_PACKET), fall back to target's IP address ... */
        if (be16_to_cpu(skb->protocol) == ETH_P_IPV6)
                return ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) ?
-                               RTN_MULTICAST : RTN_UNSPEC;
+                               RTN_MULTICAST : RTN_UNICAST;
        else if (be16_to_cpu(skb->protocol) == ETH_P_IP)
                return ipv4_is_multicast(ip_hdr(skb)->daddr) ?
-                               RTN_MULTICAST : RTN_UNSPEC;
+                               RTN_MULTICAST : RTN_UNICAST;
 
        /* ... and MAC address */
        if (ether_addr_equal_64bits(eth_hdr(skb)->h_dest, skb->dev->broadcast))
@@ -1997,22 +1997,21 @@ static int qeth_l3_get_cast_type(struct sk_buff *skb)
                return RTN_MULTICAST;
 
        /* default to unicast */
-       return RTN_UNSPEC;
+       return RTN_UNICAST;
 }
 
-static void qeth_l3_fill_af_iucv_hdr(struct qeth_card *card,
-               struct qeth_hdr *hdr, struct sk_buff *skb)
+static void qeth_l3_fill_af_iucv_hdr(struct qeth_hdr *hdr, struct sk_buff *skb,
+                                    unsigned int data_len)
 {
        char daddr[16];
        struct af_iucv_trans_hdr *iucv_hdr;
 
        memset(hdr, 0, sizeof(struct qeth_hdr));
        hdr->hdr.l3.id = QETH_HEADER_TYPE_LAYER3;
-       hdr->hdr.l3.ext_flags = 0;
-       hdr->hdr.l3.length = skb->len - ETH_HLEN;
+       hdr->hdr.l3.length = data_len;
        hdr->hdr.l3.flags = QETH_HDR_IPV6 | QETH_CAST_UNICAST;
 
-       iucv_hdr = (struct af_iucv_trans_hdr *) (skb->data + ETH_HLEN);
+       iucv_hdr = (struct af_iucv_trans_hdr *)(skb_mac_header(skb) + ETH_HLEN);
        memset(daddr, 0, sizeof(daddr));
        daddr[0] = 0xfe;
        daddr[1] = 0x80;
@@ -2051,6 +2050,12 @@ static void qeth_l3_fill_header(struct qeth_card *card, struct qeth_hdr *hdr,
                hdr->hdr.l3.vlan_id = skb_vlan_tag_get(skb);
        }
 
+       if (!skb_is_gso(skb) && skb->ip_summed == CHECKSUM_PARTIAL) {
+               qeth_tx_csum(skb, &hdr->hdr.l3.ext_flags, ipv);
+               if (card->options.performance_stats)
+                       card->perf_stats.tx_csum++;
+       }
+
        /* OSA only: */
        if (!ipv) {
                hdr->hdr.l3.flags = QETH_HDR_PASSTHRU;
@@ -2156,106 +2161,121 @@ static int qeth_l3_get_elements_no_tso(struct qeth_card *card,
        return elements;
 }
 
-static netdev_tx_t qeth_l3_hard_start_xmit(struct sk_buff *skb,
-                                          struct net_device *dev)
+static int qeth_l3_xmit_offload(struct qeth_card *card, struct sk_buff *skb,
+                               struct qeth_qdio_out_q *queue, int ipv,
+                               int cast_type)
 {
-       int rc;
-       __be16 *tag;
+       const unsigned int hw_hdr_len = sizeof(struct qeth_hdr);
+       unsigned int frame_len, elements;
+       unsigned char eth_hdr[ETH_HLEN];
        struct qeth_hdr *hdr = NULL;
-       int hdr_elements = 0;
-       int elements;
-       struct qeth_card *card = dev->ml_priv;
-       struct sk_buff *new_skb = NULL;
-       int ipv = qeth_get_ip_version(skb);
-       int cast_type = qeth_l3_get_cast_type(skb);
-       struct qeth_qdio_out_q *queue =
-               card->qdio.out_qs[card->qdio.do_prio_queueing
-                       || (cast_type && card->info.is_multicast_different) ?
-                       qeth_get_priority_queue(card, skb, ipv, cast_type) :
-                       card->qdio.default_out_queue];
-       int tx_bytes = skb->len;
        unsigned int hd_len = 0;
-       bool use_tso;
-       int data_offset = -1;
-       unsigned int nr_frags;
-
-       if (((card->info.type == QETH_CARD_TYPE_IQD) &&
-            (((card->options.cq != QETH_CQ_ENABLED) && !ipv) ||
-             ((card->options.cq == QETH_CQ_ENABLED) &&
-              (be16_to_cpu(skb->protocol) != ETH_P_AF_IUCV)))) ||
-           card->options.sniffer)
-                       goto tx_drop;
+       int push_len, rc;
+       bool is_sg;
 
-       if ((card->state != CARD_STATE_UP) || !card->lan_online) {
-               card->stats.tx_carrier_errors++;
-               goto tx_drop;
+       /* re-use the L2 header area for the HW header: */
+       rc = skb_cow_head(skb, hw_hdr_len - ETH_HLEN);
+       if (rc)
+               return rc;
+       skb_copy_from_linear_data(skb, eth_hdr, ETH_HLEN);
+       skb_pull(skb, ETH_HLEN);
+       frame_len = skb->len;
+
+       push_len = qeth_add_hw_header(card, skb, &hdr, hw_hdr_len, 0,
+                                     &elements);
+       if (push_len < 0)
+               return push_len;
+       if (!push_len) {
+               /* hdr was allocated separately from skb->data */

+               hd_len = hw_hdr_len;
        }
 
-       if ((cast_type == RTN_BROADCAST) &&
-           (card->info.broadcast_capable == 0))
-               goto tx_drop;
+       if (skb->protocol == htons(ETH_P_AF_IUCV))
+               qeth_l3_fill_af_iucv_hdr(hdr, skb, frame_len);
+       else
+               qeth_l3_fill_header(card, hdr, skb, ipv, cast_type, frame_len);
 
-       if (card->options.performance_stats) {
-               card->perf_stats.outbound_cnt++;
-               card->perf_stats.outbound_start_time = qeth_get_micros();
+       is_sg = skb_is_nonlinear(skb);
+       if (IS_IQD(card)) {
+               rc = qeth_do_send_packet_fast(queue, skb, hdr, 0, hd_len);
+       } else {
+               /* TODO: drop skb_orphan() once TX completion is fast enough */
+               skb_orphan(skb);
+               rc = qeth_do_send_packet(card, queue, skb, hdr, 0, hd_len,
+                                        elements);
+       }
+
+       if (!rc) {
+               if (card->options.performance_stats) {
+                       card->perf_stats.buf_elements_sent += elements;
+                       if (is_sg)
+                               card->perf_stats.sg_skbs_sent++;
+               }
+       } else {
+               if (!push_len)
+                       kmem_cache_free(qeth_core_header_cache, hdr);
+               if (rc == -EBUSY) {
+                       /* roll back to ETH header */
+                       skb_pull(skb, push_len);
+                       skb_push(skb, ETH_HLEN);
+                       skb_copy_to_linear_data(skb, eth_hdr, ETH_HLEN);
+               }
        }
+       return rc;
+}
+
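
[Illustrative aside] qeth_l3_xmit_offload() above saves the 14-byte Ethernet header, pulls it off so the HW header can reuse that area, and restores it when the frame is rolled back on -EBUSY. A stand-alone sketch of the save/pull/restore dance on a flat buffer (names invented; head plays the role of skb->data):

#include <stdio.h>
#include <string.h>

#define ETH_HLEN 14

struct pkt {
        unsigned char data[128];
        int head;       /* start of valid data; skb_pull/skb_push move it */
        int len;
};

int main(void)
{
        struct pkt p = { .head = 0, .len = 60 };
        unsigned char eth_hdr[ETH_HLEN];

        memset(p.data, 0xab, ETH_HLEN);                 /* pretend L2 hdr */

        memcpy(eth_hdr, p.data + p.head, ETH_HLEN);     /* save it     */
        p.head += ETH_HLEN;                             /* skb_pull()  */
        p.len -= ETH_HLEN;

        /* ...the HW header would be built in the vacated area here... */

        /* rollback on -EBUSY: put the Ethernet header back in place */
        p.head -= ETH_HLEN;
        p.len += ETH_HLEN;
        memcpy(p.data + p.head, eth_hdr, ETH_HLEN);     /* skb_push()  */

        printf("restored %d-byte frame, first byte 0x%02x\n",
               p.len, p.data[p.head]);
        return 0;
}
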
+static int qeth_l3_xmit(struct qeth_card *card, struct sk_buff *skb,
+                       struct qeth_qdio_out_q *queue, int ipv, int cast_type)
+{
+       int elements, len, rc;
+       __be16 *tag;
+       struct qeth_hdr *hdr = NULL;
+       int hdr_elements = 0;
+       struct sk_buff *new_skb = NULL;
+       int tx_bytes = skb->len;
+       unsigned int hd_len;
+       bool use_tso, is_sg;
 
        /* Ignore segment size from skb_is_gso(), 1 page is always used. */
        use_tso = skb_is_gso(skb) &&
                  (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4);
 
-       if (card->info.type == QETH_CARD_TYPE_IQD) {
-               new_skb = skb;
-               data_offset = ETH_HLEN;
-               hd_len = sizeof(*hdr);
-               hdr = kmem_cache_alloc(qeth_core_header_cache, GFP_ATOMIC);
-               if (!hdr)
-                       goto tx_drop;
-               hdr_elements++;
-       } else {
-               /* create a clone with writeable headroom */
-               new_skb = skb_realloc_headroom(skb, sizeof(struct qeth_hdr_tso)
-                                       + VLAN_HLEN);
-               if (!new_skb)
-                       goto tx_drop;
-
-               if (ipv == 4) {
-                       skb_pull(new_skb, ETH_HLEN);
-               }
+       /* create a clone with writeable headroom */
+       new_skb = skb_realloc_headroom(skb, sizeof(struct qeth_hdr_tso) +
+                                           VLAN_HLEN);
+       if (!new_skb)
+               return -ENOMEM;
 
-               if (ipv != 4 && skb_vlan_tag_present(new_skb)) {
-                       skb_push(new_skb, VLAN_HLEN);
-                       skb_copy_to_linear_data(new_skb, new_skb->data + 4, 4);
-                       skb_copy_to_linear_data_offset(new_skb, 4,
-                               new_skb->data + 8, 4);
-                       skb_copy_to_linear_data_offset(new_skb, 8,
-                               new_skb->data + 12, 4);
-                       tag = (__be16 *)(new_skb->data + 12);
-                       *tag = cpu_to_be16(ETH_P_8021Q);
-                       *(tag + 1) = cpu_to_be16(skb_vlan_tag_get(new_skb));
-               }
+       if (ipv == 4) {
+               skb_pull(new_skb, ETH_HLEN);
+       } else if (skb_vlan_tag_present(new_skb)) {
+               skb_push(new_skb, VLAN_HLEN);
+               skb_copy_to_linear_data(new_skb, new_skb->data + 4, 4);
+               skb_copy_to_linear_data_offset(new_skb, 4,
+                                              new_skb->data + 8, 4);
+               skb_copy_to_linear_data_offset(new_skb, 8,
+                                              new_skb->data + 12, 4);
+               tag = (__be16 *)(new_skb->data + 12);
+               *tag = cpu_to_be16(ETH_P_8021Q);
+               *(tag + 1) = cpu_to_be16(skb_vlan_tag_get(new_skb));
        }
 
-       netif_stop_queue(dev);
-
        /* fix hardware limitation: as long as we do not have sbal
         * chaining we can not send long frag lists
         */
-       if ((card->info.type != QETH_CARD_TYPE_IQD) &&
-           ((use_tso && !qeth_l3_get_elements_no_tso(card, new_skb, 1)) ||
-            (!use_tso && !qeth_get_elements_no(card, new_skb, 0, 0)))) {
-               int lin_rc = skb_linearize(new_skb);
+       if ((use_tso && !qeth_l3_get_elements_no_tso(card, new_skb, 1)) ||
+           (!use_tso && !qeth_get_elements_no(card, new_skb, 0, 0))) {
+               rc = skb_linearize(new_skb);
 
                if (card->options.performance_stats) {
-                       if (lin_rc)
+                       if (rc)
                                card->perf_stats.tx_linfail++;
                        else
                                card->perf_stats.tx_lin++;
                }
-               if (lin_rc)
-                       goto tx_drop;
+               if (rc)
+                       goto out;
        }
-       nr_frags = skb_shinfo(new_skb)->nr_frags;
 
        if (use_tso) {
                hdr = skb_push(new_skb, sizeof(struct qeth_hdr_tso));
@@ -2265,97 +2285,112 @@ static netdev_tx_t qeth_l3_hard_start_xmit(struct sk_buff *skb,
                qeth_tso_fill_header(card, hdr, new_skb);
                hdr_elements++;
        } else {
-               if (data_offset < 0) {
-                       hdr = skb_push(new_skb, sizeof(struct qeth_hdr));
-                       qeth_l3_fill_header(card, hdr, new_skb, ipv, cast_type,
-                                           new_skb->len -
-                                           sizeof(struct qeth_hdr));
-               } else {
-                       if (be16_to_cpu(new_skb->protocol) == ETH_P_AF_IUCV)
-                               qeth_l3_fill_af_iucv_hdr(card, hdr, new_skb);
-                       else {
-                               qeth_l3_fill_header(card, hdr, new_skb, ipv,
-                                                   cast_type,
-                                                   new_skb->len - data_offset);
-                       }
-               }
-
-               if (new_skb->ip_summed == CHECKSUM_PARTIAL) {
-                       qeth_tx_csum(new_skb, &hdr->hdr.l3.ext_flags, ipv);
-                       if (card->options.performance_stats)
-                               card->perf_stats.tx_csum++;
-               }
+               hdr = skb_push(new_skb, sizeof(struct qeth_hdr));
+               qeth_l3_fill_header(card, hdr, new_skb, ipv, cast_type,
+                                   new_skb->len - sizeof(struct qeth_hdr));
        }
 
        elements = use_tso ?
                   qeth_l3_get_elements_no_tso(card, new_skb, hdr_elements) :
-                  qeth_get_elements_no(card, new_skb, hdr_elements,
-                                       (data_offset > 0) ? data_offset : 0);
+                  qeth_get_elements_no(card, new_skb, hdr_elements, 0);
        if (!elements) {
-               if (data_offset >= 0)
-                       kmem_cache_free(qeth_core_header_cache, hdr);
-               goto tx_drop;
+               rc = -E2BIG;
+               goto out;
        }
        elements += hdr_elements;
 
-       if (card->info.type != QETH_CARD_TYPE_IQD) {
-               int len;
-               if (use_tso) {
-                       hd_len = sizeof(struct qeth_hdr_tso) +
-                                ip_hdrlen(new_skb) + tcp_hdrlen(new_skb);
-                       len = hd_len;
-               } else {
-                       len = sizeof(struct qeth_hdr_layer3);
-               }
+       if (use_tso) {
+               hd_len = sizeof(struct qeth_hdr_tso) +
+                        ip_hdrlen(new_skb) + tcp_hdrlen(new_skb);
+               len = hd_len;
+       } else {
+               hd_len = 0;
+               len = sizeof(struct qeth_hdr_layer3);
+       }
 
-               if (qeth_hdr_chk_and_bounce(new_skb, &hdr, len))
-                       goto tx_drop;
-               rc = qeth_do_send_packet(card, queue, new_skb, hdr, hd_len,
-                                        hd_len, elements);
-       } else
-               rc = qeth_do_send_packet_fast(queue, new_skb, hdr, data_offset,
-                                             hd_len);
+       if (qeth_hdr_chk_and_bounce(new_skb, &hdr, len)) {
+               rc = -EINVAL;
+               goto out;
+       }
 
+       is_sg = skb_is_nonlinear(new_skb);
+       rc = qeth_do_send_packet(card, queue, new_skb, hdr, hd_len, hd_len,
+                                elements);
+out:
        if (!rc) {
-               card->stats.tx_packets++;
-               card->stats.tx_bytes += tx_bytes;
                if (new_skb != skb)
                        dev_kfree_skb_any(skb);
                if (card->options.performance_stats) {
+                       card->perf_stats.buf_elements_sent += elements;
+                       if (is_sg)
+                               card->perf_stats.sg_skbs_sent++;
                        if (use_tso) {
                                card->perf_stats.large_send_bytes += tx_bytes;
                                card->perf_stats.large_send_cnt++;
                        }
-                       if (nr_frags) {
-                               card->perf_stats.sg_skbs_sent++;
-                               /* nr_frags + skb->data */
-                               card->perf_stats.sg_frags_sent += nr_frags + 1;
-                       }
                }
-               rc = NETDEV_TX_OK;
        } else {
-               if (data_offset >= 0)
-                       kmem_cache_free(qeth_core_header_cache, hdr);
+               if (new_skb != skb)
+                       dev_kfree_skb_any(new_skb);
+       }
+       return rc;
+}
 
-               if (rc == -EBUSY) {
-                       if (new_skb != skb)
-                               dev_kfree_skb_any(new_skb);
-                       return NETDEV_TX_BUSY;
-               } else
+static netdev_tx_t qeth_l3_hard_start_xmit(struct sk_buff *skb,
+                                          struct net_device *dev)
+{
+       int cast_type = qeth_l3_get_cast_type(skb);
+       struct qeth_card *card = dev->ml_priv;
+       int ipv = qeth_get_ip_version(skb);
+       struct qeth_qdio_out_q *queue;
+       int tx_bytes = skb->len;
+       int rc;
+
+       if (IS_IQD(card)) {
+               if (card->options.sniffer)
+                       goto tx_drop;
+               if ((card->options.cq != QETH_CQ_ENABLED && !ipv) ||
+                   (card->options.cq == QETH_CQ_ENABLED &&
+                    skb->protocol != htons(ETH_P_AF_IUCV)))
                        goto tx_drop;
        }
 
-       netif_wake_queue(dev);
-       if (card->options.performance_stats)
-               card->perf_stats.outbound_time += qeth_get_micros() -
-                       card->perf_stats.outbound_start_time;
-       return rc;
+       if (card->state != CARD_STATE_UP || !card->lan_online) {
+               card->stats.tx_carrier_errors++;
+               goto tx_drop;
+       }
+
+       if (cast_type == RTN_BROADCAST && !card->info.broadcast_capable)
+               goto tx_drop;
+
+       queue = qeth_get_tx_queue(card, skb, ipv, cast_type);
+
+       if (card->options.performance_stats) {
+               card->perf_stats.outbound_cnt++;
+               card->perf_stats.outbound_start_time = qeth_get_micros();
+       }
+       netif_stop_queue(dev);
+
+       if (IS_IQD(card) || (!skb_is_gso(skb) && ipv == 4))
+               rc = qeth_l3_xmit_offload(card, skb, queue, ipv, cast_type);
+       else
+               rc = qeth_l3_xmit(card, skb, queue, ipv, cast_type);
+
+       if (!rc) {
+               card->stats.tx_packets++;
+               card->stats.tx_bytes += tx_bytes;
+               if (card->options.performance_stats)
+                       card->perf_stats.outbound_time += qeth_get_micros() -
+                               card->perf_stats.outbound_start_time;
+               netif_wake_queue(dev);
+               return NETDEV_TX_OK;
+       } else if (rc == -EBUSY) {
+               return NETDEV_TX_BUSY;
+       } /* else fall through */
 
 tx_drop:
        card->stats.tx_dropped++;
        card->stats.tx_errors++;
-       if ((new_skb != skb) && new_skb)
-               dev_kfree_skb_any(new_skb);
        dev_kfree_skb_any(skb);
        netif_wake_queue(dev);
        return NETDEV_TX_OK;
@@ -2449,7 +2484,6 @@ static const struct net_device_ops qeth_l3_netdev_ops = {
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_set_rx_mode        = qeth_l3_set_rx_mode,
        .ndo_do_ioctl           = qeth_do_ioctl,
-       .ndo_change_mtu         = qeth_change_mtu,
        .ndo_fix_features       = qeth_fix_features,
        .ndo_set_features       = qeth_set_features,
        .ndo_vlan_rx_add_vid    = qeth_l3_vlan_rx_add_vid,
@@ -2466,7 +2500,6 @@ static const struct net_device_ops qeth_l3_osa_netdev_ops = {
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_set_rx_mode        = qeth_l3_set_rx_mode,
        .ndo_do_ioctl           = qeth_do_ioctl,
-       .ndo_change_mtu         = qeth_change_mtu,
        .ndo_fix_features       = qeth_fix_features,
        .ndo_set_features       = qeth_set_features,
        .ndo_vlan_rx_add_vid    = qeth_l3_vlan_rx_add_vid,
@@ -2479,6 +2512,9 @@ static int qeth_l3_setup_netdev(struct qeth_card *card)
 {
        int rc;
 
+       if (card->dev->netdev_ops)
+               return 0;
+
        if (card->info.type == QETH_CARD_TYPE_OSD ||
            card->info.type == QETH_CARD_TYPE_OSX) {
                if ((card->info.link_type == QETH_LINK_TYPE_LANE_TR) ||
@@ -2487,9 +2523,6 @@ static int qeth_l3_setup_netdev(struct qeth_card *card)
                        return -ENODEV;
                }
 
-               card->dev = alloc_etherdev(0);
-               if (!card->dev)
-                       return -ENODEV;
                card->dev->netdev_ops = &qeth_l3_osa_netdev_ops;
 
                /*IPv6 address autoconfiguration stuff*/
@@ -2497,9 +2530,6 @@ static int qeth_l3_setup_netdev(struct qeth_card *card)
                if (!(card->info.unique_id & UNIQUE_ID_NOT_BY_CARD))
                        card->dev->dev_id = card->info.unique_id & 0xffff;
 
-               card->dev->hw_features |= NETIF_F_SG;
-               card->dev->vlan_features |= NETIF_F_SG;
-
                if (!card->info.guestlan) {
                        card->dev->features |= NETIF_F_SG;
                        card->dev->hw_features |= NETIF_F_TSO |
@@ -2513,38 +2543,35 @@ static int qeth_l3_setup_netdev(struct qeth_card *card)
                        card->dev->vlan_features |= NETIF_F_IPV6_CSUM;
                }
        } else if (card->info.type == QETH_CARD_TYPE_IQD) {
-               card->dev = alloc_netdev(0, "hsi%d", NET_NAME_UNKNOWN,
-                                        ether_setup);
-               if (!card->dev)
-                       return -ENODEV;
                card->dev->flags |= IFF_NOARP;
                card->dev->netdev_ops = &qeth_l3_netdev_ops;
+
                rc = qeth_l3_iqd_read_initial_mac(card);
                if (rc)
-                       return rc;
+                       goto out;
+
                if (card->options.hsuid[0])
                        memcpy(card->dev->perm_addr, card->options.hsuid, 9);
        } else
                return -ENODEV;
 
-       card->dev->ml_priv = card;
-       card->dev->watchdog_timeo = QETH_TX_TIMEOUT;
-       card->dev->mtu = card->info.initial_mtu;
-       card->dev->min_mtu = 64;
-       card->dev->max_mtu = ETH_MAX_MTU;
-       card->dev->dev_port = card->info.portno;
        card->dev->ethtool_ops = &qeth_l3_ethtool_ops;
+       card->dev->needed_headroom = sizeof(struct qeth_hdr) - ETH_HLEN;
        card->dev->features |=  NETIF_F_HW_VLAN_CTAG_TX |
                                NETIF_F_HW_VLAN_CTAG_RX |
                                NETIF_F_HW_VLAN_CTAG_FILTER;
+
        netif_keep_dst(card->dev);
-       netif_set_gso_max_size(card->dev, (QETH_MAX_BUFFER_ELEMENTS(card) - 1) *
-                                         PAGE_SIZE);
+       if (card->dev->hw_features & NETIF_F_TSO)
+               netif_set_gso_max_size(card->dev,
+                                      PAGE_SIZE * (QETH_MAX_BUFFER_ELEMENTS(card) - 1));
 
-       SET_NETDEV_DEV(card->dev, &card->gdev->dev);
        netif_napi_add(card->dev, &card->napi, qeth_poll, QETH_NAPI_WEIGHT);
-       netif_carrier_off(card->dev);
-       return register_netdev(card->dev);
+       rc = register_netdev(card->dev);
+out:
+       if (rc)
+               card->dev->netdev_ops = NULL;
+       return rc;
 }
 
 static const struct device_type qeth_l3_devtype = {
@@ -2582,15 +2609,9 @@ static void qeth_l3_remove_device(struct ccwgroup_device *cgdev)
        if (cgdev->state == CCWGROUP_ONLINE)
                qeth_l3_set_offline(cgdev);
 
-       if (card->dev) {
-               unregister_netdev(card->dev);
-               free_netdev(card->dev);
-               card->dev = NULL;
-       }
-
+       unregister_netdev(card->dev);
        qeth_l3_clear_ip_htable(card, 0);
        qeth_l3_clear_ipato_list(card);
-       return;
 }
 
 static int __qeth_l3_set_online(struct ccwgroup_device *gdev, int recovery_mode)
@@ -2612,10 +2633,9 @@ static int __qeth_l3_set_online(struct ccwgroup_device *gdev, int recovery_mode)
                goto out_remove;
        }
 
-       if (!card->dev && qeth_l3_setup_netdev(card)) {
-               rc = -ENODEV;
+       rc = qeth_l3_setup_netdev(card);
+       if (rc)
                goto out_remove;
-       }
 
        if (qeth_is_diagass_supported(card, QETH_DIAGS_CMD_TRAP)) {
                if (card->info.hwtrap &&
@@ -2666,11 +2686,12 @@ static int __qeth_l3_set_online(struct ccwgroup_device *gdev, int recovery_mode)
        qeth_enable_hw_features(card->dev);
        if (recover_flag == CARD_STATE_RECOVER) {
                rtnl_lock();
-               if (recovery_mode)
+               if (recovery_mode) {
                        __qeth_l3_open(card->dev);
-               else
+                       qeth_l3_set_rx_mode(card->dev);
+               } else {
                        dev_open(card->dev);
-               qeth_l3_set_rx_mode(card->dev);
+               }
                rtnl_unlock();
        }
        qeth_trace_features(card);
@@ -2711,8 +2732,7 @@ static int __qeth_l3_set_offline(struct ccwgroup_device *cgdev,
        QETH_DBF_TEXT(SETUP, 3, "setoffl");
        QETH_DBF_HEX(SETUP, 3, &card, sizeof(void *));
 
-       if (card->dev && netif_carrier_ok(card->dev))
-               netif_carrier_off(card->dev);
+       netif_carrier_off(card->dev);
        recover_flag = card->state;
        if ((!recovery_mode && card->info.hwtrap) || card->info.hwtrap == 2) {
                qeth_hw_trap(card, QETH_DIAGS_TRAP_DISARM);
@@ -2780,8 +2800,7 @@ static int qeth_l3_pm_suspend(struct ccwgroup_device *gdev)
 {
        struct qeth_card *card = dev_get_drvdata(&gdev->dev);
 
-       if (card->dev)
-               netif_device_detach(card->dev);
+       netif_device_detach(card->dev);
        qeth_set_allowed_threads(card, 0, 1);
        wait_event(card->wait_q, qeth_threads_running(card, 0xffffffff) == 0);
        if (gdev->state == CCWGROUP_OFFLINE)
@@ -2814,8 +2833,7 @@ static int qeth_l3_pm_resume(struct ccwgroup_device *gdev)
                rc = __qeth_l3_set_online(card->gdev, 0);
 out:
        qeth_set_allowed_threads(card, 0xffffffff, 0);
-       if (card->dev)
-               netif_device_attach(card->dev);
+       netif_device_attach(card->dev);
        if (rc)
                dev_warn(&card->gdev->dev, "The qeth device driver "
                        "failed to recover an error on the device\n");
index f61192a048f447168a35e970b693c7004a490629..45ac6d8705c69762ebf52f26642fb4c726ce4dd8 100644 (file)
@@ -299,8 +299,7 @@ static ssize_t qeth_l3_dev_hsuid_store(struct device *dev,
        if (strlen(tmp) == 0) {
                /* delete ip address only */
                card->options.hsuid[0] = '\0';
-               if (card->dev)
-                       memcpy(card->dev->perm_addr, card->options.hsuid, 9);
+               memcpy(card->dev->perm_addr, card->options.hsuid, 9);
                qeth_configure_cq(card, QETH_CQ_DISABLED);
                return count;
        }
@@ -311,8 +310,7 @@ static ssize_t qeth_l3_dev_hsuid_store(struct device *dev,
        snprintf(card->options.hsuid, sizeof(card->options.hsuid),
                 "%-8s", tmp);
        ASCEBC(card->options.hsuid, 8);
-       if (card->dev)
-               memcpy(card->dev->perm_addr, card->options.hsuid, 9);
+       memcpy(card->dev->perm_addr, card->options.hsuid, 9);
 
        rc = qeth_l3_modify_hsuid(card, true);
 
index e461168313bf95c66e56afff9fba720d9b23c41a..4e6611e4c59beb9cd74d0ec18ea4b6851d5e9f3f 100644 (file)
@@ -290,13 +290,6 @@ static netdev_tx_t xlr_net_start_xmit(struct sk_buff *skb,
        return NETDEV_TX_OK;
 }
 
-static u16 xlr_net_select_queue(struct net_device *ndev, struct sk_buff *skb,
-                               void *accel_priv,
-                               select_queue_fallback_t fallback)
-{
-       return (u16)smp_processor_id();
-}
-
 static void xlr_hw_set_mac_addr(struct net_device *ndev)
 {
        struct xlr_net_priv *priv = netdev_priv(ndev);
@@ -403,7 +396,7 @@ static const struct net_device_ops xlr_netdev_ops = {
        .ndo_open = xlr_net_open,
        .ndo_stop = xlr_net_stop,
        .ndo_start_xmit = xlr_net_start_xmit,
-       .ndo_select_queue = xlr_net_select_queue,
+       .ndo_select_queue = dev_pick_tx_cpu_id,
        .ndo_set_mac_address = xlr_net_set_mac_addr,
        .ndo_set_rx_mode = xlr_set_rx_mode,
        .ndo_get_stats64 = xlr_stats,
index 084a246eec19f8f457eb4dbafb53f0e3129347f3..6790b7c8cfb14f2e7a142dc6d4ec2b5dd3392634 100644 (file)
@@ -575,7 +575,6 @@ enum ht_cap_ampdu_factor {
  * According to IEEE802.11n spec size varies from 8K to 64K (in powers of 2)
  */
 #define IEEE80211_MIN_AMPDU_BUF 0x8
-#define IEEE80211_MAX_AMPDU_BUF 0x40
 
 
 #define OP_MODE_PURE                    0
index add1ba00f3e9ab7c5111f6c3846fa582ed243e53..38e85c8a85c8ba690ae60ef4df9181e959eee0f1 100644 (file)
@@ -253,7 +253,8 @@ static unsigned int rtw_classify8021d(struct sk_buff *skb)
 }
 
 static u16 rtw_select_queue(struct net_device *dev, struct sk_buff *skb,
-                           void *accel_priv, select_queue_fallback_t fallback)
+                           struct net_device *sb_dev,
+                           select_queue_fallback_t fallback)
 {
        struct adapter  *padapter = rtw_netdev_priv(dev);
        struct mlme_priv *pmlmepriv = &padapter->mlmepriv;
index 0ed2f44ab4e958e0cb095081ccd13932cd09b284..00a4302e9983f985a6355302b151f69efb7572ec 100644 (file)
@@ -574,7 +574,6 @@ struct ieee80211_ht_addt_info {
  * According to IEEE802.11n spec size varies from 8K to 64K (in powers of 2)
  */
 #define IEEE80211_MIN_AMPDU_BUF 0x8
-#define IEEE80211_MAX_AMPDU_BUF 0x40
 
 
 /* Spatial Multiplexing Power Save Modes */
index 08bc79840b2373ce798fb31eae59bef70f0bbe7d..559bf2606fb7d932e20bac388de8c124f944a7a7 100644 (file)
@@ -799,7 +799,6 @@ enum HT_CAP_AMPDU_FACTOR {
  * According to IEEE802.11n spec size varies from 8K to 64K (in powers of 2)
  */
 #define IEEE80211_MIN_AMPDU_BUF 0x8
-#define IEEE80211_MAX_AMPDU_BUF 0x40
 
 
 /* Spatial Multiplexing Power Save Modes */
index ace68f023b49db7aec2fd147830e5e2e3cf3c34f..181642358e3fe1b29339f1c285f3e3a44dbe7f67 100644 (file)
@@ -403,10 +403,9 @@ static unsigned int rtw_classify8021d(struct sk_buff *skb)
 }
 
 
-static u16 rtw_select_queue(struct net_device *dev, struct sk_buff *skb
-                               , void *accel_priv
-                               , select_queue_fallback_t fallback
-)
+static u16 rtw_select_queue(struct net_device *dev, struct sk_buff *skb,
+                           struct net_device *sb_dev,
+                           select_queue_fallback_t fallback)
 {
        struct adapter  *padapter = rtw_netdev_priv(dev);
        struct mlme_priv *pmlmepriv = &padapter->mlmepriv;
index e46e47d93d7d1f087e5fc1a80349f3e3b0637f67..094827c1879a656850613b501f2300777fc435e2 100644 (file)
@@ -1838,7 +1838,7 @@ void rtl_rx_ampdu_apply(struct rtl_priv *rtlpriv)
                 reject_agg, ctrl_agg_size, agg_size);
 
        rtlpriv->hw->max_rx_aggregation_subframes =
-               (ctrl_agg_size ? agg_size : IEEE80211_MAX_AMPDU_BUF);
+               (ctrl_agg_size ? agg_size : IEEE80211_MAX_AMPDU_BUF_HT);
 }
 
 /*********************************************************
index 29756d88799b630f2c73ca097b56b092a14a7d5a..367d8023b54dbabddd526d08cec66fa29bd4f36d 100644 (file)
@@ -94,7 +94,7 @@ struct vhost_net_ubuf_ref {
        struct vhost_virtqueue *vq;
 };
 
-#define VHOST_RX_BATCH 64
+#define VHOST_NET_BATCH 64
 struct vhost_net_buf {
        void **queue;
        int tail;
@@ -168,7 +168,7 @@ static int vhost_net_buf_produce(struct vhost_net_virtqueue *nvq)
 
        rxq->head = 0;
        rxq->tail = ptr_ring_consume_batched(nvq->rx_ring, rxq->queue,
-                                             VHOST_RX_BATCH);
+                                             VHOST_NET_BATCH);
        return rxq->tail;
 }
 
@@ -396,13 +396,10 @@ static inline unsigned long busy_clock(void)
        return local_clock() >> 10;
 }
 
-static bool vhost_can_busy_poll(struct vhost_dev *dev,
-                               unsigned long endtime)
+static bool vhost_can_busy_poll(unsigned long endtime)
 {
-       return likely(!need_resched()) &&
-              likely(!time_after(busy_clock(), endtime)) &&
-              likely(!signal_pending(current)) &&
-              !vhost_has_work(dev);
+       return likely(!need_resched() && !time_after(busy_clock(), endtime) &&
+                     !signal_pending(current));
 }
 
 static void vhost_net_disable_vq(struct vhost_net *n,
@@ -431,21 +428,42 @@ static int vhost_net_enable_vq(struct vhost_net *n,
        return vhost_poll_start(poll, sock->file);
 }
 
+static void vhost_net_signal_used(struct vhost_net_virtqueue *nvq)
+{
+       struct vhost_virtqueue *vq = &nvq->vq;
+       struct vhost_dev *dev = vq->dev;
+
+       if (!nvq->done_idx)
+               return;
+
+       vhost_add_used_and_signal_n(dev, vq, vq->heads, nvq->done_idx);
+       nvq->done_idx = 0;
+}
+
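
[Illustrative aside] vhost_net_signal_used() flushes all completions accumulated in done_idx with a single vhost_add_used_and_signal_n() call, and handle_tx_copy() below triggers it once per VHOST_NET_BATCH packets. A toy model of that batched-doorbell accounting (demo constant mirroring the diff; struct ring is invented):

#include <stdio.h>

#define NET_BATCH 64    /* mirrors VHOST_NET_BATCH in the diff */

struct ring {
        int done_idx;   /* completions accumulated, not yet signalled */
        int signals;    /* how often the guest was actually notified */
};

static void signal_used(struct ring *r)
{
        if (!r->done_idx)
                return;         /* nothing pending: skip the doorbell */
        r->signals++;           /* one notification covers the batch */
        r->done_idx = 0;
}

int main(void)
{
        struct ring r = { 0, 0 };
        int pkt;

        for (pkt = 0; pkt < 200; pkt++)
                if (++r.done_idx >= NET_BATCH)
                        signal_used(&r);
        signal_used(&r);        /* flush the partial final batch */
        printf("200 packets -> %d signals\n", r.signals);
        return 0;
}

Two hundred packets cost four notifications instead of two hundred, which is the point of deferring the signal.
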
 static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
-                                   struct vhost_virtqueue *vq,
-                                   struct iovec iov[], unsigned int iov_size,
-                                   unsigned int *out_num, unsigned int *in_num)
+                                   struct vhost_net_virtqueue *nvq,
+                                   unsigned int *out_num, unsigned int *in_num,
+                                   bool *busyloop_intr)
 {
+       struct vhost_virtqueue *vq = &nvq->vq;
        unsigned long uninitialized_var(endtime);
        int r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
                                  out_num, in_num, NULL, NULL);
 
        if (r == vq->num && vq->busyloop_timeout) {
+               if (!vhost_sock_zcopy(vq->private_data))
+                       vhost_net_signal_used(nvq);
                preempt_disable();
                endtime = busy_clock() + vq->busyloop_timeout;
-               while (vhost_can_busy_poll(vq->dev, endtime) &&
-                      vhost_vq_avail_empty(vq->dev, vq))
+               while (vhost_can_busy_poll(endtime)) {
+                       if (vhost_has_work(vq->dev)) {
+                               *busyloop_intr = true;
+                               break;
+                       }
+                       if (!vhost_vq_avail_empty(vq->dev, vq))
+                               break;
                        cpu_relax();
+               }
                preempt_enable();
                r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
                                      out_num, in_num, NULL, NULL);
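
[Illustrative aside] The reworked busy-poll loop no longer folds vhost_has_work() into the loop condition; it records the reason for stopping in *busyloop_intr so the caller can requeue the poll when the loop was interrupted by other work rather than satisfied by a descriptor. A userspace sketch of the flagged early exit, with an iteration budget standing in for the busyloop timeout (all names invented):

#include <stdbool.h>
#include <stdio.h>

/* Callbacks standing in for vhost_has_work() and vhost_vq_avail_empty(). */
typedef bool (*cond_fn)(void);

static bool poll_ring(cond_fn has_other_work, cond_fn ring_ready,
                      int budget, bool *interrupted)
{
        while (budget-- > 0) {                  /* stands in for the timeout */
                if (has_other_work()) {
                        *interrupted = true;    /* caller requeues the poll */
                        return false;
                }
                if (ring_ready())
                        return true;            /* a descriptor showed up */
        }
        return false;                           /* plain timeout */
}

static int ticks;
static bool work_on_third_tick(void) { return ++ticks == 3; }
static bool never_ready(void) { return false; }

int main(void)
{
        bool intr = false;
        bool ok = poll_ring(work_on_third_tick, never_ready, 100, &intr);

        printf("ready=%d interrupted=%d after %d ticks\n", ok, intr, ticks);
        return 0;
}
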
@@ -463,9 +481,62 @@ static bool vhost_exceeds_maxpend(struct vhost_net *net)
               min_t(unsigned int, VHOST_MAX_PEND, vq->num >> 2);
 }
 
-/* Expects to be always run from workqueue - which acts as
- * read-size critical section for our kind of RCU. */
-static void handle_tx(struct vhost_net *net)
+static size_t init_iov_iter(struct vhost_virtqueue *vq, struct iov_iter *iter,
+                           size_t hdr_size, int out)
+{
+       /* Skip header. TODO: support TSO. */
+       size_t len = iov_length(vq->iov, out);
+
+       iov_iter_init(iter, WRITE, vq->iov, out, len);
+       iov_iter_advance(iter, hdr_size);
+
+       return iov_iter_count(iter);
+}
+
+static bool vhost_exceeds_weight(int pkts, int total_len)
+{
+       return total_len >= VHOST_NET_WEIGHT ||
+              pkts >= VHOST_NET_PKT_WEIGHT;
+}
+
+static int get_tx_bufs(struct vhost_net *net,
+                      struct vhost_net_virtqueue *nvq,
+                      struct msghdr *msg,
+                      unsigned int *out, unsigned int *in,
+                      size_t *len, bool *busyloop_intr)
+{
+       struct vhost_virtqueue *vq = &nvq->vq;
+       int ret;
+
+       ret = vhost_net_tx_get_vq_desc(net, nvq, out, in, busyloop_intr);
+
+       if (ret < 0 || ret == vq->num)
+               return ret;
+
+       if (*in) {
+               vq_err(vq, "Unexpected descriptor format for TX: out %d, in %d\n",
+                       *out, *in);
+               return -EFAULT;
+       }
+
+       /* Sanity check */
+       *len = init_iov_iter(vq, &msg->msg_iter, nvq->vhost_hlen, *out);
+       if (*len == 0) {
+               vq_err(vq, "Unexpected header len for TX: %zd expected %zd\n",
+                       *len, nvq->vhost_hlen);
+               return -EFAULT;
+       }
+
+       return ret;
+}
+
+static bool tx_can_batch(struct vhost_virtqueue *vq, size_t total_len)
+{
+       return total_len < VHOST_NET_WEIGHT &&
+              !vhost_vq_avail_empty(vq->dev, vq);
+}
+
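
[Illustrative aside] vhost_exceeds_weight() caps how many packets and bytes one kick may push before the worker yields, while tx_can_batch() decides whether MSG_MORE should stay set because more payload is known to follow. A small demo of how the two interact over a burst; the constants approximate VHOST_NET_WEIGHT and VHOST_NET_PKT_WEIGHT but are assumptions of this sketch:

#include <stdbool.h>
#include <stdio.h>

#define NET_WEIGHT 0x80000      /* byte budget per invocation */
#define PKT_WEIGHT 256          /* packet budget per invocation */

static bool exceeds_weight(int pkts, long total_len)
{
        return total_len >= NET_WEIGHT || pkts >= PKT_WEIGHT;
}

static bool can_batch(long total_len, int queued)
{
        return total_len < NET_WEIGHT && queued > 0;
}

int main(void)
{
        long total = 0;
        int pkts = 0, queued = 1000;

        while (queued-- > 0) {
                bool more;

                total += 1500;                  /* one MTU-sized packet */
                more = can_batch(total, queued); /* keep MSG_MORE set? */
                if (exceeds_weight(++pkts, total)) {
                        printf("yield after %d pkts, %ld bytes (more=%d)\n",
                               pkts, total, more);
                        break;          /* requeue and let other vqs run */
                }
        }
        return 0;
}
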
+static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
 {
        struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
        struct vhost_virtqueue *vq = &nvq->vq;
@@ -480,67 +551,103 @@ static void handle_tx(struct vhost_net *net)
        };
        size_t len, total_len = 0;
        int err;
-       size_t hdr_size;
-       struct socket *sock;
-       struct vhost_net_ubuf_ref *uninitialized_var(ubufs);
-       bool zcopy, zcopy_used;
        int sent_pkts = 0;
 
-       mutex_lock(&vq->mutex);
-       sock = vq->private_data;
-       if (!sock)
-               goto out;
+       for (;;) {
+               bool busyloop_intr = false;
 
-       if (!vq_iotlb_prefetch(vq))
-               goto out;
+               head = get_tx_bufs(net, nvq, &msg, &out, &in, &len,
+                                  &busyloop_intr);
+               /* On error, stop handling until the next kick. */
+               if (unlikely(head < 0))
+                       break;
+               /* Nothing new?  Wait for eventfd to tell us they refilled. */
+               if (head == vq->num) {
+                       if (unlikely(busyloop_intr)) {
+                               vhost_poll_queue(&vq->poll);
+                       } else if (unlikely(vhost_enable_notify(&net->dev,
+                                                               vq))) {
+                               vhost_disable_notify(&net->dev, vq);
+                               continue;
+                       }
+                       break;
+               }
 
-       vhost_disable_notify(&net->dev, vq);
-       vhost_net_disable_vq(net, vq);
+               vq->heads[nvq->done_idx].id = cpu_to_vhost32(vq, head);
+               vq->heads[nvq->done_idx].len = 0;
 
-       hdr_size = nvq->vhost_hlen;
-       zcopy = nvq->ubufs;
+               total_len += len;
+               if (tx_can_batch(vq, total_len))
+                       msg.msg_flags |= MSG_MORE;
+               else
+                       msg.msg_flags &= ~MSG_MORE;
+
+               /* TODO: Check specific error and bomb out unless ENOBUFS? */
+               err = sock->ops->sendmsg(sock, &msg, len);
+               if (unlikely(err < 0)) {
+                       vhost_discard_vq_desc(vq, 1);
+                       vhost_net_enable_vq(net, vq);
+                       break;
+               }
+               if (err != len)
+                       pr_debug("Truncated TX packet: len %d != %zd\n",
+                                err, len);
+               if (++nvq->done_idx >= VHOST_NET_BATCH)
+                       vhost_net_signal_used(nvq);
+               if (vhost_exceeds_weight(++sent_pkts, total_len)) {
+                       vhost_poll_queue(&vq->poll);
+                       break;
+               }
+       }
+
+       vhost_net_signal_used(nvq);
+}
+
+static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
+{
+       struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
+       struct vhost_virtqueue *vq = &nvq->vq;
+       unsigned out, in;
+       int head;
+       struct msghdr msg = {
+               .msg_name = NULL,
+               .msg_namelen = 0,
+               .msg_control = NULL,
+               .msg_controllen = 0,
+               .msg_flags = MSG_DONTWAIT,
+       };
+       size_t len, total_len = 0;
+       int err;
+       struct vhost_net_ubuf_ref *uninitialized_var(ubufs);
+       bool zcopy_used;
+       int sent_pkts = 0;
 
        for (;;) {
-               /* Release DMAs done buffers first */
-               if (zcopy)
-                       vhost_zerocopy_signal_used(net, vq);
+               bool busyloop_intr;
 
+               /* Release DMAs done buffers first */
+               vhost_zerocopy_signal_used(net, vq);
 
-               head = vhost_net_tx_get_vq_desc(net, vq, vq->iov,
-                                               ARRAY_SIZE(vq->iov),
-                                               &out, &in);
+               busyloop_intr = false;
+               head = get_tx_bufs(net, nvq, &msg, &out, &in, &len,
+                                  &busyloop_intr);
                /* On error, stop handling until the next kick. */
                if (unlikely(head < 0))
                        break;
                /* Nothing new?  Wait for eventfd to tell us they refilled. */
                if (head == vq->num) {
-                       if (unlikely(vhost_enable_notify(&net->dev, vq))) {
+                       if (unlikely(busyloop_intr)) {
+                               vhost_poll_queue(&vq->poll);
+                       } else if (unlikely(vhost_enable_notify(&net->dev, vq))) {
                                vhost_disable_notify(&net->dev, vq);
                                continue;
                        }
                        break;
                }
-               if (in) {
-                       vq_err(vq, "Unexpected descriptor format for TX: "
-                              "out %d, int %d\n", out, in);
-                       break;
-               }
-               /* Skip header. TODO: support TSO. */
-               len = iov_length(vq->iov, out);
-               iov_iter_init(&msg.msg_iter, WRITE, vq->iov, out, len);
-               iov_iter_advance(&msg.msg_iter, hdr_size);
-               /* Sanity check */
-               if (!msg_data_left(&msg)) {
-                       vq_err(vq, "Unexpected header len for TX: "
-                              "%zd expected %zd\n",
-                              len, hdr_size);
-                       break;
-               }
-               len = msg_data_left(&msg);
 
-               zcopy_used = zcopy && len >= VHOST_GOODCOPY_LEN
-                                  && !vhost_exceeds_maxpend(net)
-                                  && vhost_net_tx_select_zcopy(net);
+               zcopy_used = len >= VHOST_GOODCOPY_LEN
+                            && !vhost_exceeds_maxpend(net)
+                            && vhost_net_tx_select_zcopy(net);
 
                /* use msg_control to pass vhost zerocopy ubuf info to skb */
                if (zcopy_used) {
@@ -562,10 +669,8 @@ static void handle_tx(struct vhost_net *net)
                        msg.msg_control = NULL;
                        ubufs = NULL;
                }
-
                total_len += len;
-               if (total_len < VHOST_NET_WEIGHT &&
-                   !vhost_vq_avail_empty(&net->dev, vq) &&
+               if (tx_can_batch(vq, total_len) &&
                    likely(!vhost_exceeds_maxpend(net))) {
                        msg.msg_flags |= MSG_MORE;
                } else {
@@ -592,12 +697,37 @@ static void handle_tx(struct vhost_net *net)
                else
                        vhost_zerocopy_signal_used(net, vq);
                vhost_net_tx_packet(net);
-               if (unlikely(total_len >= VHOST_NET_WEIGHT) ||
-                   unlikely(++sent_pkts >= VHOST_NET_PKT_WEIGHT)) {
+               if (unlikely(vhost_exceeds_weight(++sent_pkts, total_len))) {
                        vhost_poll_queue(&vq->poll);
                        break;
                }
        }
+}
+
+/* Expects to be always run from workqueue - which acts as
+ * read-side critical section for our kind of RCU. */
+static void handle_tx(struct vhost_net *net)
+{
+       struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
+       struct vhost_virtqueue *vq = &nvq->vq;
+       struct socket *sock;
+
+       mutex_lock(&vq->mutex);
+       sock = vq->private_data;
+       if (!sock)
+               goto out;
+
+       if (!vq_iotlb_prefetch(vq))
+               goto out;
+
+       vhost_disable_notify(&net->dev, vq);
+       vhost_net_disable_vq(net, vq);
+
+       if (vhost_sock_zcopy(sock))
+               handle_tx_zerocopy(net, sock);
+       else
+               handle_tx_copy(net, sock);
+
 out:
        mutex_unlock(&vq->mutex);
 }
@@ -633,53 +763,50 @@ static int sk_has_rx_data(struct sock *sk)
        return skb_queue_empty(&sk->sk_receive_queue);
 }
 
-static void vhost_rx_signal_used(struct vhost_net_virtqueue *nvq)
+static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk,
+                                     bool *busyloop_intr)
 {
-       struct vhost_virtqueue *vq = &nvq->vq;
-       struct vhost_dev *dev = vq->dev;
-
-       if (!nvq->done_idx)
-               return;
-
-       vhost_add_used_and_signal_n(dev, vq, vq->heads, nvq->done_idx);
-       nvq->done_idx = 0;
-}
-
-static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk)
-{
-       struct vhost_net_virtqueue *rvq = &net->vqs[VHOST_NET_VQ_RX];
-       struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
-       struct vhost_virtqueue *vq = &nvq->vq;
+       struct vhost_net_virtqueue *rnvq = &net->vqs[VHOST_NET_VQ_RX];
+       struct vhost_net_virtqueue *tnvq = &net->vqs[VHOST_NET_VQ_TX];
+       struct vhost_virtqueue *rvq = &rnvq->vq;
+       struct vhost_virtqueue *tvq = &tnvq->vq;
        unsigned long uninitialized_var(endtime);
-       int len = peek_head_len(rvq, sk);
+       int len = peek_head_len(rnvq, sk);
 
-       if (!len && vq->busyloop_timeout) {
+       if (!len && tvq->busyloop_timeout) {
                /* Flush batched heads first */
-               vhost_rx_signal_used(rvq);
+               vhost_net_signal_used(rnvq);
                /* Both tx vq and rx socket were polled here */
-               mutex_lock_nested(&vq->mutex, 1);
-               vhost_disable_notify(&net->dev, vq);
+               mutex_lock_nested(&tvq->mutex, 1);
+               vhost_disable_notify(&net->dev, tvq);
 
                preempt_disable();
-               endtime = busy_clock() + vq->busyloop_timeout;
+               endtime = busy_clock() + tvq->busyloop_timeout;
 
-               while (vhost_can_busy_poll(&net->dev, endtime) &&
-                      !sk_has_rx_data(sk) &&
-                      vhost_vq_avail_empty(&net->dev, vq))
+               while (vhost_can_busy_poll(endtime)) {
+                       if (vhost_has_work(&net->dev)) {
+                               *busyloop_intr = true;
+                               break;
+                       }
+                       if ((sk_has_rx_data(sk) &&
+                            !vhost_vq_avail_empty(&net->dev, rvq)) ||
+                           !vhost_vq_avail_empty(&net->dev, tvq))
+                               break;
                        cpu_relax();
+               }
 
                preempt_enable();
 
-               if (!vhost_vq_avail_empty(&net->dev, vq))
-                       vhost_poll_queue(&vq->poll);
-               else if (unlikely(vhost_enable_notify(&net->dev, vq))) {
-                       vhost_disable_notify(&net->dev, vq);
-                       vhost_poll_queue(&vq->poll);
+               if (!vhost_vq_avail_empty(&net->dev, tvq)) {
+                       vhost_poll_queue(&tvq->poll);
+               } else if (unlikely(vhost_enable_notify(&net->dev, tvq))) {
+                       vhost_disable_notify(&net->dev, tvq);
+                       vhost_poll_queue(&tvq->poll);
                }
 
-               mutex_unlock(&vq->mutex);
+               mutex_unlock(&tvq->mutex);
 
-               len = peek_head_len(rvq, sk);
+               len = peek_head_len(rnvq, sk);
        }
 
        return len;
@@ -786,6 +913,7 @@ static void handle_rx(struct vhost_net *net)
        s16 headcount;
        size_t vhost_hlen, sock_hlen;
        size_t vhost_len, sock_len;
+       bool busyloop_intr = false;
        struct socket *sock;
        struct iov_iter fixup;
        __virtio16 num_buffers;
@@ -809,7 +937,8 @@ static void handle_rx(struct vhost_net *net)
                vq->log : NULL;
        mergeable = vhost_has_feature(vq, VIRTIO_NET_F_MRG_RXBUF);
 
-       while ((sock_len = vhost_net_rx_peek_head_len(net, sock->sk))) {
+       while ((sock_len = vhost_net_rx_peek_head_len(net, sock->sk,
+                                                     &busyloop_intr))) {
                sock_len += sock_hlen;
                vhost_len = sock_len + vhost_hlen;
                headcount = get_rx_bufs(vq, vq->heads + nvq->done_idx,
@@ -820,7 +949,9 @@ static void handle_rx(struct vhost_net *net)
                        goto out;
                /* OK, now we need to know about added descriptors. */
                if (!headcount) {
-                       if (unlikely(vhost_enable_notify(&net->dev, vq))) {
+                       if (unlikely(busyloop_intr)) {
+                               vhost_poll_queue(&vq->poll);
+                       } else if (unlikely(vhost_enable_notify(&net->dev, vq))) {
                                /* They have slipped one in as we were
                                 * doing that: check again. */
                                vhost_disable_notify(&net->dev, vq);
@@ -830,6 +961,7 @@ static void handle_rx(struct vhost_net *net)
                         * they refilled. */
                        goto out;
                }
+               busyloop_intr = false;
                if (nvq->rx_ring)
                        msg.msg_control = vhost_net_buf_consume(&nvq->rxq);
                /* On overrun, truncate and discard */
@@ -885,20 +1017,22 @@ static void handle_rx(struct vhost_net *net)
                        goto out;
                }
                nvq->done_idx += headcount;
-               if (nvq->done_idx > VHOST_RX_BATCH)
-                       vhost_rx_signal_used(nvq);
+               if (nvq->done_idx > VHOST_NET_BATCH)
+                       vhost_net_signal_used(nvq);
                if (unlikely(vq_log))
                        vhost_log_write(vq, vq_log, log, vhost_len);
                total_len += vhost_len;
-               if (unlikely(total_len >= VHOST_NET_WEIGHT) ||
-                   unlikely(++recv_pkts >= VHOST_NET_PKT_WEIGHT)) {
+               if (unlikely(vhost_exceeds_weight(++recv_pkts, total_len))) {
                        vhost_poll_queue(&vq->poll);
                        goto out;
                }
        }
-       vhost_net_enable_vq(net, vq);
+       if (unlikely(busyloop_intr))
+               vhost_poll_queue(&vq->poll);
+       else
+               vhost_net_enable_vq(net, vq);
 out:
-       vhost_rx_signal_used(nvq);
+       vhost_net_signal_used(nvq);
        mutex_unlock(&vq->mutex);
 }
 
@@ -951,7 +1085,7 @@ static int vhost_net_open(struct inode *inode, struct file *f)
                return -ENOMEM;
        }
 
-       queue = kmalloc_array(VHOST_RX_BATCH, sizeof(void *),
+       queue = kmalloc_array(VHOST_NET_BATCH, sizeof(void *),
                              GFP_KERNEL);
        if (!queue) {
                kfree(vqs);
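
The reworked TX paths above share two patterns: completions are accumulated in vq->heads[] and flushed to the used ring only when done_idx reaches VHOST_NET_BATCH, and both loops yield with vhost_poll_queue() once vhost_exceeds_weight() trips, so one busy queue cannot monopolize the worker thread. A minimal userspace-style sketch of the batching idea (BATCH and the ring operations stand in for VHOST_NET_BATCH and the vhost helpers defined elsewhere in this file):

        #include <stddef.h>

        #define BATCH 64                        /* stands in for VHOST_NET_BATCH */

        struct used_elem { unsigned int id, len; };

        static struct used_elem heads[BATCH];
        static size_t done_idx;

        /* stand-in for vhost_net_signal_used(): publish and kick once */
        static void signal_used(void)
        {
                /* add heads[0..done_idx) to the used ring, notify the guest */
                done_idx = 0;
        }

        /* per-packet completion: defer the (expensive) guest notification */
        static void complete_tx(unsigned int head)
        {
                heads[done_idx].id  = head;
                heads[done_idx].len = 0;        /* TX buffers are read, not written */
                if (++done_idx >= BATCH)
                        signal_used();          /* flush a full batch */
        }
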
index a1b18082991b2088711a2bca42f173fa951e49e8..19db5f672a9d4f0ad89c278d17dbabe5b5b68e9a 100644 (file)
@@ -346,7 +346,6 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call,
        struct rxrpc_call *rxcall;
        struct msghdr msg;
        struct kvec iov[1];
-       size_t offset;
        s64 tx_total_len;
        int ret;
 
@@ -433,10 +432,10 @@ error_do_abort:
                rxrpc_kernel_abort_call(call->net->socket, rxcall,
                                        RX_USER_ABORT, ret, "KSD");
        } else {
-               offset = 0;
-               rxrpc_kernel_recv_data(call->net->socket, rxcall, NULL,
-                                      0, &offset, false, &call->abort_code,
-                                      &call->service_id);
+               iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, NULL, 0, 0);
+               rxrpc_kernel_recv_data(call->net->socket, rxcall,
+                                      &msg.msg_iter, false,
+                                      &call->abort_code, &call->service_id);
                ac->abort_code = call->abort_code;
                ac->responded = true;
        }
@@ -467,13 +466,14 @@ static void afs_deliver_to_call(struct afs_call *call)
               state == AFS_CALL_SV_AWAIT_ACK
               ) {
                if (state == AFS_CALL_SV_AWAIT_ACK) {
-                       size_t offset = 0;
+                       struct iov_iter iter;
+
+                       iov_iter_kvec(&iter, READ | ITER_KVEC, NULL, 0, 0);
                        ret = rxrpc_kernel_recv_data(call->net->socket,
-                                                    call->rxcall,
-                                                    NULL, 0, &offset, false,
+                                                    call->rxcall, &iter, false,
                                                     &remote_abort,
                                                     &call->service_id);
-                       trace_afs_recv_data(call, 0, offset, false, ret);
+                       trace_afs_recv_data(call, 0, 0, false, ret);
 
                        if (ret == -EINPROGRESS || ret == -EAGAIN)
                                return;
@@ -894,6 +894,8 @@ int afs_extract_data(struct afs_call *call, void *buf, size_t count,
                     bool want_more)
 {
        struct afs_net *net = call->net;
+       struct iov_iter iter;
+       struct kvec iov;
        enum afs_call_state state;
        u32 remote_abort = 0;
        int ret;
@@ -903,10 +905,14 @@ int afs_extract_data(struct afs_call *call, void *buf, size_t count,
 
        ASSERTCMP(call->offset, <=, count);
 
-       ret = rxrpc_kernel_recv_data(net->socket, call->rxcall,
-                                    buf, count, &call->offset,
+       iov.iov_base = buf + call->offset;
+       iov.iov_len = count - call->offset;
+       iov_iter_kvec(&iter, ITER_KVEC | READ, &iov, 1, count - call->offset);
+
+       ret = rxrpc_kernel_recv_data(net->socket, call->rxcall, &iter,
                                     want_more, &remote_abort,
                                     &call->service_id);
+       call->offset += (count - call->offset) - iov_iter_count(&iter);
        trace_afs_recv_data(call, count, call->offset, want_more, ret);
        if (ret == 0 || ret == -EAGAIN)
                return ret;
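
With rxrpc_kernel_recv_data() now taking an iov_iter instead of a buffer/offset pair, the caller recovers the number of bytes consumed from the iterator's remaining count rather than from an offset out-parameter; that is what the `call->offset += (count - call->offset) - iov_iter_count(&iter);` line above computes. A sketch of the general pattern, using only the iov_iter API:

        /* general pattern: bytes consumed = initial count - what remains */
        static size_t iter_consumed(const struct iov_iter *iter, size_t initial)
        {
                return initial - iov_iter_count(iter);
        }
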
index d66cc077730386e811679abcecbf7efebcdf11b4..4ca0b5c1819244970d0b6d20bbc0144d5f3d40be 100644 (file)
@@ -619,6 +619,7 @@ struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry)
 
 static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
                                             const char *name, umode_t mode,
+                                            kuid_t uid, kgid_t gid,
                                             unsigned flags)
 {
        struct kernfs_node *kn;
@@ -661,8 +662,22 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
        kn->mode = mode;
        kn->flags = flags;
 
+       if (!uid_eq(uid, GLOBAL_ROOT_UID) || !gid_eq(gid, GLOBAL_ROOT_GID)) {
+               struct iattr iattr = {
+                       .ia_valid = ATTR_UID | ATTR_GID,
+                       .ia_uid = uid,
+                       .ia_gid = gid,
+               };
+
+               ret = __kernfs_setattr(kn, &iattr);
+               if (ret < 0)
+                       goto err_out3;
+       }
+
        return kn;
 
+ err_out3:
+       idr_remove(&root->ino_idr, kn->id.ino);
  err_out2:
        kmem_cache_free(kernfs_node_cache, kn);
  err_out1:
@@ -672,11 +687,13 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
 
 struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
                                    const char *name, umode_t mode,
+                                   kuid_t uid, kgid_t gid,
                                    unsigned flags)
 {
        struct kernfs_node *kn;
 
-       kn = __kernfs_new_node(kernfs_root(parent), name, mode, flags);
+       kn = __kernfs_new_node(kernfs_root(parent),
+                              name, mode, uid, gid, flags);
        if (kn) {
                kernfs_get(parent);
                kn->parent = parent;
@@ -946,6 +963,7 @@ struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops,
        root->next_generation = 1;
 
        kn = __kernfs_new_node(root, "", S_IFDIR | S_IRUGO | S_IXUGO,
+                              GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
                               KERNFS_DIR);
        if (!kn) {
                idr_destroy(&root->ino_idr);
@@ -984,6 +1002,8 @@ void kernfs_destroy_root(struct kernfs_root *root)
  * @parent: parent in which to create a new directory
  * @name: name of the new directory
  * @mode: mode of the new directory
+ * @uid: uid of the new directory
+ * @gid: gid of the new directory
  * @priv: opaque data associated with the new directory
  * @ns: optional namespace tag of the directory
  *
@@ -991,13 +1011,15 @@ void kernfs_destroy_root(struct kernfs_root *root)
  */
 struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent,
                                         const char *name, umode_t mode,
+                                        kuid_t uid, kgid_t gid,
                                         void *priv, const void *ns)
 {
        struct kernfs_node *kn;
        int rc;
 
        /* allocate */
-       kn = kernfs_new_node(parent, name, mode | S_IFDIR, KERNFS_DIR);
+       kn = kernfs_new_node(parent, name, mode | S_IFDIR,
+                            uid, gid, KERNFS_DIR);
        if (!kn)
                return ERR_PTR(-ENOMEM);
 
@@ -1028,7 +1050,8 @@ struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent,
        int rc;
 
        /* allocate */
-       kn = kernfs_new_node(parent, name, S_IRUGO|S_IXUGO|S_IFDIR, KERNFS_DIR);
+       kn = kernfs_new_node(parent, name, S_IRUGO|S_IXUGO|S_IFDIR,
+                            GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, KERNFS_DIR);
        if (!kn)
                return ERR_PTR(-ENOMEM);
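
Every kernfs node creation now threads explicit ownership down to __kernfs_new_node(), which applies it with __kernfs_setattr() whenever it differs from global root. A hedged sketch of a caller using the extended API (the name, mode and ids are illustrative):

        #include <linux/kernfs.h>
        #include <linux/uidgid.h>

        static struct kernfs_node *make_user_dir(struct kernfs_node *parent,
                                                 void *priv)
        {
                kuid_t uid = KUIDT_INIT(1000);  /* illustrative non-root owner */
                kgid_t gid = KGIDT_INIT(1000);

                /* returns ERR_PTR() on failure, as shown in the hunk above */
                return kernfs_create_dir_ns(parent, "example",
                                            S_IRWXU | S_IRUGO | S_IXUGO,
                                            uid, gid, priv, NULL);
        }
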
 
index 2015d8c45e4a6db421505540be7b7fd055700cfd..dbf5bc250bfdf39076066707f381d84e2857b0b4 100644 (file)
@@ -965,6 +965,8 @@ const struct file_operations kernfs_file_fops = {
  * @parent: directory to create the file in
  * @name: name of the file
  * @mode: mode of the file
+ * @uid: uid of the file
+ * @gid: gid of the file
  * @size: size of the file
  * @ops: kernfs operations for the file
  * @priv: private data for the file
@@ -975,7 +977,8 @@ const struct file_operations kernfs_file_fops = {
  */
 struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent,
                                         const char *name,
-                                        umode_t mode, loff_t size,
+                                        umode_t mode, kuid_t uid, kgid_t gid,
+                                        loff_t size,
                                         const struct kernfs_ops *ops,
                                         void *priv, const void *ns,
                                         struct lock_class_key *key)
@@ -986,7 +989,8 @@ struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent,
 
        flags = KERNFS_FILE;
 
-       kn = kernfs_new_node(parent, name, (mode & S_IALLUGO) | S_IFREG, flags);
+       kn = kernfs_new_node(parent, name, (mode & S_IALLUGO) | S_IFREG,
+                            uid, gid, flags);
        if (!kn)
                return ERR_PTR(-ENOMEM);
 
index 3d73fe9d56e235f51907eac11510aabae9fc3344..80cebcd94c9045add76d22037e05c466aa0829ff 100644 (file)
@@ -63,7 +63,7 @@ out_unlock:
        return ret;
 }
 
-static int __kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr)
+int __kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr)
 {
        struct kernfs_iattrs *attrs;
        struct iattr *iattrs;
index 0f260dcca1770427e7f292fe8ac1d2abc410e029..3d83b114bb08059bb52e9a1cf998e74523e08b1f 100644 (file)
@@ -90,6 +90,7 @@ int kernfs_iop_setattr(struct dentry *dentry, struct iattr *iattr);
 int kernfs_iop_getattr(const struct path *path, struct kstat *stat,
                       u32 request_mask, unsigned int query_flags);
 ssize_t kernfs_iop_listxattr(struct dentry *dentry, char *buf, size_t size);
+int __kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr);
 
 /*
  * dir.c
@@ -104,6 +105,7 @@ void kernfs_put_active(struct kernfs_node *kn);
 int kernfs_add_one(struct kernfs_node *kn);
 struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
                                    const char *name, umode_t mode,
+                                   kuid_t uid, kgid_t gid,
                                    unsigned flags);
 struct kernfs_node *kernfs_find_and_get_node_by_ino(struct kernfs_root *root,
                                                    unsigned int ino);
index 08ccabd7047f390151b8ab31f72baa21efeefb6f..5ffed48f3d0e7cebb0f58cd7b9923ae75c8e7c92 100644 (file)
@@ -21,6 +21,7 @@
  * @target: target node for the symlink to point to
  *
  * Returns the created node on success, ERR_PTR() value on error.
+ * Ownership of the link matches ownership of the target.
  */
 struct kernfs_node *kernfs_create_link(struct kernfs_node *parent,
                                       const char *name,
@@ -28,8 +29,16 @@ struct kernfs_node *kernfs_create_link(struct kernfs_node *parent,
 {
        struct kernfs_node *kn;
        int error;
+       kuid_t uid = GLOBAL_ROOT_UID;
+       kgid_t gid = GLOBAL_ROOT_GID;
 
-       kn = kernfs_new_node(parent, name, S_IFLNK|S_IRWXUGO, KERNFS_LINK);
+       if (target->iattr) {
+               uid = target->iattr->ia_iattr.ia_uid;
+               gid = target->iattr->ia_iattr.ia_gid;
+       }
+
+       kn = kernfs_new_node(parent, name, S_IFLNK|S_IRWXUGO, uid, gid,
+                            KERNFS_LINK);
        if (!kn)
                return ERR_PTR(-ENOMEM);
 
index 58eba92a0e41fed0d5da68345be1c452815afb09..feeae8081c229d873f7d6226847b02b5ee583bc1 100644 (file)
@@ -40,6 +40,8 @@ void sysfs_warn_dup(struct kernfs_node *parent, const char *name)
 int sysfs_create_dir_ns(struct kobject *kobj, const void *ns)
 {
        struct kernfs_node *parent, *kn;
+       kuid_t uid;
+       kgid_t gid;
 
        BUG_ON(!kobj);
 
@@ -51,8 +53,11 @@ int sysfs_create_dir_ns(struct kobject *kobj, const void *ns)
        if (!parent)
                return -ENOENT;
 
+       kobject_get_ownership(kobj, &uid, &gid);
+
        kn = kernfs_create_dir_ns(parent, kobject_name(kobj),
-                                 S_IRWXU | S_IRUGO | S_IXUGO, kobj, ns);
+                                 S_IRWXU | S_IRUGO | S_IXUGO, uid, gid,
+                                 kobj, ns);
        if (IS_ERR(kn)) {
                if (PTR_ERR(kn) == -EEXIST)
                        sysfs_warn_dup(parent, kobject_name(kobj));
index 5c13f29bfcdb72024bdee5828a453d73e6dd2b5a..052e5ad9a4d2122571389c1d3dc42e99dfda037e 100644 (file)
@@ -245,7 +245,7 @@ static const struct kernfs_ops sysfs_bin_kfops_mmap = {
 
 int sysfs_add_file_mode_ns(struct kernfs_node *parent,
                           const struct attribute *attr, bool is_bin,
-                          umode_t mode, const void *ns)
+                          umode_t mode, kuid_t uid, kgid_t gid, const void *ns)
 {
        struct lock_class_key *key = NULL;
        const struct kernfs_ops *ops;
@@ -302,8 +302,9 @@ int sysfs_add_file_mode_ns(struct kernfs_node *parent,
        if (!attr->ignore_lockdep)
                key = attr->key ?: (struct lock_class_key *)&attr->skey;
 #endif
-       kn = __kernfs_create_file(parent, attr->name, mode & 0777, size, ops,
-                                 (void *)attr, ns, key);
+
+       kn = __kernfs_create_file(parent, attr->name, mode & 0777, uid, gid,
+                                 size, ops, (void *)attr, ns, key);
        if (IS_ERR(kn)) {
                if (PTR_ERR(kn) == -EEXIST)
                        sysfs_warn_dup(parent, attr->name);
@@ -312,12 +313,6 @@ int sysfs_add_file_mode_ns(struct kernfs_node *parent,
        return 0;
 }
 
-int sysfs_add_file(struct kernfs_node *parent, const struct attribute *attr,
-                  bool is_bin)
-{
-       return sysfs_add_file_mode_ns(parent, attr, is_bin, attr->mode, NULL);
-}
-
 /**
  * sysfs_create_file_ns - create an attribute file for an object with custom ns
  * @kobj: object we're creating for
@@ -327,9 +322,14 @@ int sysfs_add_file(struct kernfs_node *parent, const struct attribute *attr,
 int sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr,
                         const void *ns)
 {
+       kuid_t uid;
+       kgid_t gid;
+
        BUG_ON(!kobj || !kobj->sd || !attr);
 
-       return sysfs_add_file_mode_ns(kobj->sd, attr, false, attr->mode, ns);
+       kobject_get_ownership(kobj, &uid, &gid);
+       return sysfs_add_file_mode_ns(kobj->sd, attr, false, attr->mode,
+                                     uid, gid, ns);
 
 }
 EXPORT_SYMBOL_GPL(sysfs_create_file_ns);
@@ -358,6 +358,8 @@ int sysfs_add_file_to_group(struct kobject *kobj,
                const struct attribute *attr, const char *group)
 {
        struct kernfs_node *parent;
+       kuid_t uid;
+       kgid_t gid;
        int error;
 
        if (group) {
@@ -370,7 +372,9 @@ int sysfs_add_file_to_group(struct kobject *kobj,
        if (!parent)
                return -ENOENT;
 
-       error = sysfs_add_file(parent, attr, false);
+       kobject_get_ownership(kobj, &uid, &gid);
+       error = sysfs_add_file_mode_ns(parent, attr, false,
+                                      attr->mode, uid, gid, NULL);
        kernfs_put(parent);
 
        return error;
@@ -486,9 +490,14 @@ EXPORT_SYMBOL_GPL(sysfs_remove_file_from_group);
 int sysfs_create_bin_file(struct kobject *kobj,
                          const struct bin_attribute *attr)
 {
+       kuid_t uid;
+       kgid_t gid;
+
        BUG_ON(!kobj || !kobj->sd || !attr);
 
-       return sysfs_add_file(kobj->sd, &attr->attr, true);
+       kobject_get_ownership(kobj, &uid, &gid);
+       return sysfs_add_file_mode_ns(kobj->sd, &attr->attr, true,
+                                     attr->attr.mode, uid, gid, NULL);
 }
 EXPORT_SYMBOL_GPL(sysfs_create_bin_file);
 
index 4802ec0e1e3a5cff9d3e2c0c2d2f093ce4c391d7..c7a716c4acc961acddd1bf662f6f8f36c23a0e88 100644 (file)
@@ -31,6 +31,7 @@ static void remove_files(struct kernfs_node *parent,
 }
 
 static int create_files(struct kernfs_node *parent, struct kobject *kobj,
+                       kuid_t uid, kgid_t gid,
                        const struct attribute_group *grp, int update)
 {
        struct attribute *const *attr;
@@ -60,7 +61,7 @@ static int create_files(struct kernfs_node *parent, struct kobject *kobj,
 
                        mode &= SYSFS_PREALLOC | 0664;
                        error = sysfs_add_file_mode_ns(parent, *attr, false,
-                                                      mode, NULL);
+                                                      mode, uid, gid, NULL);
                        if (unlikely(error))
                                break;
                }
@@ -90,7 +91,8 @@ static int create_files(struct kernfs_node *parent, struct kobject *kobj,
                        mode &= SYSFS_PREALLOC | 0664;
                        error = sysfs_add_file_mode_ns(parent,
                                        &(*bin_attr)->attr, true,
-                                       mode, NULL);
+                                       mode,
+                                       uid, gid, NULL);
                        if (error)
                                break;
                }
@@ -106,6 +108,8 @@ static int internal_create_group(struct kobject *kobj, int update,
                                 const struct attribute_group *grp)
 {
        struct kernfs_node *kn;
+       kuid_t uid;
+       kgid_t gid;
        int error;
 
        BUG_ON(!kobj || (!update && !kobj->sd));
@@ -118,9 +122,11 @@ static int internal_create_group(struct kobject *kobj, int update,
                        kobj->name, grp->name ?: "");
                return -EINVAL;
        }
+       kobject_get_ownership(kobj, &uid, &gid);
        if (grp->name) {
-               kn = kernfs_create_dir(kobj->sd, grp->name,
-                                      S_IRWXU | S_IRUGO | S_IXUGO, kobj);
+               kn = kernfs_create_dir_ns(kobj->sd, grp->name,
+                                         S_IRWXU | S_IRUGO | S_IXUGO,
+                                         uid, gid, kobj, NULL);
                if (IS_ERR(kn)) {
                        if (PTR_ERR(kn) == -EEXIST)
                                sysfs_warn_dup(kobj->sd, grp->name);
@@ -129,7 +135,7 @@ static int internal_create_group(struct kobject *kobj, int update,
        } else
                kn = kobj->sd;
        kernfs_get(kn);
-       error = create_files(kn, kobj, grp, update);
+       error = create_files(kn, kobj, uid, gid, grp, update);
        if (error) {
                if (grp->name)
                        kernfs_remove(kn);
@@ -281,6 +287,8 @@ int sysfs_merge_group(struct kobject *kobj,
                       const struct attribute_group *grp)
 {
        struct kernfs_node *parent;
+       kuid_t uid;
+       kgid_t gid;
        int error = 0;
        struct attribute *const *attr;
        int i;
@@ -289,8 +297,11 @@ int sysfs_merge_group(struct kobject *kobj,
        if (!parent)
                return -ENOENT;
 
+       kobject_get_ownership(kobj, &uid, &gid);
+
        for ((i = 0, attr = grp->attrs); *attr && !error; (++i, ++attr))
-               error = sysfs_add_file(parent, *attr, false);
+               error = sysfs_add_file_mode_ns(parent, *attr, false,
+                                              (*attr)->mode, uid, gid, NULL);
        if (error) {
                while (--i >= 0)
                        kernfs_remove_by_name(parent, (*--attr)->name);
index d098e015fcc94aacdb1f81b4298ee7193f20fb3f..0050cc0c0236de8a13166dd100076651e95c0cad 100644 (file)
@@ -27,11 +27,10 @@ void sysfs_warn_dup(struct kernfs_node *parent, const char *name);
 /*
  * file.c
  */
-int sysfs_add_file(struct kernfs_node *parent,
-                  const struct attribute *attr, bool is_bin);
 int sysfs_add_file_mode_ns(struct kernfs_node *parent,
                           const struct attribute *attr, bool is_bin,
-                          umode_t amode, const void *ns);
+                          umode_t amode, kuid_t uid, kgid_t gid,
+                          const void *ns);
 
 /*
  * symlink.c
index cf2588d81148972caffee512f5136d0255e0d92a..65a6981eef7bf5109e44f083ff5fcbb0dc64e414 100644 (file)
                (typeof(_mask))(((_reg) & (_mask)) >> __bf_shf(_mask)); \
        })
 
-extern void __compiletime_warning("value doesn't fit into mask")
+extern void __compiletime_error("value doesn't fit into mask")
 __field_overflow(void);
 extern void __compiletime_error("bad bitfield mask")
 __bad_mask(void);
@@ -121,8 +121,8 @@ static __always_inline u64 field_mask(u64 field)
 #define ____MAKE_OP(type,base,to,from)                                 \
 static __always_inline __##type type##_encode_bits(base v, base field) \
 {                                                                      \
-        if (__builtin_constant_p(v) && (v & ~field_multiplier(field))) \
-                           __field_overflow();                         \
+       if (__builtin_constant_p(v) && (v & ~field_mask(field)))        \
+               __field_overflow();                                     \
        return to((v & field_mask(field)) * field_multiplier(field));   \
 }                                                                      \
 static __always_inline __##type type##_replace_bits(__##type old,      \
@@ -143,6 +143,7 @@ static __always_inline base type##_get_bits(__##type v, base field) \
        ____MAKE_OP(le##size,u##size,cpu_to_le##size,le##size##_to_cpu) \
        ____MAKE_OP(be##size,u##size,cpu_to_be##size,be##size##_to_cpu) \
        ____MAKE_OP(u##size,u##size,,)
+____MAKE_OP(u8,u8,,)
 __MAKE_OP(16)
 __MAKE_OP(32)
 __MAKE_OP(64)
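
Two fixes land in this hunk: the constant-overflow check now tests the value against field_mask() (the field's full unshifted width) instead of field_multiplier() (only its lowest bit), and an out-of-range constant is promoted from a compile-time warning to a hard error. The new ____MAKE_OP(u8,u8,,) line also instantiates the u8_* accessors. A sketch of what the corrected check catches (the mask value is illustrative):

        #include <linux/bitfield.h>

        static u8 pack_status(void)
        {
                u8 v;

                v = u8_encode_bits(3, 0x70);    /* ok: 3 fits the 3-bit field */
                /* u8_encode_bits(9, 0x70) would now fail to build:
                 * 9 & ~field_mask(0x70) == 9 & ~0x7 != 0 -> __field_overflow()
                 */
                return v;
        }
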
index 8827e797ff97d0973ddf1d4217a885cee9bb63ee..5b5ad95cf33950f46727f58b36feb2841069aa88 100644 (file)
@@ -85,6 +85,7 @@ struct bpf_map {
        char name[BPF_OBJ_NAME_LEN];
 };
 
+struct bpf_offload_dev;
 struct bpf_offloaded_map;
 
 struct bpf_map_dev_ops {
@@ -352,7 +353,7 @@ struct bpf_prog_array {
        struct bpf_prog *progs[0];
 };
 
-struct bpf_prog_array __rcu *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags);
+struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags);
 void bpf_prog_array_free(struct bpf_prog_array __rcu *progs);
 int bpf_prog_array_length(struct bpf_prog_array __rcu *progs);
 int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
@@ -648,7 +649,15 @@ int bpf_map_offload_delete_elem(struct bpf_map *map, void *key);
 int bpf_map_offload_get_next_key(struct bpf_map *map,
                                 void *key, void *next_key);
 
-bool bpf_offload_dev_match(struct bpf_prog *prog, struct bpf_map *map);
+bool bpf_offload_prog_map_match(struct bpf_prog *prog, struct bpf_map *map);
+
+struct bpf_offload_dev *bpf_offload_dev_create(void);
+void bpf_offload_dev_destroy(struct bpf_offload_dev *offdev);
+int bpf_offload_dev_netdev_register(struct bpf_offload_dev *offdev,
+                                   struct net_device *netdev);
+void bpf_offload_dev_netdev_unregister(struct bpf_offload_dev *offdev,
+                                      struct net_device *netdev);
+bool bpf_offload_dev_match(struct bpf_prog *prog, struct net_device *netdev);
 
 #if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL)
 int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr);
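
The new bpf_offload_dev object lets one offload provider serve several netdevs: a driver creates the device once, registers each port against it, and bpf_offload_dev_match() now checks a program against a netdev rather than a map. A hedged probe/remove sketch built only from the prototypes above (error handling is abbreviated, and the ERR_PTR convention on create is an assumption):

        static struct bpf_offload_dev *odev;

        static int my_probe(struct net_device *netdev)
        {
                int err;

                odev = bpf_offload_dev_create();
                if (IS_ERR(odev))               /* assumed error convention */
                        return PTR_ERR(odev);

                err = bpf_offload_dev_netdev_register(odev, netdev);
                if (err)
                        bpf_offload_dev_destroy(odev);
                return err;
        }

        static void my_remove(struct net_device *netdev)
        {
                bpf_offload_dev_netdev_unregister(odev, netdev);
                bpf_offload_dev_destroy(odev);
        }
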
index 055aaf5ed9af38a839680c0570a8158db5e874b5..a83e1f632eb70eeda82581403f2916e7c7346f79 100644 (file)
@@ -143,7 +143,12 @@ u8 can_dlc2len(u8 can_dlc);
 /* map the sanitized data length to an appropriate data length code */
 u8 can_len2dlc(u8 len);
 
-struct net_device *alloc_candev(int sizeof_priv, unsigned int echo_skb_max);
+struct net_device *alloc_candev_mqs(int sizeof_priv, unsigned int echo_skb_max,
+                                   unsigned int txqs, unsigned int rxqs);
+#define alloc_candev(sizeof_priv, echo_skb_max) \
+       alloc_candev_mqs(sizeof_priv, echo_skb_max, 1, 1)
+#define alloc_candev_mq(sizeof_priv, echo_skb_max, count) \
+       alloc_candev_mqs(sizeof_priv, echo_skb_max, count, count)
 void free_candev(struct net_device *dev);
 
 /* a candev safe wrapper around netdev_priv */
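
alloc_candev() becomes the 1/1 special case of alloc_candev_mqs(), mirroring alloc_etherdev_mqs(), and alloc_candev_mq() sets symmetric TX/RX queue counts. A sketch of a multi-queue CAN driver allocation (struct my_priv and the counts are illustrative):

        struct my_priv { int dummy; };

        static struct net_device *my_alloc(void)
        {
                /* 8 echo skbs, 4 TX and 4 RX queues */
                struct net_device *ndev =
                        alloc_candev_mq(sizeof(struct my_priv), 8, 4);

                if (!ndev)
                        return NULL;
                /* ... setup ...; undo with free_candev(ndev) on error paths */
                return ndev;
        }
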
index bf53d893ad02bbe460dd64ce03d8cfe10d709931..57f20a0a7794908b47fdb151e530d53e3a598b54 100644 (file)
@@ -115,12 +115,17 @@ extern struct cpumask __cpu_active_mask;
 #define cpu_active(cpu)                ((cpu) == 0)
 #endif
 
-/* verify cpu argument to cpumask_* operators */
-static inline unsigned int cpumask_check(unsigned int cpu)
+static inline void cpu_max_bits_warn(unsigned int cpu, unsigned int bits)
 {
 #ifdef CONFIG_DEBUG_PER_CPU_MAPS
-       WARN_ON_ONCE(cpu >= nr_cpumask_bits);
+       WARN_ON_ONCE(cpu >= bits);
 #endif /* CONFIG_DEBUG_PER_CPU_MAPS */
+}
+
+/* verify cpu argument to cpumask_* operators */
+static inline unsigned int cpumask_check(unsigned int cpu)
+{
+       cpu_max_bits_warn(cpu, nr_cpumask_bits);
        return cpu;
 }
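
Factoring the bounds warning out of cpumask_check() lets other fixed-size index spaces reuse it. A hedged sketch of such a reuse (node_check() and the nr_node_ids bound are illustrative, not part of this patch):

        /* hypothetical companion to cpumask_check() for node ids; like the
         * cpu variant, it only warns under CONFIG_DEBUG_PER_CPU_MAPS */
        static inline unsigned int node_check(unsigned int node)
        {
                cpu_max_bits_warn(node, nr_node_ids);
                return node;
        }
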
 
index 055a69dbcd18a8a5286d8e612fe16c1ae6245455..fe6ccb6dc11964cacd63761af6fe7b2cb57f6133 100644 (file)
@@ -384,6 +384,9 @@ int subsys_virtual_register(struct bus_type *subsys,
  * @shutdown_pre: Called at shut-down time before driver shutdown.
 * @ns_type:   Callbacks so sysfs can determine namespaces.
 * @namespace: Namespace that the device of this class belongs to.
+ * @get_ownership: Allows class to specify uid/gid of the sysfs directories
+ *             for the devices belonging to the class. Usually tied to
+ *             device's namespace.
  * @pm:                The default device power management operations of this class.
  * @p:         The private data of the driver core, no one other than the
  *             driver core can touch this.
@@ -413,6 +416,8 @@ struct class {
        const struct kobj_ns_type_operations *ns_type;
        const void *(*namespace)(struct device *dev);
 
+       void (*get_ownership)(struct device *dev, kuid_t *uid, kgid_t *gid);
+
        const struct dev_pm_ops *pm;
 
        struct subsys_private *p;
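
The new class->get_ownership() hook is what feeds the uid/gid plumbing above: when a device's sysfs entries are created, its class can map ownership to root inside the namespace the device lives in, so for instance a container can manage its own devices. A hedged sketch (dev_user_ns() is a hypothetical accessor; callers are assumed to pre-initialize *uid/*gid to global root, matching the kobject_get_ownership() call sites elsewhere in this patch):

        static void my_class_get_ownership(struct device *dev,
                                           kuid_t *uid, kgid_t *gid)
        {
                struct user_namespace *ns = dev_user_ns(dev);   /* hypothetical */
                kuid_t root_uid = make_kuid(ns, 0);
                kgid_t root_gid = make_kgid(ns, 0);

                /* leave the defaults in place if the mapping does not exist */
                if (uid_valid(root_uid))
                        *uid = root_uid;
                if (gid_valid(root_gid))
                        *gid = root_gid;
        }
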
index 79563840c295cfcd40d17287eebcf793ee010020..572e11bb869672cbc9b6da8a375760b192f9eaf9 100644 (file)
@@ -59,8 +59,7 @@ struct net_device *devm_alloc_etherdev_mqs(struct device *dev, int sizeof_priv,
                                           unsigned int rxqs);
 #define devm_alloc_etherdev(dev, sizeof_priv) devm_alloc_etherdev_mqs(dev, sizeof_priv, 1, 1)
 
-struct sk_buff **eth_gro_receive(struct sk_buff **head,
-                                struct sk_buff *skb);
+struct sk_buff *eth_gro_receive(struct list_head *head, struct sk_buff *skb);
 int eth_gro_complete(struct sk_buff *skb, int nhoff);
 
 /* Reserved Ethernet Addresses per IEEE 802.1Q */
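
eth_gro_receive() moves from the open-coded sk_buff ** chain to a list_head of held packets, part of converting the GRO engine to standard lists. A hedged sketch of how a gro_receive callback walks the new list (the per-skb list member is assumed from this conversion):

        static struct sk_buff *my_gro_receive(struct list_head *head,
                                              struct sk_buff *skb)
        {
                struct sk_buff *p;

                list_for_each_entry(p, head, list) {    /* skb->list: assumed */
                        if (!NAPI_GRO_CB(p)->same_flow)
                                continue;
                        /* compare headers of p and skb; merge or mark flush */
                }
                return NULL;    /* NULL: held for merging; non-NULL: flush */
        }
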
index b462d9ea80078c01980b0ff596fd80bc10d4870c..dc3dac40f0698f28824481371514ce1974599be5 100644 (file)
@@ -11,9 +11,8 @@
 
 /*
  * qoriq ptp registers
- * Generated by regen.tcl on Thu May 13 01:38:57 PM CEST 2010
  */
-struct qoriq_ptp_registers {
+struct ctrl_regs {
        u32 tmr_ctrl;     /* Timer control register */
        u32 tmr_tevent;   /* Timestamp event register */
        u32 tmr_temask;   /* Timer event mask register */
@@ -28,22 +27,47 @@ struct qoriq_ptp_registers {
        u8  res1[4];
        u32 tmroff_h;     /* Timer offset high */
        u32 tmroff_l;     /* Timer offset low */
-       u8  res2[8];
+};
+
+struct alarm_regs {
        u32 tmr_alarm1_h; /* Timer alarm 1 high register */
        u32 tmr_alarm1_l; /* Timer alarm 1 low register */
        u32 tmr_alarm2_h; /* Timer alarm 2 high register */
        u32 tmr_alarm2_l; /* Timer alarm 2 low register */
-       u8  res3[48];
+};
+
+struct fiper_regs {
        u32 tmr_fiper1;   /* Timer fixed period interval */
        u32 tmr_fiper2;   /* Timer fixed period interval */
        u32 tmr_fiper3;   /* Timer fixed period interval */
-       u8  res4[20];
+};
+
+struct etts_regs {
        u32 tmr_etts1_h;  /* Timestamp of general purpose external trigger */
        u32 tmr_etts1_l;  /* Timestamp of general purpose external trigger */
        u32 tmr_etts2_h;  /* Timestamp of general purpose external trigger */
        u32 tmr_etts2_l;  /* Timestamp of general purpose external trigger */
 };
 
+struct qoriq_ptp_registers {
+       struct ctrl_regs __iomem *ctrl_regs;
+       struct alarm_regs __iomem *alarm_regs;
+       struct fiper_regs __iomem *fiper_regs;
+       struct etts_regs __iomem *etts_regs;
+};
+
+/* Offset definitions for the four register groups */
+#define CTRL_REGS_OFFSET       0x0
+#define ALARM_REGS_OFFSET      0x40
+#define FIPER_REGS_OFFSET      0x80
+#define ETTS_REGS_OFFSET       0xa0
+
+#define FMAN_CTRL_REGS_OFFSET  0x80
+#define FMAN_ALARM_REGS_OFFSET 0xb8
+#define FMAN_FIPER_REGS_OFFSET 0xd0
+#define FMAN_ETTS_REGS_OFFSET  0xe0
+
+
 /* Bit definitions for the TMR_CTRL register */
 #define ALM1P                 (1<<31) /* Alarm1 output polarity */
 #define ALM2P                 (1<<30) /* Alarm2 output polarity */
@@ -105,10 +129,10 @@ struct qoriq_ptp_registers {
 #define DRIVER         "ptp_qoriq"
 #define DEFAULT_CKSEL  1
 #define N_EXT_TS       2
-#define REG_SIZE       sizeof(struct qoriq_ptp_registers)
 
 struct qoriq_ptp {
-       struct qoriq_ptp_registers __iomem *regs;
+       void __iomem *base;
+       struct qoriq_ptp_registers regs;
        spinlock_t lock; /* protects regs */
        struct ptp_clock *clock;
        struct ptp_clock_info caps;
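
The monolithic register struct is split into four independently placed groups, presumably because the eTSEC and FMAN integrations lay the same blocks out at different offsets; the driver now keeps a void __iomem *base plus four group pointers. A sketch of the wiring the offsets above imply (the fman flag is illustrative):

        static void qoriq_ptp_map_regs(struct qoriq_ptp *ptp, bool fman)
        {
                struct qoriq_ptp_registers *r = &ptp->regs;

                if (fman) {
                        r->ctrl_regs  = ptp->base + FMAN_CTRL_REGS_OFFSET;
                        r->alarm_regs = ptp->base + FMAN_ALARM_REGS_OFFSET;
                        r->fiper_regs = ptp->base + FMAN_FIPER_REGS_OFFSET;
                        r->etts_regs  = ptp->base + FMAN_ETTS_REGS_OFFSET;
                } else {
                        r->ctrl_regs  = ptp->base + CTRL_REGS_OFFSET;
                        r->alarm_regs = ptp->base + ALARM_REGS_OFFSET;
                        r->fiper_regs = ptp->base + FIPER_REGS_OFFSET;
                        r->etts_regs  = ptp->base + ETTS_REGS_OFFSET;
                }
        }
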
index e5fd2707b6df79f4b8d1d6bb4bf01e981e379f3a..9493d4a388dbb9a3ac71b0fe9e56566eb90c8327 100644 (file)
@@ -93,6 +93,7 @@ enum hwmon_temp_attributes {
 #define HWMON_T_MIN_ALARM      BIT(hwmon_temp_min_alarm)
 #define HWMON_T_MAX_ALARM      BIT(hwmon_temp_max_alarm)
 #define HWMON_T_CRIT_ALARM     BIT(hwmon_temp_crit_alarm)
+#define HWMON_T_LCRIT_ALARM    BIT(hwmon_temp_lcrit_alarm)
 #define HWMON_T_EMERGENCY_ALARM        BIT(hwmon_temp_emergency_alarm)
 #define HWMON_T_FAULT          BIT(hwmon_temp_fault)
 #define HWMON_T_OFFSET         BIT(hwmon_temp_offset)
@@ -187,12 +188,16 @@ enum hwmon_power_attributes {
        hwmon_power_cap_hyst,
        hwmon_power_cap_max,
        hwmon_power_cap_min,
+       hwmon_power_min,
        hwmon_power_max,
        hwmon_power_crit,
+       hwmon_power_lcrit,
        hwmon_power_label,
        hwmon_power_alarm,
        hwmon_power_cap_alarm,
+       hwmon_power_min_alarm,
        hwmon_power_max_alarm,
+       hwmon_power_lcrit_alarm,
        hwmon_power_crit_alarm,
 };
 
@@ -213,12 +218,16 @@ enum hwmon_power_attributes {
 #define HWMON_P_CAP_HYST               BIT(hwmon_power_cap_hyst)
 #define HWMON_P_CAP_MAX                        BIT(hwmon_power_cap_max)
 #define HWMON_P_CAP_MIN                        BIT(hwmon_power_cap_min)
+#define HWMON_P_MIN                    BIT(hwmon_power_min)
 #define HWMON_P_MAX                    BIT(hwmon_power_max)
+#define HWMON_P_LCRIT                  BIT(hwmon_power_lcrit)
 #define HWMON_P_CRIT                   BIT(hwmon_power_crit)
 #define HWMON_P_LABEL                  BIT(hwmon_power_label)
 #define HWMON_P_ALARM                  BIT(hwmon_power_alarm)
 #define HWMON_P_CAP_ALARM              BIT(hwmon_power_cap_alarm)
+#define HWMON_P_MIN_ALARM              BIT(hwmon_power_min_alarm)
 #define HWMON_P_MAX_ALARM              BIT(hwmon_power_max_alarm)
+#define HWMON_P_LCRIT_ALARM            BIT(hwmon_power_lcrit_alarm)
 #define HWMON_P_CRIT_ALARM             BIT(hwmon_power_crit_alarm)
 
 enum hwmon_energy_attributes {
@@ -389,4 +398,27 @@ devm_hwmon_device_register_with_info(struct device *dev,
 void hwmon_device_unregister(struct device *dev);
 void devm_hwmon_device_unregister(struct device *dev);
 
+/**
+ * hwmon_is_bad_char - Is the char invalid in a hwmon name
+ * @ch: the char to be considered
+ *
+ * hwmon_is_bad_char() can be used to determine if the given character
+ * may not be used in a hwmon name.
+ *
+ * Returns true if the char is invalid, false otherwise.
+ */
+static inline bool hwmon_is_bad_char(const char ch)
+{
+       switch (ch) {
+       case '-':
+       case '*':
+       case ' ':
+       case '\t':
+       case '\n':
+               return true;
+       default:
+               return false;
+       }
+}
+
 #endif
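
hwmon_is_bad_char() gives drivers a common test for the characters the switch above rejects in a hwmon name. A typical use is sanitizing a device name before registration (sketch):

        static void hwmon_sanitize(char *name)
        {
                char *p;

                for (p = name; *p; p++)
                        if (hwmon_is_bad_char(*p))
                                *p = '_';       /* replace '-', '*', whitespace */
        }
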
index 8fe7e4306816f44c586e860744031ae3d038df6f..9c03a7d5e400df9f2853c60c9816c150c447d937 100644 (file)
@@ -1433,11 +1433,13 @@ struct ieee80211_ht_operation {
 #define IEEE80211_DELBA_PARAM_INITIATOR_MASK 0x0800
 
 /*
- * A-PMDU buffer sizes
- * According to IEEE802.11n spec size varies from 8K to 64K (in powers of 2)
+ * A-MPDU buffer sizes
+ * According to HT, size varies from 8 to 64 frames.
+ * HE adds the ability to have up to 256 frames.
  */
-#define IEEE80211_MIN_AMPDU_BUF 0x8
-#define IEEE80211_MAX_AMPDU_BUF 0x40
+#define IEEE80211_MIN_AMPDU_BUF                0x8
+#define IEEE80211_MAX_AMPDU_BUF_HT     0x40
+#define IEEE80211_MAX_AMPDU_BUF                0x100
 
 
 /* Spatial Multiplexing Power Save Modes (for capability) */
@@ -1539,6 +1541,106 @@ struct ieee80211_vht_operation {
        __le16 basic_mcs_set;
 } __packed;
 
+/**
+ * struct ieee80211_he_cap_elem - HE capabilities element
+ *
+ * This structure is the "HE capabilities element" fixed fields as
+ * described in P802.11ax_D2.0 section 9.4.2.237.2 and 9.4.2.237.3
+ */
+struct ieee80211_he_cap_elem {
+       u8 mac_cap_info[5];
+       u8 phy_cap_info[9];
+} __packed;
+
+#define IEEE80211_TX_RX_MCS_NSS_DESC_MAX_LEN   5
+
+/**
+ * enum ieee80211_he_mcs_support - HE MCS support definitions
+ * @IEEE80211_HE_MCS_SUPPORT_0_7: MCSes 0-7 are supported for the
+ *     number of streams
+ * @IEEE80211_HE_MCS_SUPPORT_0_9: MCSes 0-9 are supported
+ * @IEEE80211_HE_MCS_SUPPORT_0_11: MCSes 0-11 are supported
+ * @IEEE80211_HE_MCS_NOT_SUPPORTED: This number of streams isn't supported
+ *
+ * These definitions are used in each 2-bit subfield of the rx_mcs_*
+ * and tx_mcs_* fields of &struct ieee80211_he_mcs_nss_supp, which are
+ * both split into 8 subfields by number of streams. These values indicate
+ * which MCSes are supported for the number of streams the value appears
+ * for.
+ */
+enum ieee80211_he_mcs_support {
+       IEEE80211_HE_MCS_SUPPORT_0_7    = 0,
+       IEEE80211_HE_MCS_SUPPORT_0_9    = 1,
+       IEEE80211_HE_MCS_SUPPORT_0_11   = 2,
+       IEEE80211_HE_MCS_NOT_SUPPORTED  = 3,
+};
+
+/**
+ * struct ieee80211_he_mcs_nss_supp - HE Tx/Rx HE MCS NSS Support Field
+ *
+ * This structure holds the data required for the Tx/Rx HE MCS NSS Support Field
+ * described in P802.11ax_D2.0 section 9.4.2.237.4
+ *
+ * @rx_mcs_80: Rx MCS map 2 bits for each stream, total 8 streams, for channel
+ *     widths less than 80MHz.
+ * @tx_mcs_80: Tx MCS map 2 bits for each stream, total 8 streams, for channel
+ *     widths less than 80MHz.
+ * @rx_mcs_160: Rx MCS map 2 bits for each stream, total 8 streams, for channel
+ *     width 160MHz.
+ * @tx_mcs_160: Tx MCS map 2 bits for each stream, total 8 streams, for channel
+ *     width 160MHz.
+ * @rx_mcs_80p80: Rx MCS map 2 bits for each stream, total 8 streams, for
+ *     channel width 80p80MHz.
+ * @tx_mcs_80p80: Tx MCS map 2 bits for each stream, total 8 streams, for
+ *     channel width 80p80MHz.
+ */
+struct ieee80211_he_mcs_nss_supp {
+       __le16 rx_mcs_80;
+       __le16 tx_mcs_80;
+       __le16 rx_mcs_160;
+       __le16 tx_mcs_160;
+       __le16 rx_mcs_80p80;
+       __le16 tx_mcs_80p80;
+} __packed;
+
+/**
+ * struct ieee80211_he_operation - HE capabilities element
+ *
+ * This structure is the "HE operation element" fields as
+ * described in P802.11ax_D2.0 section 9.4.2.238
+ */
+struct ieee80211_he_operation {
+       __le32 he_oper_params;
+       __le16 he_mcs_nss_set;
+       /* Optional 0,1,3 or 4 bytes: depends on @he_oper_params */
+       u8 optional[0];
+} __packed;
+
+/**
+ * struct ieee80211_he_mu_edca_param_ac_rec - MU AC Parameter Record field
+ *
+ * This structure is the "MU AC Parameter Record" fields as
+ * described in P802.11ax_D2.0 section 9.4.2.240
+ */
+struct ieee80211_he_mu_edca_param_ac_rec {
+       u8 aifsn;
+       u8 ecw_min_max;
+       u8 mu_edca_timer;
+} __packed;
+
+/**
+ * struct ieee80211_mu_edca_param_set - MU EDCA Parameter Set element
+ *
+ * This structure is the "MU EDCA Parameter Set element" fields as
+ * described in P802.11ax_D2.0 section 9.4.2.240
+ */
+struct ieee80211_mu_edca_param_set {
+       u8 mu_qos_info;
+       struct ieee80211_he_mu_edca_param_ac_rec ac_be;
+       struct ieee80211_he_mu_edca_param_ac_rec ac_bk;
+       struct ieee80211_he_mu_edca_param_ac_rec ac_vi;
+       struct ieee80211_he_mu_edca_param_ac_rec ac_vo;
+} __packed;
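
Each __le16 in ieee80211_he_mcs_nss_supp packs eight 2-bit ieee80211_he_mcs_support values, one per spatial stream count. A sketch of extracting the subfield for a given stream count (the helper name is illustrative):

        /* nss is 1-based; returns one of the IEEE80211_HE_MCS_* values above */
        static u8 he_mcs_for_nss(__le16 mcs_map, unsigned int nss)
        {
                return (le16_to_cpu(mcs_map) >> (2 * (nss - 1))) & 0x3;
        }
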
 
 /* 802.11ac VHT Capabilities */
 #define IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_3895                 0x00000000
@@ -1577,6 +1679,328 @@ struct ieee80211_vht_operation {
 #define IEEE80211_VHT_CAP_RX_ANTENNA_PATTERN                   0x10000000
 #define IEEE80211_VHT_CAP_TX_ANTENNA_PATTERN                   0x20000000
 
+/* 802.11ax HE MAC capabilities */
+#define IEEE80211_HE_MAC_CAP0_HTC_HE                           0x01
+#define IEEE80211_HE_MAC_CAP0_TWT_REQ                          0x02
+#define IEEE80211_HE_MAC_CAP0_TWT_RES                          0x04
+#define IEEE80211_HE_MAC_CAP0_DYNAMIC_FRAG_NOT_SUPP            0x00
+#define IEEE80211_HE_MAC_CAP0_DYNAMIC_FRAG_LEVEL_1             0x08
+#define IEEE80211_HE_MAC_CAP0_DYNAMIC_FRAG_LEVEL_2             0x10
+#define IEEE80211_HE_MAC_CAP0_DYNAMIC_FRAG_LEVEL_3             0x18
+#define IEEE80211_HE_MAC_CAP0_DYNAMIC_FRAG_MASK                        0x18
+#define IEEE80211_HE_MAC_CAP0_MAX_NUM_FRAG_MSDU_1              0x00
+#define IEEE80211_HE_MAC_CAP0_MAX_NUM_FRAG_MSDU_2              0x20
+#define IEEE80211_HE_MAC_CAP0_MAX_NUM_FRAG_MSDU_4              0x40
+#define IEEE80211_HE_MAC_CAP0_MAX_NUM_FRAG_MSDU_8              0x60
+#define IEEE80211_HE_MAC_CAP0_MAX_NUM_FRAG_MSDU_16             0x80
+#define IEEE80211_HE_MAC_CAP0_MAX_NUM_FRAG_MSDU_32             0xa0
+#define IEEE80211_HE_MAC_CAP0_MAX_NUM_FRAG_MSDU_64             0xc0
+#define IEEE80211_HE_MAC_CAP0_MAX_NUM_FRAG_MSDU_UNLIMITED      0xe0
+#define IEEE80211_HE_MAC_CAP0_MAX_NUM_FRAG_MSDU_MASK           0xe0
+
+#define IEEE80211_HE_MAC_CAP1_MIN_FRAG_SIZE_UNLIMITED          0x00
+#define IEEE80211_HE_MAC_CAP1_MIN_FRAG_SIZE_128                        0x01
+#define IEEE80211_HE_MAC_CAP1_MIN_FRAG_SIZE_256                        0x02
+#define IEEE80211_HE_MAC_CAP1_MIN_FRAG_SIZE_512                        0x03
+#define IEEE80211_HE_MAC_CAP1_MIN_FRAG_SIZE_MASK               0x03
+#define IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_0US               0x00
+#define IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_8US               0x04
+#define IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_16US              0x08
+#define IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_MASK              0x0c
+#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_1              0x00
+#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_2              0x10
+#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_3              0x20
+#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_4              0x30
+#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_5              0x40
+#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_6              0x50
+#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_7              0x60
+#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_8              0x70
+#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_MASK           0x70
+
+/* Link adaptation is split between byte HE_MAC_CAP1 and
+ * HE_MAC_CAP2. It should be set only if IEEE80211_HE_MAC_CAP0_HTC_HE
+ * is set, in which case the following values apply:
+ * 0 = No feedback.
+ * 1 = reserved.
+ * 2 = Unsolicited feedback.
+ * 3 = both
+ */
+#define IEEE80211_HE_MAC_CAP1_LINK_ADAPTATION                  0x80
+
+#define IEEE80211_HE_MAC_CAP2_LINK_ADAPTATION                  0x01
+#define IEEE80211_HE_MAC_CAP2_ALL_ACK                          0x02
+#define IEEE80211_HE_MAC_CAP2_UL_MU_RESP_SCHED                 0x04
+#define IEEE80211_HE_MAC_CAP2_BSR                              0x08
+#define IEEE80211_HE_MAC_CAP2_BCAST_TWT                                0x10
+#define IEEE80211_HE_MAC_CAP2_32BIT_BA_BITMAP                  0x20
+#define IEEE80211_HE_MAC_CAP2_MU_CASCADING                     0x40
+#define IEEE80211_HE_MAC_CAP2_ACK_EN                           0x80
+
+#define IEEE80211_HE_MAC_CAP3_GRP_ADDR_MULTI_STA_BA_DL_MU      0x01
+#define IEEE80211_HE_MAC_CAP3_OMI_CONTROL                      0x02
+#define IEEE80211_HE_MAC_CAP3_OFDMA_RA                         0x04
+
+/* The maximum length of an A-MPDU is defined by the combination of the Maximum
+ * A-MPDU Length Exponent field in the HT capabilities, VHT capabilities and the
+ * same field in the HE capabilities.
+ */
+#define IEEE80211_HE_MAC_CAP3_MAX_A_AMPDU_LEN_EXP_USE_VHT      0x00
+#define IEEE80211_HE_MAC_CAP3_MAX_A_AMPDU_LEN_EXP_VHT_1                0x08
+#define IEEE80211_HE_MAC_CAP3_MAX_A_AMPDU_LEN_EXP_VHT_2                0x10
+#define IEEE80211_HE_MAC_CAP3_MAX_A_AMPDU_LEN_EXP_RESERVED     0x18
+#define IEEE80211_HE_MAC_CAP3_MAX_A_AMPDU_LEN_EXP_MASK         0x18
+#define IEEE80211_HE_MAC_CAP3_A_AMSDU_FRAG                     0x20
+#define IEEE80211_HE_MAC_CAP3_FLEX_TWT_SCHED                   0x40
+#define IEEE80211_HE_MAC_CAP3_RX_CTRL_FRAME_TO_MULTIBSS                0x80
+
+#define IEEE80211_HE_MAC_CAP4_BSRP_BQRP_A_MPDU_AGG             0x01
+#define IEEE80211_HE_MAC_CAP4_QTP                              0x02
+#define IEEE80211_HE_MAC_CAP4_BQR                              0x04
+#define IEEE80211_HE_MAC_CAP4_SR_RESP                          0x08
+#define IEEE80211_HE_MAC_CAP4_NDP_FB_REP                       0x10
+#define IEEE80211_HE_MAC_CAP4_OPS                              0x20
+#define IEEE80211_HE_MAC_CAP4_AMDSU_IN_AMPDU                   0x40
+
+/* 802.11ax HE PHY capabilities */
+#define IEEE80211_HE_PHY_CAP0_DUAL_BAND                                        0x01
+#define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G            0x02
+#define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G      0x04
+#define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G           0x08
+#define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G     0x10
+#define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_RU_MAPPING_IN_2G       0x20
+#define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_RU_MAPPING_IN_5G       0x40
+#define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_MASK                   0xfe
+
+#define IEEE80211_HE_PHY_CAP1_PREAMBLE_PUNC_RX_80MHZ_ONLY_SECOND_20MHZ 0x01
+#define IEEE80211_HE_PHY_CAP1_PREAMBLE_PUNC_RX_80MHZ_ONLY_SECOND_40MHZ 0x02
+#define IEEE80211_HE_PHY_CAP1_PREAMBLE_PUNC_RX_160MHZ_ONLY_SECOND_20MHZ        0x04
+#define IEEE80211_HE_PHY_CAP1_PREAMBLE_PUNC_RX_160MHZ_ONLY_SECOND_40MHZ        0x08
+#define IEEE80211_HE_PHY_CAP1_PREAMBLE_PUNC_RX_MASK                    0x0f
+#define IEEE80211_HE_PHY_CAP1_DEVICE_CLASS_A                           0x10
+#define IEEE80211_HE_PHY_CAP1_LDPC_CODING_IN_PAYLOAD                   0x20
+#define IEEE80211_HE_PHY_CAP1_HE_LTF_AND_GI_FOR_HE_PPDUS_0_8US         0x40
+/* Midamble RX Max NSTS is split between byte #2 and byte #3 */
+#define IEEE80211_HE_PHY_CAP1_MIDAMBLE_RX_MAX_NSTS                     0x80
+
+#define IEEE80211_HE_PHY_CAP2_MIDAMBLE_RX_MAX_NSTS                     0x01
+#define IEEE80211_HE_PHY_CAP2_NDP_4x_LTF_AND_3_2US                     0x02
+#define IEEE80211_HE_PHY_CAP2_STBC_TX_UNDER_80MHZ                      0x04
+#define IEEE80211_HE_PHY_CAP2_STBC_RX_UNDER_80MHZ                      0x08
+#define IEEE80211_HE_PHY_CAP2_DOPPLER_TX                               0x10
+#define IEEE80211_HE_PHY_CAP2_DOPPLER_RX                               0x20
+
+/* Note that the meaning of UL MU below is different between an AP and a non-AP
+ * sta, where in the AP case it indicates support for Rx and in the non-AP sta
+ * case it indicates support for Tx.
+ */
+#define IEEE80211_HE_PHY_CAP2_UL_MU_FULL_MU_MIMO                       0x40
+#define IEEE80211_HE_PHY_CAP2_UL_MU_PARTIAL_MU_MIMO                    0x80
+
+#define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_TX_NO_DCM                  0x00
+#define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_TX_BPSK                    0x01
+#define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_TX_QPSK                    0x02
+#define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_TX_16_QAM                  0x03
+#define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_TX_MASK                    0x03
+#define IEEE80211_HE_PHY_CAP3_DCM_MAX_TX_NSS_1                         0x00
+#define IEEE80211_HE_PHY_CAP3_DCM_MAX_TX_NSS_2                         0x04
+#define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_RX_NO_DCM                  0x00
+#define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_RX_BPSK                    0x08
+#define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_RX_QPSK                    0x10
+#define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_RX_16_QAM                  0x18
+#define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_RX_MASK                    0x18
+#define IEEE80211_HE_PHY_CAP3_DCM_MAX_RX_NSS_1                         0x00
+#define IEEE80211_HE_PHY_CAP3_DCM_MAX_RX_NSS_2                         0x20
+#define IEEE80211_HE_PHY_CAP3_RX_HE_MU_PPDU_FROM_NON_AP_STA            0x40
+#define IEEE80211_HE_PHY_CAP3_SU_BEAMFORMER                            0x80
+
+#define IEEE80211_HE_PHY_CAP4_SU_BEAMFORMEE                            0x01
+#define IEEE80211_HE_PHY_CAP4_MU_BEAMFORMER                            0x02
+
+/* Minimal allowed value of Max STS under 80MHz is 3 */
+#define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_UNDER_80MHZ_4         0x0c
+#define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_UNDER_80MHZ_5         0x10
+#define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_UNDER_80MHZ_6         0x14
+#define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_UNDER_80MHZ_7         0x18
+#define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_UNDER_80MHZ_8         0x1c
+#define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_UNDER_80MHZ_MASK      0x1c
+
+/* Minimal allowed value of Max STS above 80MHz is 3 */
+#define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_ABOVE_80MHZ_4         0x60
+#define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_ABOVE_80MHZ_5         0x80
+#define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_ABOVE_80MHZ_6         0xa0
+#define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_ABOVE_80MHZ_7         0xc0
+#define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_ABOVE_80MHZ_8         0xe0
+#define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_ABOVE_80MHZ_MASK      0xe0
+
+#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_1     0x00
+#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_2     0x01
+#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_3     0x02
+#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_4     0x03
+#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_5     0x04
+#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_6     0x05
+#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_7     0x06
+#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_8     0x07
+#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_MASK  0x07
+
+#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_1     0x00
+#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_2     0x08
+#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_3     0x10
+#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_4     0x18
+#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_5     0x20
+#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_6     0x28
+#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_7     0x30
+#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_8     0x38
+#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_MASK  0x38
+
+#define IEEE80211_HE_PHY_CAP5_NG16_SU_FEEDBACK                         0x40
+#define IEEE80211_HE_PHY_CAP5_NG16_MU_FEEDBACK                         0x80
+
+#define IEEE80211_HE_PHY_CAP6_CODEBOOK_SIZE_42_SU                      0x01
+#define IEEE80211_HE_PHY_CAP6_CODEBOOK_SIZE_75_MU                      0x02
+#define IEEE80211_HE_PHY_CAP6_TRIG_SU_BEAMFORMER_FB                    0x04
+#define IEEE80211_HE_PHY_CAP6_TRIG_MU_BEAMFORMER_FB                    0x08
+#define IEEE80211_HE_PHY_CAP6_TRIG_CQI_FB                              0x10
+#define IEEE80211_HE_PHY_CAP6_PARTIAL_BW_EXT_RANGE                     0x20
+#define IEEE80211_HE_PHY_CAP6_PARTIAL_BANDWIDTH_DL_MUMIMO              0x40
+#define IEEE80211_HE_PHY_CAP6_PPE_THRESHOLD_PRESENT                    0x80
+
+#define IEEE80211_HE_PHY_CAP7_SRP_BASED_SR                             0x01
+#define IEEE80211_HE_PHY_CAP7_POWER_BOOST_FACTOR_AR                    0x02
+#define IEEE80211_HE_PHY_CAP7_HE_SU_MU_PPDU_4XLTF_AND_08_US_GI         0x04
+#define IEEE80211_HE_PHY_CAP7_MAX_NC_1                                 0x08
+#define IEEE80211_HE_PHY_CAP7_MAX_NC_2                                 0x10
+#define IEEE80211_HE_PHY_CAP7_MAX_NC_3                                 0x18
+#define IEEE80211_HE_PHY_CAP7_MAX_NC_4                                 0x20
+#define IEEE80211_HE_PHY_CAP7_MAX_NC_5                                 0x28
+#define IEEE80211_HE_PHY_CAP7_MAX_NC_6                                 0x30
+#define IEEE80211_HE_PHY_CAP7_MAX_NC_7                                 0x38
+#define IEEE80211_HE_PHY_CAP7_MAX_NC_MASK                              0x38
+#define IEEE80211_HE_PHY_CAP7_STBC_TX_ABOVE_80MHZ                      0x40
+#define IEEE80211_HE_PHY_CAP7_STBC_RX_ABOVE_80MHZ                      0x80
+
+#define IEEE80211_HE_PHY_CAP8_HE_ER_SU_PPDU_4XLTF_AND_08_US_GI         0x01
+#define IEEE80211_HE_PHY_CAP8_20MHZ_IN_40MHZ_HE_PPDU_IN_2G             0x02
+#define IEEE80211_HE_PHY_CAP8_20MHZ_IN_160MHZ_HE_PPDU                  0x04
+#define IEEE80211_HE_PHY_CAP8_80MHZ_IN_160MHZ_HE_PPDU                  0x08
+#define IEEE80211_HE_PHY_CAP8_HE_ER_SU_1XLTF_AND_08_US_GI              0x10
+#define IEEE80211_HE_PHY_CAP8_MIDAMBLE_RX_2X_AND_1XLTF                 0x20
+
+/* 802.11ax HE TX/RX MCS NSS Support  */
+#define IEEE80211_TX_RX_MCS_NSS_SUPP_HIGHEST_MCS_POS                   (3)
+#define IEEE80211_TX_RX_MCS_NSS_SUPP_TX_BITMAP_POS                     (6)
+#define IEEE80211_TX_RX_MCS_NSS_SUPP_RX_BITMAP_POS                     (11)
+#define IEEE80211_TX_RX_MCS_NSS_SUPP_TX_BITMAP_MASK                    0x07c0
+#define IEEE80211_TX_RX_MCS_NSS_SUPP_RX_BITMAP_MASK                    0xf800
+
+/* TX/RX HE MCS Support field Highest MCS subfield encoding */
+enum ieee80211_he_highest_mcs_supported_subfield_enc {
+       HIGHEST_MCS_SUPPORTED_MCS7 = 0,
+       HIGHEST_MCS_SUPPORTED_MCS8,
+       HIGHEST_MCS_SUPPORTED_MCS9,
+       HIGHEST_MCS_SUPPORTED_MCS10,
+       HIGHEST_MCS_SUPPORTED_MCS11,
+};
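The POS/MASK macros above and this enum are meant to be used together. A sketch (the helper is hypothetical) of extracting the Tx bitmap subfield from a 16-bit Tx/Rx HE MCS NSS Support word:

static inline u16 he_tx_mcs_nss_bitmap(u16 tx_rx_mcs_nss_supp)
{
	/* Bits 6..10, per IEEE80211_TX_RX_MCS_NSS_SUPP_TX_BITMAP_* above */
	return (tx_rx_mcs_nss_supp &
		IEEE80211_TX_RX_MCS_NSS_SUPP_TX_BITMAP_MASK) >>
	       IEEE80211_TX_RX_MCS_NSS_SUPP_TX_BITMAP_POS;
}

Each per-stream value pulled out this way is then interpreted against the HIGHEST_MCS_SUPPORTED_* encoding above.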
+
+/* Calculate 802.11ax HE capabilities IE Tx/Rx HE MCS NSS Support Field size */
+static inline u8
+ieee80211_he_mcs_nss_size(const struct ieee80211_he_cap_elem *he_cap)
+{
+       u8 count = 4;
+
+       if (he_cap->phy_cap_info[0] &
+           IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G)
+               count += 4;
+
+       if (he_cap->phy_cap_info[0] &
+           IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G)
+               count += 4;
+
+       return count;
+}
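The returned size is therefore 4, 8, or 12 bytes depending on the channel width set bits. A quick sketch of the 12-byte case (assuming struct ieee80211_he_cap_elem from earlier in this patch):

struct ieee80211_he_cap_elem cap = {};

cap.phy_cap_info[0] |= IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G |
		       IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G;
/* 4 base bytes + 4 for 160MHz + 4 for 80+80MHz = 12 */
WARN_ON(ieee80211_he_mcs_nss_size(&cap) != 12);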
+
+/* 802.11ax HE PPE Thresholds */
+#define IEEE80211_PPE_THRES_NSS_SUPPORT_2NSS                   (1)
+#define IEEE80211_PPE_THRES_NSS_POS                            (0)
+#define IEEE80211_PPE_THRES_NSS_MASK                           (7)
+#define IEEE80211_PPE_THRES_RU_INDEX_BITMASK_2x966_AND_966_RU  \
+       (BIT(5) | BIT(6))
+#define IEEE80211_PPE_THRES_RU_INDEX_BITMASK_MASK              0x78
+#define IEEE80211_PPE_THRES_RU_INDEX_BITMASK_POS               (3)
+#define IEEE80211_PPE_THRES_INFO_PPET_SIZE                     (3)
+
+/*
+ * Calculate 802.11ax HE capabilities IE PPE Thresholds field size
+ * Input: the header (first) byte of ppe_thres, and a pointer (u8 *) to
+ * the HE capabilities IE's PHY capability info bytes
+ */
+static inline u8
+ieee80211_he_ppe_size(u8 ppe_thres_hdr, const u8 *phy_cap_info)
+{
+       u8 n;
+
+       if ((phy_cap_info[6] &
+            IEEE80211_HE_PHY_CAP6_PPE_THRESHOLD_PRESENT) == 0)
+               return 0;
+
+       n = hweight8(ppe_thres_hdr &
+                    IEEE80211_PPE_THRES_RU_INDEX_BITMASK_MASK);
+       n *= (1 + ((ppe_thres_hdr & IEEE80211_PPE_THRES_NSS_MASK) >>
+                  IEEE80211_PPE_THRES_NSS_POS));
+
+       /*
+        * Each pair is 6 bits, and we need to add the 7 "header" bits to the
+        * total size.
+        */
+       n = (n * IEEE80211_PPE_THRES_INFO_PPET_SIZE * 2) + 7;
+       n = DIV_ROUND_UP(n, 8);
+
+       return n;
+}
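A worked example of the arithmetic (values chosen for illustration; assumes IEEE80211_HE_PHY_CAP6_PPE_THRESHOLD_PRESENT is set in phy_cap_info[6]):

/* NSS subfield = 1 (two spatial streams) and two RU index bits set:
 * n = 2 * (1 + 1) = 4 PPET pairs, 4 * 3 * 2 + 7 = 31 bits,
 * DIV_ROUND_UP(31, 8) = 4 bytes returned.
 */
u8 ppe_thres_hdr = IEEE80211_PPE_THRES_NSS_SUPPORT_2NSS |
		   BIT(3) | BIT(4);	/* two bits of the RU index bitmask */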
+
+/* HE Operation defines */
+#define IEEE80211_HE_OPERATION_BSS_COLOR_MASK                  0x0000003f
+#define IEEE80211_HE_OPERATION_DFLT_PE_DURATION_MASK           0x000001c0
+#define IEEE80211_HE_OPERATION_DFLT_PE_DURATION_OFFSET         6
+#define IEEE80211_HE_OPERATION_TWT_REQUIRED                    0x00000200
+#define IEEE80211_HE_OPERATION_RTS_THRESHOLD_MASK              0x000ffc00
+#define IEEE80211_HE_OPERATION_RTS_THRESHOLD_OFFSET            10
+#define IEEE80211_HE_OPERATION_PARTIAL_BSS_COLOR               0x00100000
+#define IEEE80211_HE_OPERATION_VHT_OPER_INFO                   0x00200000
+#define IEEE80211_HE_OPERATION_MULTI_BSSID_AP                  0x10000000
+#define IEEE80211_HE_OPERATION_TX_BSSID_INDICATOR              0x20000000
+#define IEEE80211_HE_OPERATION_BSS_COLOR_DISABLED              0x40000000
+
+/*
+ * ieee80211_he_oper_size - calculate 802.11ax HE Operations IE size
+ * @he_oper_ie: byte data of the HE Operations IE, starting from the byte
+ *     after the ext ID byte. It is assumed that he_oper_ie has at least
+ *     sizeof(struct ieee80211_he_operation) bytes, checked already in
+ *     ieee802_11_parse_elems_crc()
+ * @return the actual size of the IE data (not including header), or 0 on error
+ */
+static inline u8
+ieee80211_he_oper_size(const u8 *he_oper_ie)
+{
+       struct ieee80211_he_operation *he_oper = (void *)he_oper_ie;
+       u8 oper_len = sizeof(struct ieee80211_he_operation);
+       u32 he_oper_params;
+
+       /* Make sure the input is not NULL */
+       if (!he_oper_ie)
+               return 0;
+
+       /* Calc required length */
+       he_oper_params = le32_to_cpu(he_oper->he_oper_params);
+       if (he_oper_params & IEEE80211_HE_OPERATION_VHT_OPER_INFO)
+               oper_len += 3;
+       if (he_oper_params & IEEE80211_HE_OPERATION_MULTI_BSSID_AP)
+               oper_len++;
+
+       /* Add the first byte (extension ID) to the total length */
+       oper_len++;
+
+       return oper_len;
+}
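For example, a parameters word with only the VHT Operation Information bit set yields the base structure size plus 3 plus the extension ID byte. A sketch of that accounting (local names are illustrative):

u8 len;

/* Only IEEE80211_HE_OPERATION_VHT_OPER_INFO set in he_oper_params: */
len = sizeof(struct ieee80211_he_operation)
      + 3	/* VHT operation info */
      + 1;	/* extension ID byte */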
+
 /* Authentication algorithms */
 #define WLAN_AUTH_OPEN 0
 #define WLAN_AUTH_SHARED_KEY 1
@@ -1992,6 +2416,11 @@ enum ieee80211_eid_ext {
        WLAN_EID_EXT_FILS_WRAPPED_DATA = 8,
        WLAN_EID_EXT_FILS_PUBLIC_KEY = 12,
        WLAN_EID_EXT_FILS_NONCE = 13,
+       WLAN_EID_EXT_FUTURE_CHAN_GUIDANCE = 14,
+       WLAN_EID_EXT_HE_CAPABILITY = 35,
+       WLAN_EID_EXT_HE_OPERATION = 36,
+       WLAN_EID_EXT_UORA = 37,
+       WLAN_EID_EXT_HE_MU_EDCA = 38,
 };
 
 /* Action category code */
index d95cae09dea0873a0cb119e63f5c3d6d7c73d823..ac42da56f7a28f375dbc50887fee19eaf97c2327 100644 (file)
@@ -74,6 +74,11 @@ struct team_port {
        long mode_priv[0];
 };
 
+static inline struct team_port *team_port_get_rcu(const struct net_device *dev)
+{
+       return rcu_dereference(dev->rx_handler_data);
+}
+
 static inline bool team_port_enabled(struct team_port *port)
 {
        return port->index != -1;
@@ -84,6 +89,19 @@ static inline bool team_port_txable(struct team_port *port)
        return port->linkup && team_port_enabled(port);
 }
 
+static inline bool team_port_dev_txable(const struct net_device *port_dev)
+{
+       struct team_port *port;
+       bool txable;
+
+       rcu_read_lock();
+       port = team_port_get_rcu(port_dev);
+       txable = port ? team_port_txable(port) : false;
+       rcu_read_unlock();
+
+       return txable;
+}
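A usage sketch (slave_dev is a hypothetical lower netdev; netif_is_team_port() is assumed available to guard the call):

bool usable = netif_is_team_port(slave_dev) &&
	      team_port_dev_txable(slave_dev);
/* usable is true only while the port is linked up and enabled */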
+
 #ifdef CONFIG_NET_POLL_CONTROLLER
 static inline void team_netpoll_send_skb(struct team_port *port,
                                         struct sk_buff *skb)
index 27650f1bff3d7ad8595935de659d658f35de6dd8..c759d1cbcedd8d7f19f835457641dee9761a0fa0 100644 (file)
@@ -93,6 +93,7 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev)
 
 #define IN_DEV_FORWARD(in_dev)         IN_DEV_CONF_GET((in_dev), FORWARDING)
 #define IN_DEV_MFORWARD(in_dev)                IN_DEV_ANDCONF((in_dev), MC_FORWARDING)
+#define IN_DEV_BFORWARD(in_dev)                IN_DEV_ANDCONF((in_dev), BC_FORWARDING)
 #define IN_DEV_RPFILTER(in_dev)                IN_DEV_MAXCONF((in_dev), RP_FILTER)
 #define IN_DEV_SRC_VMARK(in_dev)       IN_DEV_ORCONF((in_dev), SRC_VMARK)
 #define IN_DEV_SOURCE_ROUTE(in_dev)    IN_DEV_ANDCONF((in_dev), \
index 6cc2df7f7ac949e72a59821ae9d25ce529371213..e1c9eea6015b56b3a671813d0dfe2bc2f6d1cf61 100644 (file)
@@ -4,7 +4,7 @@
 
 #include <linux/spinlock.h>
 #include <linux/uidgid.h>
-#include <linux/rhashtable.h>
+#include <linux/rhashtable-types.h>
 #include <uapi/linux/ipc.h>
 #include <linux/refcount.h>
 
index b5630c8eb2f3a910b922eeb08153635e88faadbb..6cea726612b770168eab5041259d8c52fc264bf2 100644 (file)
@@ -9,7 +9,7 @@
 #include <linux/nsproxy.h>
 #include <linux/ns_common.h>
 #include <linux/refcount.h>
-#include <linux/rhashtable.h>
+#include <linux/rhashtable-types.h>
 
 struct user_namespace;
 
index ab25c8b6d9e33d84bbaca9410af75339fa94385b..814643f7ee529eb6c9e27ec3e1653487d818595e 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/lockdep.h>
 #include <linux/rbtree.h>
 #include <linux/atomic.h>
+#include <linux/uidgid.h>
 #include <linux/wait.h>
 
 struct file;
@@ -325,12 +326,14 @@ void kernfs_destroy_root(struct kernfs_root *root);
 
 struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent,
                                         const char *name, umode_t mode,
+                                        kuid_t uid, kgid_t gid,
                                         void *priv, const void *ns);
 struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent,
                                            const char *name);
 struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent,
-                                        const char *name,
-                                        umode_t mode, loff_t size,
+                                        const char *name, umode_t mode,
+                                        kuid_t uid, kgid_t gid,
+                                        loff_t size,
                                         const struct kernfs_ops *ops,
                                         void *priv, const void *ns,
                                         struct lock_class_key *key);
@@ -415,12 +418,14 @@ static inline void kernfs_destroy_root(struct kernfs_root *root) { }
 
 static inline struct kernfs_node *
 kernfs_create_dir_ns(struct kernfs_node *parent, const char *name,
-                    umode_t mode, void *priv, const void *ns)
+                    umode_t mode, kuid_t uid, kgid_t gid,
+                    void *priv, const void *ns)
 { return ERR_PTR(-ENOSYS); }
 
 static inline struct kernfs_node *
 __kernfs_create_file(struct kernfs_node *parent, const char *name,
-                    umode_t mode, loff_t size, const struct kernfs_ops *ops,
+                    umode_t mode, kuid_t uid, kgid_t gid,
+                    loff_t size, const struct kernfs_ops *ops,
                     void *priv, const void *ns, struct lock_class_key *key)
 { return ERR_PTR(-ENOSYS); }
 
@@ -498,12 +503,15 @@ static inline struct kernfs_node *
 kernfs_create_dir(struct kernfs_node *parent, const char *name, umode_t mode,
                  void *priv)
 {
-       return kernfs_create_dir_ns(parent, name, mode, priv, NULL);
+       return kernfs_create_dir_ns(parent, name, mode,
+                                   GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
+                                   priv, NULL);
 }
 
 static inline struct kernfs_node *
 kernfs_create_file_ns(struct kernfs_node *parent, const char *name,
-                     umode_t mode, loff_t size, const struct kernfs_ops *ops,
+                     umode_t mode, kuid_t uid, kgid_t gid,
+                     loff_t size, const struct kernfs_ops *ops,
                      void *priv, const void *ns)
 {
        struct lock_class_key *key = NULL;
@@ -511,15 +519,17 @@ kernfs_create_file_ns(struct kernfs_node *parent, const char *name,
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
        key = (struct lock_class_key *)&ops->lockdep_key;
 #endif
-       return __kernfs_create_file(parent, name, mode, size, ops, priv, ns,
-                                   key);
+       return __kernfs_create_file(parent, name, mode, uid, gid,
+                                   size, ops, priv, ns, key);
 }
 
 static inline struct kernfs_node *
 kernfs_create_file(struct kernfs_node *parent, const char *name, umode_t mode,
                   loff_t size, const struct kernfs_ops *ops, void *priv)
 {
-       return kernfs_create_file_ns(parent, name, mode, size, ops, priv, NULL);
+       return kernfs_create_file_ns(parent, name, mode,
+                                    GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
+                                    size, ops, priv, NULL);
 }
 
 static inline int kernfs_remove_by_name(struct kernfs_node *parent,
index 7f6f93c3df9cef5b734108df3f1041e8d7a0ab37..b49ff230beba19078e8f973f04668ca659382a94 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/wait.h>
 #include <linux/atomic.h>
 #include <linux/workqueue.h>
+#include <linux/uidgid.h>
 
 #define UEVENT_HELPER_PATH_LEN         256
 #define UEVENT_NUM_ENVP                        32      /* number of env pointers */
@@ -114,6 +115,8 @@ extern struct kobject * __must_check kobject_get_unless_zero(
 extern void kobject_put(struct kobject *kobj);
 
 extern const void *kobject_namespace(struct kobject *kobj);
+extern void kobject_get_ownership(struct kobject *kobj,
+                                 kuid_t *uid, kgid_t *gid);
 extern char *kobject_get_path(struct kobject *kobj, gfp_t flag);
 
 struct kobj_type {
@@ -122,6 +125,7 @@ struct kobj_type {
        struct attribute **default_attrs;
        const struct kobj_ns_type_operations *(*child_ns_type)(struct kobject *kobj);
        const void *(*namespace)(struct kobject *kobj);
+       void (*get_ownership)(struct kobject *kobj, kuid_t *uid, kgid_t *gid);
 };
 
 struct kobj_uevent_env {
index 4b129df4d46b5a4c26d970c6a0c385b6c208d9d1..de04cc5ed53673ebea7362bea2a43f8355018125 100644 (file)
@@ -285,6 +285,36 @@ static inline void list_cut_position(struct list_head *list,
                __list_cut_position(list, head, entry);
 }
 
+/**
+ * list_cut_before - cut a list into two, before given entry
+ * @list: a new list to add all removed entries
+ * @head: a list with entries
+ * @entry: an entry within head, could be the head itself
+ *
+ * This helper moves the initial part of @head, up to but
+ * excluding @entry, from @head to @list.  You should pass
+ * in @entry an element you know is on @head.  @list should
+ * be an empty list or a list you do not care about losing
+ * its data.
+ * If @entry == @head, all entries on @head are moved to
+ * @list.
+ */
+static inline void list_cut_before(struct list_head *list,
+                                  struct list_head *head,
+                                  struct list_head *entry)
+{
+       if (head->next == entry) {
+               INIT_LIST_HEAD(list);
+               return;
+       }
+       list->next = head->next;
+       list->next->prev = list;
+       list->prev = entry->prev;
+       list->prev->next = list;
+       head->next = entry;
+       entry->prev = head;
+}
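A usage sketch for list_cut_before() (names hypothetical); contrast list_cut_position() above, which cuts up to and including @entry:

LIST_HEAD(batch);

/* Move everything queued ahead of "entry" onto a private list. */
list_cut_before(&batch, &pending, entry);
/* "batch" now holds the entries that preceded "entry";
 * "pending" now begins at "entry".
 */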
+
 static inline void __list_splice(const struct list_head *list,
                                 struct list_head *prev,
                                 struct list_head *next)
index 122e7e9d3091b5b55f4ded91ae7a9dccb620d193..dca6ab4eaa9927168003877f884965520bcdb4f7 100644 (file)
@@ -630,6 +630,7 @@ struct mlx4_caps {
        u32                     vf_caps;
        bool                    wol_port[MLX4_MAX_PORTS + 1];
        struct mlx4_rate_limit_caps rl_caps;
+       u32                     health_buffer_addrs;
 };
 
 struct mlx4_buf_list {
@@ -851,6 +852,12 @@ struct mlx4_vf_dev {
        u8                      n_ports;
 };
 
+struct mlx4_fw_crdump {
+       bool snapshot_enable;
+       struct devlink_region *region_crspace;
+       struct devlink_region *region_fw_health;
+};
+
 enum mlx4_pci_status {
        MLX4_PCI_STATUS_DISABLED,
        MLX4_PCI_STATUS_ENABLED,
@@ -871,6 +878,7 @@ struct mlx4_dev_persistent {
        u8      interface_state;
        struct mutex            pci_status_mutex; /* sync pci state */
        enum mlx4_pci_status    pci_status;
+       struct mlx4_fw_crdump   crdump;
 };
 
 struct mlx4_dev {
index 02f72ebf31a78f221b21eaf74ee9b6debcbe998e..d489494b0a8455e9c41946e6ca9fe544b5f9d780 100644 (file)
@@ -332,6 +332,13 @@ enum mlx5_event {
 
        MLX5_EVENT_TYPE_FPGA_ERROR         = 0x20,
        MLX5_EVENT_TYPE_FPGA_QP_ERROR      = 0x21,
+
+       MLX5_EVENT_TYPE_DEVICE_TRACER      = 0x26,
+};
+
+enum {
+       MLX5_TRACER_SUBTYPE_OWNERSHIP_CHANGE = 0x0,
+       MLX5_TRACER_SUBTYPE_TRACES_AVAILABLE = 0x1,
 };
 
 enum {
@@ -750,7 +757,7 @@ enum {
 
 #define MLX5_MINI_CQE_ARRAY_SIZE 8
 
-static inline int mlx5_get_cqe_format(struct mlx5_cqe64 *cqe)
+static inline u8 mlx5_get_cqe_format(struct mlx5_cqe64 *cqe)
 {
        return (cqe->op_own >> 2) & 0x3;
 }
@@ -770,14 +777,14 @@ static inline u8 get_cqe_l3_hdr_type(struct mlx5_cqe64 *cqe)
        return (cqe->l4_l3_hdr_type >> 2) & 0x3;
 }
 
-static inline u8 cqe_is_tunneled(struct mlx5_cqe64 *cqe)
+static inline bool cqe_is_tunneled(struct mlx5_cqe64 *cqe)
 {
        return cqe->outer_l3_tunneled & 0x1;
 }
 
-static inline int cqe_has_vlan(struct mlx5_cqe64 *cqe)
+static inline bool cqe_has_vlan(struct mlx5_cqe64 *cqe)
 {
-       return !!(cqe->l4_l3_hdr_type & 0x1);
+       return cqe->l4_l3_hdr_type & 0x1;
 }
 
 static inline u64 get_cqe_ts(struct mlx5_cqe64 *cqe)
@@ -1071,6 +1078,9 @@ enum mlx5_qcam_feature_groups {
 #define MLX5_CAP_GEN(mdev, cap) \
        MLX5_GET(cmd_hca_cap, mdev->caps.hca_cur[MLX5_CAP_GENERAL], cap)
 
+#define MLX5_CAP_GEN_64(mdev, cap) \
+       MLX5_GET64(cmd_hca_cap, mdev->caps.hca_cur[MLX5_CAP_GENERAL], cap)
+
 #define MLX5_CAP_GEN_MAX(mdev, cap) \
        MLX5_GET(cmd_hca_cap, mdev->caps.hca_max[MLX5_CAP_GENERAL], cap)
 
index 83957920653a0adeb08a90211f937e6227cb32a8..54f385cc881131061d164fcf370a77666344a864 100644 (file)
@@ -138,9 +138,14 @@ enum {
        MLX5_REG_HOST_ENDIANNESS = 0x7004,
        MLX5_REG_MCIA            = 0x9014,
        MLX5_REG_MLCR            = 0x902b,
+       MLX5_REG_MTRC_CAP        = 0x9040,
+       MLX5_REG_MTRC_CONF       = 0x9041,
+       MLX5_REG_MTRC_STDB       = 0x9042,
+       MLX5_REG_MTRC_CTRL       = 0x9043,
        MLX5_REG_MPCNT           = 0x9051,
        MLX5_REG_MTPPS           = 0x9053,
        MLX5_REG_MTPPSE          = 0x9054,
+       MLX5_REG_MPEGC           = 0x9056,
        MLX5_REG_MCQI            = 0x9061,
        MLX5_REG_MCC             = 0x9062,
        MLX5_REG_MCDA            = 0x9063,
@@ -812,6 +817,9 @@ struct mlx5_clock {
        struct mlx5_pps            pps_info;
 };
 
+struct mlx5_fw_tracer;
+struct mlx5_vxlan;
+
 struct mlx5_core_dev {
        struct pci_dev         *pdev;
        /* sync pci state */
@@ -843,6 +851,7 @@ struct mlx5_core_dev {
        atomic_t                num_qps;
        u32                     issi;
        struct mlx5e_resources  mlx5e_res;
+       struct mlx5_vxlan       *vxlan;
        struct {
                struct mlx5_rsvd_gids   reserved_gids;
                u32                     roce_en;
@@ -856,6 +865,7 @@ struct mlx5_core_dev {
        struct mlx5_clock        clock;
        struct mlx5_ib_clock_info  *clock_info;
        struct page             *clock_info_page;
+       struct mlx5_fw_tracer   *tracer;
 };
 
 struct mlx5_db {
@@ -1079,8 +1089,6 @@ int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev,
                           struct mlx5_core_mkey *mkey);
 int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey,
                         u32 *out, int outlen);
-int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *_mkey,
-                            u32 *mkey);
 int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn);
 int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn);
 int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb,
index 757b4a30281e6e8c82be94db32c7982089817350..c40f2fc68655d368a49a1d7b0904e5eda45bbc78 100644 (file)
@@ -152,6 +152,8 @@ struct mlx5_fs_vlan {
         u8  prio;
 };
 
+#define MLX5_FS_VLAN_DEPTH     2
+
 struct mlx5_flow_act {
        u32 action;
        bool has_flow_tag;
@@ -159,7 +161,7 @@ struct mlx5_flow_act {
        u32 encap_id;
        u32 modify_id;
        uintptr_t esp_id;
-       struct mlx5_fs_vlan vlan;
+       struct mlx5_fs_vlan vlan[MLX5_FS_VLAN_DEPTH];
        struct ib_counters *counters;
 };
 
index ac281f5ec9b8077ba859f33eaf61e3f03ecdeb3d..60c2308fe062a1b35ad521a7b30a950823685f36 100644 (file)
@@ -75,6 +75,15 @@ enum {
        MLX5_SET_HCA_CAP_OP_MOD_ATOMIC                = 0x3,
 };
 
+enum {
+       MLX5_GENERAL_OBJ_TYPES_CAP_UCTX = (1ULL << 4),
+       MLX5_GENERAL_OBJ_TYPES_CAP_UMEM = (1ULL << 5),
+};
+
+enum {
+       MLX5_OBJ_TYPE_UCTX = 0x0004,
+};
+
 enum {
        MLX5_CMD_OP_QUERY_HCA_CAP                 = 0x100,
        MLX5_CMD_OP_QUERY_ADAPTER                 = 0x101,
@@ -242,6 +251,8 @@ enum {
        MLX5_CMD_OP_FPGA_QUERY_QP                 = 0x962,
        MLX5_CMD_OP_FPGA_DESTROY_QP               = 0x963,
        MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS        = 0x964,
+       MLX5_CMD_OP_CREATE_GENERAL_OBJECT         = 0xa00,
+       MLX5_CMD_OP_DESTROY_GENERAL_OBJECT        = 0xa03,
        MLX5_CMD_OP_MAX
 };
 
@@ -326,7 +337,10 @@ struct mlx5_ifc_flow_table_prop_layout_bits {
        u8         reserved_at_9[0x1];
        u8         pop_vlan[0x1];
        u8         push_vlan[0x1];
-       u8         reserved_at_c[0x14];
+       u8         reserved_at_c[0x1];
+       u8         pop_vlan_2[0x1];
+       u8         push_vlan_2[0x1];
+       u8         reserved_at_f[0x11];
 
        u8         reserved_at_20[0x2];
        u8         log_max_ft_size[0x6];
@@ -654,7 +668,9 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits {
        u8         swp[0x1];
        u8         swp_csum[0x1];
        u8         swp_lso[0x1];
-       u8         reserved_at_23[0x1b];
+       u8         reserved_at_23[0xd];
+       u8         max_vxlan_udp_ports[0x8];
+       u8         reserved_at_38[0x6];
        u8         max_geneve_opt_len[0x1];
        u8         tunnel_stateless_geneve_rx[0x1];
 
@@ -874,7 +890,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
        u8         log_max_eq_sz[0x8];
        u8         reserved_at_e8[0x2];
        u8         log_max_mkey[0x6];
-       u8         reserved_at_f0[0xc];
+       u8         reserved_at_f0[0x8];
+       u8         dump_fill_mkey[0x1];
+       u8         reserved_at_f9[0x3];
        u8         log_max_eq[0x4];
 
        u8         max_indirection[0x8];
@@ -1113,7 +1131,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
        u8         reserved_at_3f8[0x3];
        u8         log_max_current_uc_list[0x5];
 
-       u8         reserved_at_400[0x80];
+       u8         general_obj_types[0x40];
+
+       u8         reserved_at_440[0x40];
 
        u8         reserved_at_480[0x3];
        u8         log_max_l2_table[0x5];
@@ -1668,7 +1688,11 @@ struct mlx5_ifc_eth_extended_cntrs_grp_data_layout_bits {
 
        u8         rx_buffer_full_low[0x20];
 
-       u8         reserved_at_1c0[0x600];
+       u8         rx_icrc_encapsulated_high[0x20];
+
+       u8         rx_icrc_encapsulated_low[0x20];
+
+       u8         reserved_at_200[0x5c0];
 };
 
 struct mlx5_ifc_eth_3635_cntrs_grp_data_layout_bits {
@@ -2367,6 +2391,8 @@ enum {
        MLX5_FLOW_CONTEXT_ACTION_MOD_HDR   = 0x40,
        MLX5_FLOW_CONTEXT_ACTION_VLAN_POP  = 0x80,
        MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH = 0x100,
+       MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2  = 0x400,
+       MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2 = 0x800,
 };
 
 struct mlx5_ifc_vlan_bits {
@@ -2397,7 +2423,9 @@ struct mlx5_ifc_flow_context_bits {
 
        u8         modify_header_id[0x20];
 
-       u8         reserved_at_100[0x100];
+       struct mlx5_ifc_vlan_bits push_vlan_2;
+
+       u8         reserved_at_120[0xe0];
 
        struct mlx5_ifc_fte_match_param_bits match_value;
 
@@ -8030,9 +8058,23 @@ struct mlx5_ifc_peir_reg_bits {
        u8         error_type[0x8];
 };
 
-struct mlx5_ifc_pcam_enhanced_features_bits {
-       u8         reserved_at_0[0x76];
+struct mlx5_ifc_mpegc_reg_bits {
+       u8         reserved_at_0[0x30];
+       u8         field_select[0x10];
+
+       u8         tx_overflow_sense[0x1];
+       u8         mark_cqe[0x1];
+       u8         mark_cnp[0x1];
+       u8         reserved_at_43[0x1b];
+       u8         tx_lossy_overflow_oper[0x2];
+
+       u8         reserved_at_60[0x100];
+};
 
+struct mlx5_ifc_pcam_enhanced_features_bits {
+       u8         reserved_at_0[0x6d];
+       u8         rx_icrc_encapsulated_counter[0x1];
+       u8         reserved_at_6e[0x8];
        u8         pfcc_mask[0x1];
        u8         reserved_at_77[0x4];
        u8         rx_buffer_fullness_counters[0x1];
@@ -8077,7 +8119,11 @@ struct mlx5_ifc_pcam_reg_bits {
 };
 
 struct mlx5_ifc_mcam_enhanced_features_bits {
-       u8         reserved_at_0[0x7b];
+       u8         reserved_at_0[0x74];
+       u8         mark_tx_action_cnp[0x1];
+       u8         mark_tx_action_cqe[0x1];
+       u8         dynamic_tx_overflow[0x1];
+       u8         reserved_at_77[0x4];
        u8         pcie_outbound_stalled[0x1];
        u8         tx_overflow_buffer_pkt[0x1];
        u8         mtpps_enh_out_per_adj[0x1];
@@ -8092,7 +8138,11 @@ struct mlx5_ifc_mcam_access_reg_bits {
        u8         mcqi[0x1];
        u8         reserved_at_1f[0x1];
 
-       u8         regs_95_to_64[0x20];
+       u8         regs_95_to_87[0x9];
+       u8         mpegc[0x1];
+       u8         regs_85_to_68[0x12];
+       u8         tracer_registers[0x4];
+
        u8         regs_63_to_32[0x20];
        u8         regs_31_to_0[0x20];
 };
@@ -9115,4 +9165,113 @@ struct mlx5_ifc_dealloc_memic_out_bits {
        u8         reserved_at_40[0x40];
 };
 
+struct mlx5_ifc_general_obj_in_cmd_hdr_bits {
+       u8         opcode[0x10];
+       u8         uid[0x10];
+
+       u8         reserved_at_20[0x10];
+       u8         obj_type[0x10];
+
+       u8         obj_id[0x20];
+
+       u8         reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_general_obj_out_cmd_hdr_bits {
+       u8         status[0x8];
+       u8         reserved_at_8[0x18];
+
+       u8         syndrome[0x20];
+
+       u8         obj_id[0x20];
+
+       u8         reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_umem_bits {
+       u8         modify_field_select[0x40];
+
+       u8         reserved_at_40[0x5b];
+       u8         log_page_size[0x5];
+
+       u8         page_offset[0x20];
+
+       u8         num_of_mtt[0x40];
+
+       struct mlx5_ifc_mtt_bits  mtt[0];
+};
+
+struct mlx5_ifc_uctx_bits {
+       u8         modify_field_select[0x40];
+
+       u8         reserved_at_40[0x1c0];
+};
+
+struct mlx5_ifc_create_umem_in_bits {
+       struct mlx5_ifc_general_obj_in_cmd_hdr_bits   hdr;
+       struct mlx5_ifc_umem_bits                     umem;
+};
+
+struct mlx5_ifc_create_uctx_in_bits {
+       struct mlx5_ifc_general_obj_in_cmd_hdr_bits   hdr;
+       struct mlx5_ifc_uctx_bits                     uctx;
+};
+
+struct mlx5_ifc_mtrc_string_db_param_bits {
+       u8         string_db_base_address[0x20];
+
+       u8         reserved_at_20[0x8];
+       u8         string_db_size[0x18];
+};
+
+struct mlx5_ifc_mtrc_cap_bits {
+       u8         trace_owner[0x1];
+       u8         trace_to_memory[0x1];
+       u8         reserved_at_2[0x4];
+       u8         trc_ver[0x2];
+       u8         reserved_at_8[0x14];
+       u8         num_string_db[0x4];
+
+       u8         first_string_trace[0x8];
+       u8         num_string_trace[0x8];
+       u8         reserved_at_30[0x28];
+
+       u8         log_max_trace_buffer_size[0x8];
+
+       u8         reserved_at_60[0x20];
+
+       struct mlx5_ifc_mtrc_string_db_param_bits string_db_param[8];
+
+       u8         reserved_at_280[0x180];
+};
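Fields in these mlx5_ifc layouts are read with MLX5_GET()/MLX5_SET() rather than as C struct members. A sketch of querying MTRC_CAP through the access-register interface (error handling elided; the fw-tracer code using this is not part of this hunk):

u32 in[MLX5_ST_SZ_DW(mtrc_cap)] = {};
u32 out[MLX5_ST_SZ_DW(mtrc_cap)] = {};
u8 num_string_db;
int err;

err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
			   MLX5_REG_MTRC_CAP, 0, 0);
if (!err)
	num_string_db = MLX5_GET(mtrc_cap, out, num_string_db);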
+
+struct mlx5_ifc_mtrc_conf_bits {
+       u8         reserved_at_0[0x1c];
+       u8         trace_mode[0x4];
+       u8         reserved_at_20[0x18];
+       u8         log_trace_buffer_size[0x8];
+       u8         trace_mkey[0x20];
+       u8         reserved_at_60[0x3a0];
+};
+
+struct mlx5_ifc_mtrc_stdb_bits {
+       u8         string_db_index[0x4];
+       u8         reserved_at_4[0x4];
+       u8         read_size[0x18];
+       u8         start_offset[0x20];
+       u8         string_db_data[0];
+};
+
+struct mlx5_ifc_mtrc_ctrl_bits {
+       u8         trace_status[0x2];
+       u8         reserved_at_2[0x2];
+       u8         arm_event[0x1];
+       u8         reserved_at_5[0xb];
+       u8         modify_field_select[0x10];
+       u8         reserved_at_20[0x2b];
+       u8         current_timestamp52_32[0x15];
+       u8         current_timestamp31_0[0x20];
+       u8         reserved_at_80[0x180];
+};
+
 #endif /* MLX5_IFC_H */
index 64d0f40d4cc36924ff051bfa14db27fe56a8af89..37e065a80a436f7ae53b77cda133da9bdcc0ef34 100644 (file)
@@ -576,6 +576,7 @@ struct mlx5_ifc_fpga_ipsec_sa {
 enum fpga_tls_cmds {
        CMD_SETUP_STREAM                = 0x1001,
        CMD_TEARDOWN_STREAM             = 0x1002,
+       CMD_RESYNC_RX                   = 0x1003,
 };
 
 #define MLX5_TLS_1_2 (0)
index d633f737b3c63f1eb137e55cd56bcdb2a873757a..6675b9f819798414a81e2b4104b3090392cd0ee6 100644 (file)
@@ -2,7 +2,7 @@
 #define __LINUX_MROUTE_BASE_H
 
 #include <linux/netdevice.h>
-#include <linux/rhashtable.h>
+#include <linux/rhashtable-types.h>
 #include <linux/spinlock.h>
 #include <net/net_namespace.h>
 #include <net/sock.h>
@@ -254,6 +254,7 @@ struct mr_table {
        atomic_t                cache_resolve_queue_len;
        bool                    mroute_do_assert;
        bool                    mroute_do_pim;
+       bool                    mroute_do_wrvifwhole;
        int                     mroute_reg_vif_num;
 };
 
index 6554d3ba4396b3df49acac934ad16eeb71a695f4..e0930678c8bf6e6061020176e8013b6e33d8264d 100644 (file)
@@ -114,7 +114,7 @@ struct socket {
 
        unsigned long           flags;
 
-       struct socket_wq __rcu  *wq;
+       struct socket_wq        *wq;
 
        struct file             *file;
        struct sock             *sk;
index 623bb8ced060046fdb2b856a147696ec1280be50..2b2a6dce16301d4d9b683dad7503b383d8708500 100644 (file)
@@ -79,6 +79,7 @@ enum {
        NETIF_F_HW_ESP_TX_CSUM_BIT,     /* ESP with TX checksum offload */
        NETIF_F_RX_UDP_TUNNEL_PORT_BIT, /* Offload of RX port for UDP tunnels */
        NETIF_F_HW_TLS_TX_BIT,          /* Hardware TLS TX offload */
+       NETIF_F_HW_TLS_RX_BIT,          /* Hardware TLS RX offload */
 
        NETIF_F_GRO_HW_BIT,             /* Hardware Generic receive offload */
        NETIF_F_HW_TLS_RECORD_BIT,      /* Offload TLS record */
@@ -151,6 +152,7 @@ enum {
 #define NETIF_F_HW_TLS_RECORD  __NETIF_F(HW_TLS_RECORD)
 #define NETIF_F_GSO_UDP_L4     __NETIF_F(GSO_UDP_L4)
 #define NETIF_F_HW_TLS_TX      __NETIF_F(HW_TLS_TX)
+#define NETIF_F_HW_TLS_RX      __NETIF_F(HW_TLS_RX)
 
 #define for_each_netdev_feature(mask_addr, bit)        \
        for_each_set_bit(bit, (unsigned long *)mask_addr, NETDEV_FEATURE_COUNT)
index 3d0cc0b5cec2d7514dbebf32effab9b1e6388c3c..282e2e95ad5b13b3c70fafa444b356e10e764a5e 100644 (file)
@@ -302,6 +302,17 @@ struct netdev_boot_setup {
 
 int __init netdev_boot_setup(char *str);
 
+struct gro_list {
+       struct list_head        list;
+       int                     count;
+};
+
+/*
+ * Size of the GRO hash bucket array; must be less than the number of
+ * bits in napi_struct::gro_bitmask.
+ */
+#define GRO_HASH_BUCKETS       8
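With a power-of-two bucket count, mapping a flow to a bucket is a simple mask of the skb hash; a sketch of the intended lookup:

/* gro_bitmask tracks which of the 8 buckets currently hold packets */
u32 bucket = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1);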
+
 /*
  * Structure for NAPI scheduling similar to tasklet but with weighting
  */
@@ -316,13 +327,13 @@ struct napi_struct {
 
        unsigned long           state;
        int                     weight;
-       unsigned int            gro_count;
+       unsigned long           gro_bitmask;
        int                     (*poll)(struct napi_struct *, int);
 #ifdef CONFIG_NETPOLL
        int                     poll_owner;
 #endif
        struct net_device       *dev;
-       struct sk_buff          *gro_list;
+       struct gro_list         gro_hash[GRO_HASH_BUCKETS];
        struct sk_buff          *skb;
        struct hrtimer          timer;
        struct list_head        dev_list;
@@ -569,6 +580,9 @@ struct netdev_queue {
         * (/sys/class/net/DEV/Q/trans_timeout)
         */
        unsigned long           trans_timeout;
+
+       /* Subordinate device that the queue has been assigned to */
+       struct net_device       *sb_dev;
 /*
  * write-mostly part
  */
@@ -730,10 +744,15 @@ struct xps_map {
  */
 struct xps_dev_maps {
        struct rcu_head rcu;
-       struct xps_map __rcu *cpu_map[0];
+       struct xps_map __rcu *attr_map[0]; /* Either CPUs map or RXQs map */
 };
-#define XPS_DEV_MAPS_SIZE(_tcs) (sizeof(struct xps_dev_maps) +         \
+
+#define XPS_CPU_DEV_MAPS_SIZE(_tcs) (sizeof(struct xps_dev_maps) +     \
        (nr_cpu_ids * (_tcs) * sizeof(struct xps_map *)))
+
+#define XPS_RXQ_DEV_MAPS_SIZE(_tcs, _rxqs) (sizeof(struct xps_dev_maps) +\
+       (_rxqs * (_tcs) * sizeof(struct xps_map *)))
+
 #endif /* CONFIG_XPS */
 
 #define TC_MAX_QUEUE   16
@@ -779,7 +798,8 @@ static inline bool netdev_phys_item_id_same(struct netdev_phys_item_id *a,
 }
 
 typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
-                                      struct sk_buff *skb);
+                                      struct sk_buff *skb,
+                                      struct net_device *sb_dev);
 
 enum tc_setup_type {
        TC_SETUP_QDISC_MQPRIO,
@@ -792,6 +812,7 @@ enum tc_setup_type {
        TC_SETUP_QDISC_RED,
        TC_SETUP_QDISC_PRIO,
        TC_SETUP_QDISC_MQ,
+       TC_SETUP_QDISC_ETF,
 };
 
 /* These structures hold the attributes of bpf state that are being passed
@@ -807,11 +828,8 @@ enum bpf_netdev_command {
         */
        XDP_SETUP_PROG,
        XDP_SETUP_PROG_HW,
-       /* Check if a bpf program is set on the device.  The callee should
-        * set @prog_attached to one of XDP_ATTACHED_* values, note that "true"
-        * is equivalent to XDP_ATTACHED_DRV.
-        */
        XDP_QUERY_PROG,
+       XDP_QUERY_PROG_HW,
        /* BPF program for offload callbacks, invoked at program load time. */
        BPF_OFFLOAD_VERIFIER_PREP,
        BPF_OFFLOAD_TRANSLATE,
@@ -835,9 +853,8 @@ struct netdev_bpf {
                        struct bpf_prog *prog;
                        struct netlink_ext_ack *extack;
                };
-               /* XDP_QUERY_PROG */
+               /* XDP_QUERY_PROG, XDP_QUERY_PROG_HW */
                struct {
-                       u8 prog_attached;
                        u32 prog_id;
                        /* flags with which program was installed */
                        u32 prog_flags;
@@ -855,10 +872,10 @@ struct netdev_bpf {
                struct {
                        struct bpf_offloaded_map *offmap;
                };
-               /* XDP_SETUP_XSK_UMEM */
+               /* XDP_QUERY_XSK_UMEM, XDP_SETUP_XSK_UMEM */
                struct {
-                       struct xdp_umem *umem;
-                       u16 queue_id;
+                       struct xdp_umem *umem; /* out for query */
+                       u16 queue_id; /* in for query */
                } xsk;
        };
 };
@@ -891,6 +908,8 @@ struct tlsdev_ops {
        void (*tls_dev_del)(struct net_device *netdev,
                            struct tls_context *ctx,
                            enum tls_offload_ctx_dir direction);
+       void (*tls_dev_resync_rx)(struct net_device *netdev,
+                                 struct sock *sk, u32 seq, u64 rcd_sn);
 };
 #endif
 
@@ -942,7 +961,8 @@ struct dev_ifalias {
  *     those the driver believes to be appropriate.
  *
  * u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb,
- *                         void *accel_priv, select_queue_fallback_t fallback);
+ *                         struct net_device *sb_dev,
+ *                         select_queue_fallback_t fallback);
  *     Called to decide which queue to use when device supports multiple
  *     transmit queues.
  *
@@ -1214,7 +1234,7 @@ struct net_device_ops {
                                                      netdev_features_t features);
        u16                     (*ndo_select_queue)(struct net_device *dev,
                                                    struct sk_buff *skb,
-                                                   void *accel_priv,
+                                                   struct net_device *sb_dev,
                                                    select_queue_fallback_t fallback);
        void                    (*ndo_change_rx_flags)(struct net_device *dev,
                                                       int flags);
@@ -1909,7 +1929,8 @@ struct net_device {
        int                     watchdog_timeo;
 
 #ifdef CONFIG_XPS
-       struct xps_dev_maps __rcu *xps_maps;
+       struct xps_dev_maps __rcu *xps_cpus_map;
+       struct xps_dev_maps __rcu *xps_rxqs_map;
 #endif
 #ifdef CONFIG_NET_CLS_ACT
        struct mini_Qdisc __rcu *miniq_egress;
@@ -1978,7 +1999,7 @@ struct net_device {
 #ifdef CONFIG_DCB
        const struct dcbnl_rtnl_ops *dcbnl_ops;
 #endif
-       u8                      num_tc;
+       s16                     num_tc;
        struct netdev_tc_txq    tc_to_txq[TC_MAX_QUEUE];
        u8                      prio_tc_map[TC_BITMASK + 1];
 
@@ -2032,6 +2053,17 @@ int netdev_get_num_tc(struct net_device *dev)
        return dev->num_tc;
 }
 
+void netdev_unbind_sb_channel(struct net_device *dev,
+                             struct net_device *sb_dev);
+int netdev_bind_sb_channel_queue(struct net_device *dev,
+                                struct net_device *sb_dev,
+                                u8 tc, u16 count, u16 offset);
+int netdev_set_sb_channel(struct net_device *dev, u16 channel);
+static inline int netdev_get_sb_channel(struct net_device *dev)
+{
+       return max_t(int, -dev->num_tc, 0);
+}
+
 static inline
 struct netdev_queue *netdev_get_tx_queue(const struct net_device *dev,
                                         unsigned int index)
@@ -2076,7 +2108,7 @@ static inline void netdev_for_each_tx_queue(struct net_device *dev,
 
 struct netdev_queue *netdev_pick_tx(struct net_device *dev,
                                    struct sk_buff *skb,
-                                   void *accel_priv);
+                                   struct net_device *sb_dev);
 
 /* returns the headroom that the master device needs to take in account
  * when forwarding to this dev
@@ -2255,10 +2287,10 @@ static inline int gro_recursion_inc_test(struct sk_buff *skb)
        return ++NAPI_GRO_CB(skb)->recursion_counter == GRO_RECURSION_LIMIT;
 }
 
-typedef struct sk_buff **(*gro_receive_t)(struct sk_buff **, struct sk_buff *);
-static inline struct sk_buff **call_gro_receive(gro_receive_t cb,
-                                               struct sk_buff **head,
-                                               struct sk_buff *skb)
+typedef struct sk_buff *(*gro_receive_t)(struct list_head *, struct sk_buff *);
+static inline struct sk_buff *call_gro_receive(gro_receive_t cb,
+                                              struct list_head *head,
+                                              struct sk_buff *skb)
 {
        if (unlikely(gro_recursion_inc_test(skb))) {
                NAPI_GRO_CB(skb)->flush |= 1;
@@ -2268,12 +2300,12 @@ static inline struct sk_buff **call_gro_receive(gro_receive_t cb,
        return cb(head, skb);
 }
 
-typedef struct sk_buff **(*gro_receive_sk_t)(struct sock *, struct sk_buff **,
-                                            struct sk_buff *);
-static inline struct sk_buff **call_gro_receive_sk(gro_receive_sk_t cb,
-                                                  struct sock *sk,
-                                                  struct sk_buff **head,
-                                                  struct sk_buff *skb)
+typedef struct sk_buff *(*gro_receive_sk_t)(struct sock *, struct list_head *,
+                                           struct sk_buff *);
+static inline struct sk_buff *call_gro_receive_sk(gro_receive_sk_t cb,
+                                                 struct sock *sk,
+                                                 struct list_head *head,
+                                                 struct sk_buff *skb)
 {
        if (unlikely(gro_recursion_inc_test(skb))) {
                NAPI_GRO_CB(skb)->flush |= 1;
@@ -2290,6 +2322,9 @@ struct packet_type {
                                         struct net_device *,
                                         struct packet_type *,
                                         struct net_device *);
+       void                    (*list_func) (struct list_head *,
+                                             struct packet_type *,
+                                             struct net_device *);
        bool                    (*id_match)(struct packet_type *ptype,
                                            struct sock *sk);
        void                    *af_packet_priv;
@@ -2299,8 +2334,8 @@ struct packet_type {
 struct offload_callbacks {
        struct sk_buff          *(*gso_segment)(struct sk_buff *skb,
                                                netdev_features_t features);
-       struct sk_buff          **(*gro_receive)(struct sk_buff **head,
-                                                struct sk_buff *skb);
+       struct sk_buff          *(*gro_receive)(struct list_head *head,
+                                               struct sk_buff *skb);
        int                     (*gro_complete)(struct sk_buff *skb, int nhoff);
 };
 
@@ -2537,8 +2572,14 @@ void dev_close(struct net_device *dev);
 void dev_close_many(struct list_head *head, bool unlink);
 void dev_disable_lro(struct net_device *dev);
 int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *newskb);
+u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb,
+                    struct net_device *sb_dev,
+                    select_queue_fallback_t fallback);
+u16 dev_pick_tx_cpu_id(struct net_device *dev, struct sk_buff *skb,
+                      struct net_device *sb_dev,
+                      select_queue_fallback_t fallback);
 int dev_queue_xmit(struct sk_buff *skb);
-int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv);
+int dev_queue_xmit_accel(struct sk_buff *skb, struct net_device *sb_dev);
 int dev_direct_xmit(struct sk_buff *skb, u16 queue_id);
 int register_netdevice(struct net_device *dev);
 void unregister_netdevice_queue(struct net_device *dev, struct list_head *head);
@@ -2568,7 +2609,7 @@ struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex);
 struct net_device *dev_get_by_napi_id(unsigned int napi_id);
 int netdev_get_name(struct net *net, char *name, int ifindex);
 int dev_restart(struct net_device *dev);
-int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb);
+int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb);
 
 static inline unsigned int skb_gro_offset(const struct sk_buff *skb)
 {
@@ -2784,13 +2825,13 @@ static inline void skb_gro_remcsum_cleanup(struct sk_buff *skb,
 }
 
 #ifdef CONFIG_XFRM_OFFLOAD
-static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff **pp, int flush)
+static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff *pp, int flush)
 {
        if (PTR_ERR(pp) != -EINPROGRESS)
                NAPI_GRO_CB(skb)->flush |= flush;
 }
 static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb,
-                                              struct sk_buff **pp,
+                                              struct sk_buff *pp,
                                               int flush,
                                               struct gro_remcsum *grc)
 {
@@ -2801,12 +2842,12 @@ static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb,
        }
 }
 #else
-static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff **pp, int flush)
+static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff *pp, int flush)
 {
        NAPI_GRO_CB(skb)->flush |= flush;
 }
 static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb,
-                                              struct sk_buff **pp,
+                                              struct sk_buff *pp,
                                               int flush,
                                               struct gro_remcsum *grc)
 {
@@ -3278,6 +3319,92 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
 #ifdef CONFIG_XPS
 int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
                        u16 index);
+int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
+                         u16 index, bool is_rxqs_map);
+
+/**
+ *     netif_attr_test_mask - Test a CPU or Rx queue set in a mask
+ *     @j: CPU/Rx queue index
+ *     @mask: bitmask of all CPUs/Rx queues
+ *     @nr_bits: number of bits in the bitmask
+ *
+ * Test if a CPU or Rx queue index is set in a mask of all CPU/Rx queues.
+ */
+static inline bool netif_attr_test_mask(unsigned long j,
+                                       const unsigned long *mask,
+                                       unsigned int nr_bits)
+{
+       cpu_max_bits_warn(j, nr_bits);
+       return test_bit(j, mask);
+}
+
+/**
+ *     netif_attr_test_online - Test for online CPU/Rx queue
+ *     @j: CPU/Rx queue index
+ *     @online_mask: bitmask for CPUs/Rx queues that are online
+ *     @nr_bits: number of bits in the bitmask
+ *
+ * Returns true if a CPU/Rx queue is online.
+ */
+static inline bool netif_attr_test_online(unsigned long j,
+                                         const unsigned long *online_mask,
+                                         unsigned int nr_bits)
+{
+       cpu_max_bits_warn(j, nr_bits);
+
+       if (online_mask)
+               return test_bit(j, online_mask);
+
+       return (j < nr_bits);
+}
+
+/**
+ *     netif_attrmask_next - get the next CPU/Rx queue in a CPU/Rx queues mask
+ *     @n: CPU/Rx queue index
+ *     @srcp: the cpumask/Rx queue mask pointer
+ *     @nr_bits: number of bits in the bitmask
+ *
+ * Returns >= nr_bits if no further CPUs/Rx queues set.
+ */
+static inline unsigned int netif_attrmask_next(int n, const unsigned long *srcp,
+                                              unsigned int nr_bits)
+{
+       /* -1 is a legal arg here. */
+       if (n != -1)
+               cpu_max_bits_warn(n, nr_bits);
+
+       if (srcp)
+               return find_next_bit(srcp, nr_bits, n + 1);
+
+       return n + 1;
+}
+
+/**
+ *     netif_attrmask_next_and - get the next CPU/Rx queue in *src1p & *src2p
+ *     @n: CPU/Rx queue index
+ *     @src1p: the first CPUs/Rx queues mask pointer
+ *     @src2p: the second CPUs/Rx queues mask pointer
+ *     @nr_bits: number of bits in the bitmask
+ *
+ * Returns >= nr_bits if no further CPUs/Rx queues set in both.
+ */
+static inline int netif_attrmask_next_and(int n, const unsigned long *src1p,
+                                         const unsigned long *src2p,
+                                         unsigned int nr_bits)
+{
+       /* -1 is a legal arg here. */
+       if (n != -1)
+               cpu_max_bits_warn(n, nr_bits);
+
+       if (src1p && src2p)
+               return find_next_and_bit(src1p, src2p, nr_bits, n + 1);
+       else if (src1p)
+               return find_next_bit(src1p, nr_bits, n + 1);
+       else if (src2p)
+               return find_next_bit(src2p, nr_bits, n + 1);
+
+       return n + 1;
+}
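These walkers follow the cpumask iteration idiom; a sketch of visiting every index set in a mask ("mask" and "nr_bits" are caller-provided):

int j = -1;

while ((j = netif_attrmask_next(j, mask, nr_bits)) < nr_bits)
	pr_debug("CPU/Rx queue %d is in the map\n", j);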
 #else
 static inline int netif_set_xps_queue(struct net_device *dev,
                                      const struct cpumask *mask,
@@ -3304,8 +3431,9 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq);
 int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq);
 #else
 static inline int netif_set_real_num_rx_queues(struct net_device *dev,
-                                               unsigned int rxq)
+                                               unsigned int rxqs)
 {
+       dev->real_num_rx_queues = rxqs;
        return 0;
 }
 #endif
@@ -3384,6 +3512,7 @@ int netif_rx(struct sk_buff *skb);
 int netif_rx_ni(struct sk_buff *skb);
 int netif_receive_skb(struct sk_buff *skb);
 int netif_receive_skb_core(struct sk_buff *skb);
+void netif_receive_skb_list(struct list_head *head);
 gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb);
 void napi_gro_flush(struct napi_struct *napi, bool flush_old);
 struct sk_buff *napi_get_frags(struct napi_struct *napi);
@@ -3418,6 +3547,8 @@ int dev_set_alias(struct net_device *, const char *, size_t);
 int dev_get_alias(const struct net_device *, char *, size_t);
 int dev_change_net_namespace(struct net_device *, struct net *, const char *);
 int __dev_set_mtu(struct net_device *, int);
+int dev_set_mtu_ext(struct net_device *dev, int mtu,
+                   struct netlink_ext_ack *extack);
 int dev_set_mtu(struct net_device *, int);
 int dev_change_tx_queue_len(struct net_device *, unsigned long);
 void dev_set_group(struct net_device *, int);
@@ -3435,8 +3566,9 @@ struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf);
 int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
                      int fd, u32 flags);
-void __dev_xdp_query(struct net_device *dev, bpf_op_t xdp_op,
-                    struct netdev_bpf *xdp);
+u32 __dev_xdp_query(struct net_device *dev, bpf_op_t xdp_op,
+                   enum bpf_netdev_command cmd);
+int xdp_umem_query(struct net_device *dev, u16 queue_id);
 
 int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
 int dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
index dd2052f0efb7742f881cdcf334edc570c7d4d790..07efffd0c759d0b509dec19acb6b718cbab06031 100644 (file)
@@ -288,6 +288,24 @@ NF_HOOK(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct
        return ret;
 }
 
+static inline void
+NF_HOOK_LIST(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk,
+            struct list_head *head, struct net_device *in, struct net_device *out,
+            int (*okfn)(struct net *, struct sock *, struct sk_buff *))
+{
+       struct sk_buff *skb, *next;
+       struct list_head sublist;
+
+       INIT_LIST_HEAD(&sublist);
+       list_for_each_entry_safe(skb, next, head, list) {
+               list_del(&skb->list);
+               if (nf_hook(pf, hook, net, sk, skb, in, out, okfn) == 1)
+                       list_add_tail(&skb->list, &sublist);
+       }
+       /* Put passed packets back on main list */
+       list_splice(&sublist, head);
+}
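A usage sketch mirroring a list-based IPv4 receive path: run the whole batch through PRE_ROUTING in one call, then continue with whatever survived (ip_rcv_finish stands in here for the per-packet continuation; packets stolen or dropped by hooks are left off "head"):

NF_HOOK_LIST(NFPROTO_IPV4, NF_INET_PRE_ROUTING, net, NULL,
	     head, dev, NULL, ip_rcv_finish);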
+
 /* Call setsockopt() */
 int nf_setsockopt(struct sock *sk, u_int8_t pf, int optval, char __user *opt,
                  unsigned int len);
@@ -369,6 +387,14 @@ NF_HOOK(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk,
        return okfn(net, sk, skb);
 }
 
+static inline void
+NF_HOOK_LIST(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk,
+            struct list_head *head, struct net_device *in, struct net_device *out,
+            int (*okfn)(struct net *, struct sock *, struct sk_buff *))
+{
+       /* nothing to do */
+}
+
 static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net,
                          struct sock *sk, struct sk_buff *skb,
                          struct net_device *indev, struct net_device *outdev,
@@ -388,8 +414,17 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family)
 
 extern void (*ip_ct_attach)(struct sk_buff *, const struct sk_buff *) __rcu;
 void nf_ct_attach(struct sk_buff *, const struct sk_buff *);
+struct nf_conntrack_tuple;
+bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple,
+                        const struct sk_buff *skb);
 #else
 static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {}
+struct nf_conntrack_tuple;
+static inline bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple,
+                                      const struct sk_buff *skb)
+{
+       return false;
+}
 #endif
 
 struct nf_conn;
@@ -398,6 +433,8 @@ enum ip_conntrack_info;
 struct nf_ct_hook {
        int (*update)(struct net *net, struct sk_buff *skb);
        void (*destroy)(struct nf_conntrack *);
+       bool (*get_tuple_skb)(struct nf_conntrack_tuple *,
+                             const struct sk_buff *);
 };
 extern struct nf_ct_hook __rcu *nf_ct_hook;
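/* Editor's sketch (assumption): a caller outside conntrack proper can
 * recover the conntrack tuple for an skb; with conntrack disabled the
 * static inline stub above compiles this down to "return false".
 */
static bool example_skb_to_tuple(const struct sk_buff *skb,
                                 struct nf_conntrack_tuple *tuple)
{
        return nf_ct_get_tuple_skb(tuple, skb);
}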
 
index 3ecc3050be0ec29fc9716b3ad616a43131d36be3..4a520d3304a2fad7a47ddd366672c2fbb054ec1e 100644 (file)
@@ -29,6 +29,7 @@ struct nfnetlink_subsystem {
        __u8 subsys_id;                 /* nfnetlink subsystem ID */
        __u8 cb_count;                  /* number of callbacks */
        const struct nfnl_callback *cb; /* callback for individual types */
+       struct module *owner;
        int (*commit)(struct net *net, struct sk_buff *skb);
        int (*abort)(struct net *net, struct sk_buff *skb);
        void (*cleanup)(struct net *net);
index b31dabfdb453e3d178f68ee8dbaf57af7d708c58..95ab5cc64422687a0003a8719facb8f4a480285b 100644 (file)
@@ -23,9 +23,6 @@ struct nf_queue_entry;
 #ifdef CONFIG_INET
 __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
                       unsigned int dataoff, u_int8_t protocol);
-__sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook,
-                              unsigned int dataoff, unsigned int len,
-                              u_int8_t protocol);
 int nf_ip_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
                bool strict);
 int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry);
@@ -35,14 +32,6 @@ static inline __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
 {
        return 0;
 }
-static inline __sum16 nf_ip_checksum_partial(struct sk_buff *skb,
-                                            unsigned int hook,
-                                            unsigned int dataoff,
-                                            unsigned int len,
-                                            u_int8_t protocol)
-{
-       return 0;
-}
 static inline int nf_ip_route(struct net *net, struct dst_entry **dst,
                              struct flowi *fl, bool strict)
 {
index 288c597e75b304ea70c6a3389a84e4d6f7104031..c0dc4dd78887a69757291f25d66d2906f3786555 100644 (file)
@@ -30,11 +30,6 @@ struct nf_ipv6_ops {
        void (*route_input)(struct sk_buff *skb);
        int (*fragment)(struct net *net, struct sock *sk, struct sk_buff *skb,
                        int (*output)(struct net *, struct sock *, struct sk_buff *));
-       __sum16 (*checksum)(struct sk_buff *skb, unsigned int hook,
-                           unsigned int dataoff, u_int8_t protocol);
-       __sum16 (*checksum_partial)(struct sk_buff *skb, unsigned int hook,
-                                   unsigned int dataoff, unsigned int len,
-                                   u_int8_t protocol);
        int (*route)(struct net *net, struct dst_entry **dst, struct flowi *fl,
                     bool strict);
        int (*reroute)(struct sk_buff *skb, const struct nf_queue_entry *entry);
index f3075d6c7e8229c999ab650537f1e3b11e1f457b..71f121b66ca896a455d4ebf310c94d4a0472eaff 100644 (file)
@@ -170,7 +170,6 @@ netlink_skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
 struct netlink_callback {
        struct sk_buff          *skb;
        const struct nlmsghdr   *nlh;
-       int                     (*start)(struct netlink_callback *);
        int                     (*dump)(struct sk_buff * skb,
                                        struct netlink_callback *cb);
        int                     (*done)(struct netlink_callback *cb);
index e6b240b6196cae3a7dad22693af3857849fb0c8f..379affc63e24c64937c503f6e87afed63bbb8db9 100644 (file)
@@ -21,4 +21,9 @@
 
 #include <uapi/linux/openvswitch.h>
 
+#define OVS_CLONE_ATTR_EXEC      0   /* Specify a u32 value. When nonzero,
+                                     * actions in the clone will not change
+                                     * flow keys; when zero, they may.
+                                     */
+
 #endif /* _LINUX_OPENVSWITCH_H */
index 6cd09098427c5bcd93d58f1d842165c2b9ffe7c0..cd6f637cbbfb60a099713f0987bcd2db134ea1c2 100644 (file)
@@ -824,6 +824,16 @@ static inline bool phy_interrupt_is_valid(struct phy_device *phydev)
        return phydev->irq != PHY_POLL && phydev->irq != PHY_IGNORE_INTERRUPT;
 }
 
+/**
+ * phy_polling_mode - Convenience function for testing whether polling is
+ * used to detect PHY status changes
+ * @phydev: the phy_device struct
+ */
+static inline bool phy_polling_mode(struct phy_device *phydev)
+{
+       return phydev->irq == PHY_POLL;
+}
+
 /**
  * phy_is_internal - Convenience function for testing if a PHY is internal
  * @phydev: the phy_device struct
@@ -942,6 +952,8 @@ void phy_start(struct phy_device *phydev);
 void phy_stop(struct phy_device *phydev);
 int phy_start_aneg(struct phy_device *phydev);
 int phy_aneg_done(struct phy_device *phydev);
+int phy_speed_down(struct phy_device *phydev, bool sync);
+int phy_speed_up(struct phy_device *phydev);
 
 int phy_stop_interrupts(struct phy_device *phydev);
 int phy_restart_aneg(struct phy_device *phydev);
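/* Editor's sketch (assumption): a MAC driver saving energy across suspend
 * by dropping to the slowest advertised link speed, then restoring the
 * original advertisement on resume; example_* names are illustrative.
 */
static int example_mac_suspend(struct phy_device *phydev)
{
        return phy_speed_down(phydev, true); /* sync: wait for renegotiation */
}

static int example_mac_resume(struct phy_device *phydev)
{
        return phy_speed_up(phydev);
}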
index b4040023cbfba2a637b4e2ba4ee147558fe82a9c..8cd34645e892623b71d5ee446dfb6a4423594e93 100644 (file)
@@ -759,6 +759,9 @@ struct qed_generic_tlvs {
        u8 mac[QED_TLV_MAC_COUNT][ETH_ALEN];
 };
 
+#define QED_I2C_DEV_ADDR_A0 0xA0
+#define QED_I2C_DEV_ADDR_A2 0xA2
+
 #define QED_NVM_SIGNATURE 0x12435687
 
 enum qed_nvm_flash_cmd {
@@ -1026,6 +1029,18 @@ struct qed_common_ops {
  * @param enabled - true iff WoL should be enabled.
  */
        int (*update_wol) (struct qed_dev *cdev, bool enabled);
+
+/**
+ * @brief read_module_eeprom
+ *
+ * @param cdev
+ * @param buf - buffer
+ * @param dev_addr - PHY device memory region
+ * @param offset - offset into eeprom contents to be read
+ * @param len - buffer length, i.e., max bytes to be read
+ */
+       int (*read_module_eeprom)(struct qed_dev *cdev,
+                                 char *buf, u8 dev_addr, u32 offset, u32 len);
 };
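/* Editor's sketch (assumption): how an ethtool handler in the companion L2
 * driver might forward a module EEPROM read; 'ops' is a hypothetical pointer
 * to the struct qed_common_ops shown above, and the A0 page is read from
 * offset 0.
 */
static int example_read_sfp(const struct qed_common_ops *ops,
                            struct qed_dev *cdev, char *buf, u32 len)
{
        return ops->read_module_eeprom(cdev, buf, QED_I2C_DEV_ADDR_A0, 0, len);
}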
 
 #define MASK_FIELD(_name, _value) \
index e031e9f2f9d85f1330a9f38b1a82c7f322f00f3e..585ce89c0f336d0d5ab6895be6ccf9fe1b0a1c02 100644 (file)
@@ -25,6 +25,9 @@ struct reciprocal_value {
        u8 sh1, sh2;
 };
 
+/* "reciprocal_value" and "reciprocal_divide" together implement the basic
+ * version of the algorithm described in Figure 4.1 of the paper.
+ */
 struct reciprocal_value reciprocal_value(u32 d);
 
 static inline u32 reciprocal_divide(u32 a, struct reciprocal_value R)
@@ -33,4 +36,69 @@ static inline u32 reciprocal_divide(u32 a, struct reciprocal_value R)
        return (t + ((a - t) >> R.sh1)) >> R.sh2;
 }
 
+struct reciprocal_value_adv {
+       u32 m;
+       u8 sh, exp;
+       bool is_wide_m;
+};
+
+/* "reciprocal_value_adv" implements the advanced version of the algorithm
+ * described in Figure 4.2 of the paper except when "divisor > (1U << 31)" whose
+ * ceil(log2(d)) result will be 32 which then requires u128 divide on host. The
+ * exception case could be easily handled before calling "reciprocal_value_adv".
+ *
+ * The advanced version requires more complex calculation to get the reciprocal
+ * multiplier and other control variables, but then could reduce the required
+ * emulation operations.
+ *
+ * It makes no sense to use this advanced version for host divide emulation,
+ * those extra complexities for calculating multiplier etc could completely
+ * waive our saving on emulation operations.
+ *
+ * However, it makes sense to use it for JIT divide code generation for which
+ * we are willing to trade performance of JITed code with that of host. As shown
+ * by the following pseudo code, the required emulation operations could go down
+ * from 6 (the basic version) to 3 or 4.
+ *
+ * To use the result of "reciprocal_value_adv", suppose we want to calculate
+ * n/d, the pseudo C code will be:
+ *
+ *   struct reciprocal_value_adv rvalue;
+ *   u8 pre_shift, exp;
+ *
+ *   // handle exception case.
+ *   if (d >= (1U << 31)) {
+ *     result = n >= d;
+ *     return;
+ *   }
+ *
+ *   rvalue = reciprocal_value_adv(d, 32);
+ *   exp = rvalue.exp;
+ *   if (rvalue.is_wide_m && !(d & 1)) {
+ *     // floor(log2(d & (2^32 - d)))
+ *     pre_shift = fls(d & -d) - 1;
+ *     rvalue = reciprocal_value_adv(d >> pre_shift, 32 - pre_shift);
+ *   } else {
+ *     pre_shift = 0;
+ *   }
+ *
+ *   // code generation starts.
+ *   if (d == 1U << exp) {
+ *     result = n >> exp;
+ *   } else if (rvalue.is_wide_m) {
+ *     // pre_shift must be zero when reached here.
+ *     t = (n * rvalue.m) >> 32;
+ *     result = n - t;
+ *     result >>= 1;
+ *     result += t;
+ *     result >>= rvalue.sh - 1;
+ *   } else {
+ *     if (pre_shift)
+ *       result = n >> pre_shift;
+ *     result = ((u64)result * rvalue.m) >> 32;
+ *     result >>= rvalue.sh;
+ *   }
+ */
+struct reciprocal_value_adv reciprocal_value_adv(u32 d, u8 prec);
+
 #endif /* _LINUX_RECIPROCAL_DIV_H */
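/* Editor's sketch: the basic reciprocal_value()/reciprocal_divide() pair in
 * action, dividing by a constant with one multiply and two shifts; the
 * divisor 7 is illustrative.
 */
static u32 example_div7(u32 n)
{
        struct reciprocal_value R = reciprocal_value(7);

        return reciprocal_divide(n, R); /* equals n / 7 */
}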
index e6a0031d1b1fdc1793200d1fc93f48bf1973ecfc..8ad2487a86d5b261126e0a3c05357356cb039078 100644 (file)
@@ -66,7 +66,7 @@ struct rfkill_ops {
 
 #if defined(CONFIG_RFKILL) || defined(CONFIG_RFKILL_MODULE)
 /**
- * rfkill_alloc - allocate rfkill structure
+ * rfkill_alloc - Allocate rfkill structure
  * @name: name of the struct -- the string is not copied internally
  * @parent: device that has rf switch on it
  * @type: type of the switch (RFKILL_TYPE_*)
@@ -112,7 +112,7 @@ void rfkill_pause_polling(struct rfkill *rfkill);
 /**
  * rfkill_resume_polling(struct rfkill *rfkill)
  *
- * Pause polling -- say transmitter is off for other reasons.
+ * Resume polling
  * NOTE: not necessary for suspend/resume -- in that case the
  * core stops polling anyway
  */
@@ -130,7 +130,7 @@ void rfkill_resume_polling(struct rfkill *rfkill);
 void rfkill_unregister(struct rfkill *rfkill);
 
 /**
- * rfkill_destroy - free rfkill structure
+ * rfkill_destroy - Free rfkill structure
  * @rfkill: rfkill structure to be destroyed
  *
  * Destroys the rfkill structure.
@@ -140,7 +140,7 @@ void rfkill_destroy(struct rfkill *rfkill);
 /**
  * rfkill_set_hw_state - Set the internal rfkill hardware block state
  * @rfkill: pointer to the rfkill class to modify.
- * @state: the current hardware block state to set
+ * @blocked: the current hardware block state to set
  *
  * rfkill drivers that get events when the hard-blocked state changes
  * use this function to notify the rfkill core (and through that also
@@ -161,7 +161,7 @@ bool rfkill_set_hw_state(struct rfkill *rfkill, bool blocked);
 /**
  * rfkill_set_sw_state - Set the internal rfkill software block state
  * @rfkill: pointer to the rfkill class to modify.
- * @state: the current software block state to set
+ * @blocked: the current software block state to set
  *
  * rfkill drivers that get events when the soft-blocked state changes
  * (yes, some platforms directly act on input but allow changing again)
@@ -183,7 +183,7 @@ bool rfkill_set_sw_state(struct rfkill *rfkill, bool blocked);
 /**
  * rfkill_init_sw_state - Initialize persistent software block state
  * @rfkill: pointer to the rfkill class to modify.
- * @state: the current software block state to set
+ * @blocked: the current software block state to set
  *
  * rfkill drivers that preserve their software block state over power off
  * use this function to notify the rfkill core (and through that also
@@ -208,17 +208,17 @@ void rfkill_init_sw_state(struct rfkill *rfkill, bool blocked);
 void rfkill_set_states(struct rfkill *rfkill, bool sw, bool hw);
 
 /**
- * rfkill_blocked - query rfkill block
+ * rfkill_blocked - Query rfkill block state
  *
  * @rfkill: rfkill struct to query
  */
 bool rfkill_blocked(struct rfkill *rfkill);
 
 /**
- * rfkill_find_type - Helpper for finding rfkill type by name
+ * rfkill_find_type - Helper for finding rfkill type by name
  * @name: the name of the type
  *
- * Returns enum rfkill_type that conrresponds the name.
+ * Returns enum rfkill_type that corresponds to the name.
  */
 enum rfkill_type rfkill_find_type(const char *name);
 
@@ -296,7 +296,7 @@ static inline enum rfkill_type rfkill_find_type(const char *name)
 const char *rfkill_get_led_trigger_name(struct rfkill *rfkill);
 
 /**
- * rfkill_set_led_trigger_name -- set the LED trigger name
+ * rfkill_set_led_trigger_name - Set the LED trigger name
  * @rfkill: rfkill struct
  * @name: LED trigger name
  *
diff --git a/include/linux/rhashtable-types.h b/include/linux/rhashtable-types.h
new file mode 100644 (file)
index 0000000..763d613
--- /dev/null
@@ -0,0 +1,137 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Resizable, Scalable, Concurrent Hash Table
+ *
+ * Simple structures that might be needed in include
+ * files.
+ */
+
+#ifndef _LINUX_RHASHTABLE_TYPES_H
+#define _LINUX_RHASHTABLE_TYPES_H
+
+#include <linux/atomic.h>
+#include <linux/compiler.h>
+#include <linux/mutex.h>
+#include <linux/workqueue.h>
+
+struct rhash_head {
+       struct rhash_head __rcu         *next;
+};
+
+struct rhlist_head {
+       struct rhash_head               rhead;
+       struct rhlist_head __rcu        *next;
+};
+
+struct bucket_table;
+
+/**
+ * struct rhashtable_compare_arg - Key for the function rhashtable_compare
+ * @ht: Hash table
+ * @key: Key to compare against
+ */
+struct rhashtable_compare_arg {
+       struct rhashtable *ht;
+       const void *key;
+};
+
+typedef u32 (*rht_hashfn_t)(const void *data, u32 len, u32 seed);
+typedef u32 (*rht_obj_hashfn_t)(const void *data, u32 len, u32 seed);
+typedef int (*rht_obj_cmpfn_t)(struct rhashtable_compare_arg *arg,
+                              const void *obj);
+
+/**
+ * struct rhashtable_params - Hash table construction parameters
+ * @nelem_hint: Hint on number of elements, should be 75% of desired size
+ * @key_len: Length of key
+ * @key_offset: Offset of key in struct to be hashed
+ * @head_offset: Offset of rhash_head in struct to be hashed
+ * @max_size: Maximum size while expanding
+ * @min_size: Minimum size while shrinking
+ * @locks_mul: Number of bucket locks to allocate per cpu (default: 32)
+ * @automatic_shrinking: Enable automatic shrinking of tables
+ * @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash)
+ * @obj_hashfn: Function to hash object
+ * @obj_cmpfn: Function to compare key with object
+ */
+struct rhashtable_params {
+       u16                     nelem_hint;
+       u16                     key_len;
+       u16                     key_offset;
+       u16                     head_offset;
+       unsigned int            max_size;
+       u16                     min_size;
+       bool                    automatic_shrinking;
+       u8                      locks_mul;
+       rht_hashfn_t            hashfn;
+       rht_obj_hashfn_t        obj_hashfn;
+       rht_obj_cmpfn_t         obj_cmpfn;
+};
+
+/**
+ * struct rhashtable - Hash table handle
+ * @tbl: Bucket table
+ * @key_len: Key length for hashfn
+ * @max_elems: Maximum number of elements in table
+ * @p: Configuration parameters
+ * @rhlist: True if this is an rhltable
+ * @run_work: Deferred worker to expand/shrink asynchronously
+ * @mutex: Mutex to protect current/future table swapping
+ * @lock: Spin lock to protect walker list
+ * @nelems: Number of elements in table
+ */
+struct rhashtable {
+       struct bucket_table __rcu       *tbl;
+       unsigned int                    key_len;
+       unsigned int                    max_elems;
+       struct rhashtable_params        p;
+       bool                            rhlist;
+       struct work_struct              run_work;
+       struct mutex                    mutex;
+       spinlock_t                      lock;
+       atomic_t                        nelems;
+};
+
+/**
+ * struct rhltable - Hash table with duplicate objects in a list
+ * @ht: Underlying rhtable
+ */
+struct rhltable {
+       struct rhashtable ht;
+};
+
+/**
+ * struct rhashtable_walker - Hash table walker
+ * @list: List entry on list of walkers
+ * @tbl: The table that we were walking over
+ */
+struct rhashtable_walker {
+       struct list_head list;
+       struct bucket_table *tbl;
+};
+
+/**
+ * struct rhashtable_iter - Hash table iterator
+ * @ht: Table to iterate through
+ * @p: Current pointer
+ * @list: Current hash list pointer
+ * @walker: Associated rhashtable walker
+ * @slot: Current slot
+ * @skip: Number of entries to skip in slot
+ */
+struct rhashtable_iter {
+       struct rhashtable *ht;
+       struct rhash_head *p;
+       struct rhlist_head *list;
+       struct rhashtable_walker walker;
+       unsigned int slot;
+       unsigned int skip;
+       bool end_of_table;
+};
+
+int rhashtable_init(struct rhashtable *ht,
+                   const struct rhashtable_params *params);
+int rhltable_init(struct rhltable *hlt,
+                 const struct rhashtable_params *params);
+
+#endif /* _LINUX_RHASHTABLE_TYPES_H */
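/* Editor's sketch (assumption): minimal compile-time parameters for a table
 * of example_obj entries keyed by a u32 id; all names here are illustrative.
 */
struct example_obj {
        u32 id;
        struct rhash_head node;
};

static const struct rhashtable_params example_params = {
        .key_len             = sizeof(u32),
        .key_offset          = offsetof(struct example_obj, id),
        .head_offset         = offsetof(struct example_obj, node),
        .automatic_shrinking = true,
};

/* A table is then set up with rhashtable_init(&ht, &example_params),
 * using only the types declared in this header.
 */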
index 4e1f535c2034e8d292f3d5b5bd569423878bab58..eb71110392479784db1d4a0b9d86d2eee6631789 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Resizable, Scalable, Concurrent Hash Table
  *
 #ifndef _LINUX_RHASHTABLE_H
 #define _LINUX_RHASHTABLE_H
 
-#include <linux/atomic.h>
-#include <linux/compiler.h>
 #include <linux/err.h>
 #include <linux/errno.h>
 #include <linux/jhash.h>
 #include <linux/list_nulls.h>
 #include <linux/workqueue.h>
-#include <linux/mutex.h>
 #include <linux/rculist.h>
 
+#include <linux/rhashtable-types.h>
 /*
  * The end of the chain is marked with a special nulls marker which has
- * the following format:
- *
- * +-------+-----------------------------------------------------+-+
- * | Base  |                      Hash                           |1|
- * +-------+-----------------------------------------------------+-+
- *
- * Base (4 bits) : Reserved to distinguish between multiple tables.
- *                 Specified via &struct rhashtable_params.nulls_base.
- * Hash (27 bits): Full hash (unmasked) of first element added to bucket
- * 1 (1 bit)     : Nulls marker (always set)
- *
- * The remaining bits of the next pointer remain unused for now.
+ * the least significant bit set.
  */
-#define RHT_BASE_BITS          4
-#define RHT_HASH_BITS          27
-#define RHT_BASE_SHIFT         RHT_HASH_BITS
-
-/* Base bits plus 1 bit for nulls marker */
-#define RHT_HASH_RESERVED_SPACE        (RHT_BASE_BITS + 1)
 
 /* Maximum chain length before rehash
  *
  */
 #define RHT_ELASTICITY 16u
 
-struct rhash_head {
-       struct rhash_head __rcu         *next;
-};
-
-struct rhlist_head {
-       struct rhash_head               rhead;
-       struct rhlist_head __rcu        *next;
-};
-
 /**
  * struct bucket_table - Table of hash buckets
  * @size: Number of hash buckets
@@ -102,132 +75,14 @@ struct bucket_table {
        struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp;
 };
 
-/**
- * struct rhashtable_compare_arg - Key for the function rhashtable_compare
- * @ht: Hash table
- * @key: Key to compare against
- */
-struct rhashtable_compare_arg {
-       struct rhashtable *ht;
-       const void *key;
-};
-
-typedef u32 (*rht_hashfn_t)(const void *data, u32 len, u32 seed);
-typedef u32 (*rht_obj_hashfn_t)(const void *data, u32 len, u32 seed);
-typedef int (*rht_obj_cmpfn_t)(struct rhashtable_compare_arg *arg,
-                              const void *obj);
-
-struct rhashtable;
-
-/**
- * struct rhashtable_params - Hash table construction parameters
- * @nelem_hint: Hint on number of elements, should be 75% of desired size
- * @key_len: Length of key
- * @key_offset: Offset of key in struct to be hashed
- * @head_offset: Offset of rhash_head in struct to be hashed
- * @max_size: Maximum size while expanding
- * @min_size: Minimum size while shrinking
- * @locks_mul: Number of bucket locks to allocate per cpu (default: 32)
- * @automatic_shrinking: Enable automatic shrinking of tables
- * @nulls_base: Base value to generate nulls marker
- * @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash)
- * @obj_hashfn: Function to hash object
- * @obj_cmpfn: Function to compare key with object
- */
-struct rhashtable_params {
-       u16                     nelem_hint;
-       u16                     key_len;
-       u16                     key_offset;
-       u16                     head_offset;
-       unsigned int            max_size;
-       u16                     min_size;
-       bool                    automatic_shrinking;
-       u8                      locks_mul;
-       u32                     nulls_base;
-       rht_hashfn_t            hashfn;
-       rht_obj_hashfn_t        obj_hashfn;
-       rht_obj_cmpfn_t         obj_cmpfn;
-};
-
-/**
- * struct rhashtable - Hash table handle
- * @tbl: Bucket table
- * @key_len: Key length for hashfn
- * @max_elems: Maximum number of elements in table
- * @p: Configuration parameters
- * @rhlist: True if this is an rhltable
- * @run_work: Deferred worker to expand/shrink asynchronously
- * @mutex: Mutex to protect current/future table swapping
- * @lock: Spin lock to protect walker list
- * @nelems: Number of elements in table
- */
-struct rhashtable {
-       struct bucket_table __rcu       *tbl;
-       unsigned int                    key_len;
-       unsigned int                    max_elems;
-       struct rhashtable_params        p;
-       bool                            rhlist;
-       struct work_struct              run_work;
-       struct mutex                    mutex;
-       spinlock_t                      lock;
-       atomic_t                        nelems;
-};
-
-/**
- * struct rhltable - Hash table with duplicate objects in a list
- * @ht: Underlying rhtable
- */
-struct rhltable {
-       struct rhashtable ht;
-};
-
-/**
- * struct rhashtable_walker - Hash table walker
- * @list: List entry on list of walkers
- * @tbl: The table that we were walking over
- */
-struct rhashtable_walker {
-       struct list_head list;
-       struct bucket_table *tbl;
-};
-
-/**
- * struct rhashtable_iter - Hash table iterator
- * @ht: Table to iterate through
- * @p: Current pointer
- * @list: Current hash list pointer
- * @walker: Associated rhashtable walker
- * @slot: Current slot
- * @skip: Number of entries to skip in slot
- */
-struct rhashtable_iter {
-       struct rhashtable *ht;
-       struct rhash_head *p;
-       struct rhlist_head *list;
-       struct rhashtable_walker walker;
-       unsigned int slot;
-       unsigned int skip;
-       bool end_of_table;
-};
-
-static inline unsigned long rht_marker(const struct rhashtable *ht, u32 hash)
-{
-       return NULLS_MARKER(ht->p.nulls_base + hash);
-}
-
-#define INIT_RHT_NULLS_HEAD(ptr, ht, hash) \
-       ((ptr) = (typeof(ptr)) rht_marker(ht, hash))
+#define INIT_RHT_NULLS_HEAD(ptr)       \
+       ((ptr) = (typeof(ptr)) NULLS_MARKER(0))
 
 static inline bool rht_is_a_nulls(const struct rhash_head *ptr)
 {
        return ((unsigned long) ptr & 1);
 }
 
-static inline unsigned long rht_get_nulls_value(const struct rhash_head *ptr)
-{
-       return ((unsigned long) ptr) >> 1;
-}
-
 static inline void *rht_obj(const struct rhashtable *ht,
                            const struct rhash_head *he)
 {
@@ -237,7 +92,7 @@ static inline void *rht_obj(const struct rhashtable *ht,
 static inline unsigned int rht_bucket_index(const struct bucket_table *tbl,
                                            unsigned int hash)
 {
-       return (hash >> RHT_HASH_RESERVED_SPACE) & (tbl->size - 1);
+       return hash & (tbl->size - 1);
 }
 
 static inline unsigned int rht_key_get_hash(struct rhashtable *ht,
@@ -376,11 +231,6 @@ static inline int lockdep_rht_bucket_is_held(const struct bucket_table *tbl,
 }
 #endif /* CONFIG_PROVE_LOCKING */
 
-int rhashtable_init(struct rhashtable *ht,
-                   const struct rhashtable_params *params);
-int rhltable_init(struct rhltable *hlt,
-                 const struct rhashtable_params *params);
-
 void *rhashtable_insert_slow(struct rhashtable *ht, const void *key,
                             struct rhash_head *obj);
 
@@ -745,7 +595,7 @@ static inline void *__rhashtable_insert_fast(
        lock = rht_bucket_lock(tbl, hash);
        spin_lock_bh(lock);
 
-       if (unlikely(rht_dereference_bucket(tbl->future_tbl, tbl, hash))) {
+       if (unlikely(rcu_access_pointer(tbl->future_tbl))) {
 slow_path:
                spin_unlock_bh(lock);
                rcu_read_unlock();
index b36c76635f182007584076ff5fb4d644cb134a0f..83d94341e0032795035b9545483f3cc32a6d9500 100644 (file)
@@ -801,4 +801,11 @@ struct sctp_strreset_resptsn {
        __be32 receivers_next_tsn;
 };
 
+enum {
+       SCTP_DSCP_SET_MASK = 0x1,
+       SCTP_DSCP_VAL_MASK = 0xfc,
+       SCTP_FLOWLABEL_SET_MASK = 0x100000,
+       SCTP_FLOWLABEL_VAL_MASK = 0xfffff
+};
+
 #endif /* __LINUX_SCTP_H__ */
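/* Editor's sketch (assumption): the SET masks record whether a value was
 * explicitly configured while the VAL masks carry the value itself; the
 * DSCP value occupies the TOS-style high six bits of the low byte.
 */
static inline bool example_get_dscp(u32 packed, u8 *dscp)
{
        if (!(packed & SCTP_DSCP_SET_MASK))
                return false;
        *dscp = packed & SCTP_DSCP_VAL_MASK;
        return true;
}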
index ebce9e24906a7121c6c24c806be252dbd7887334..d37518e89db2ddea762bb483cc74c13040a0f803 100644 (file)
@@ -231,6 +231,50 @@ struct sfp_eeprom_id {
        struct sfp_eeprom_ext ext;
 } __packed;
 
+struct sfp_diag {
+       __be16 temp_high_alarm;
+       __be16 temp_low_alarm;
+       __be16 temp_high_warn;
+       __be16 temp_low_warn;
+       __be16 volt_high_alarm;
+       __be16 volt_low_alarm;
+       __be16 volt_high_warn;
+       __be16 volt_low_warn;
+       __be16 bias_high_alarm;
+       __be16 bias_low_alarm;
+       __be16 bias_high_warn;
+       __be16 bias_low_warn;
+       __be16 txpwr_high_alarm;
+       __be16 txpwr_low_alarm;
+       __be16 txpwr_high_warn;
+       __be16 txpwr_low_warn;
+       __be16 rxpwr_high_alarm;
+       __be16 rxpwr_low_alarm;
+       __be16 rxpwr_high_warn;
+       __be16 rxpwr_low_warn;
+       __be16 laser_temp_high_alarm;
+       __be16 laser_temp_low_alarm;
+       __be16 laser_temp_high_warn;
+       __be16 laser_temp_low_warn;
+       __be16 tec_cur_high_alarm;
+       __be16 tec_cur_low_alarm;
+       __be16 tec_cur_high_warn;
+       __be16 tec_cur_low_warn;
+       __be32 cal_rxpwr4;
+       __be32 cal_rxpwr3;
+       __be32 cal_rxpwr2;
+       __be32 cal_rxpwr1;
+       __be32 cal_rxpwr0;
+       __be16 cal_txi_slope;
+       __be16 cal_txi_offset;
+       __be16 cal_txpwr_slope;
+       __be16 cal_txpwr_offset;
+       __be16 cal_t_slope;
+       __be16 cal_t_offset;
+       __be16 cal_v_slope;
+       __be16 cal_v_offset;
+} __packed;
+
 /* SFP EEPROM registers */
 enum {
        SFP_PHYS_ID                     = 0x00,
@@ -384,7 +428,33 @@ enum {
        SFP_TEC_CUR                     = 0x6c,
 
        SFP_STATUS                      = 0x6e,
-       SFP_ALARM                       = 0x70,
+       SFP_ALARM0                      = 0x70,
+       SFP_ALARM0_TEMP_HIGH            = BIT(7),
+       SFP_ALARM0_TEMP_LOW             = BIT(6),
+       SFP_ALARM0_VCC_HIGH             = BIT(5),
+       SFP_ALARM0_VCC_LOW              = BIT(4),
+       SFP_ALARM0_TX_BIAS_HIGH         = BIT(3),
+       SFP_ALARM0_TX_BIAS_LOW          = BIT(2),
+       SFP_ALARM0_TXPWR_HIGH           = BIT(1),
+       SFP_ALARM0_TXPWR_LOW            = BIT(0),
+
+       SFP_ALARM1                      = 0x71,
+       SFP_ALARM1_RXPWR_HIGH           = BIT(7),
+       SFP_ALARM1_RXPWR_LOW            = BIT(6),
+
+       SFP_WARN0                       = 0x74,
+       SFP_WARN0_TEMP_HIGH             = BIT(7),
+       SFP_WARN0_TEMP_LOW              = BIT(6),
+       SFP_WARN0_VCC_HIGH              = BIT(5),
+       SFP_WARN0_VCC_LOW               = BIT(4),
+       SFP_WARN0_TX_BIAS_HIGH          = BIT(3),
+       SFP_WARN0_TX_BIAS_LOW           = BIT(2),
+       SFP_WARN0_TXPWR_HIGH            = BIT(1),
+       SFP_WARN0_TXPWR_LOW             = BIT(0),
+
+       SFP_WARN1                       = 0x75,
+       SFP_WARN1_RXPWR_HIGH            = BIT(7),
+       SFP_WARN1_RXPWR_LOW             = BIT(6),
 
        SFP_EXT_STATUS                  = 0x76,
        SFP_VSL                         = 0x78,
index 610a201126ee031166798baaf8ecae74fe478c4d..fd3cb1b247df8d7df21008ed000dfb966391b340 100644 (file)
@@ -641,6 +641,7 @@ typedef unsigned char *sk_buff_data_t;
  *     @no_fcs:  Request NIC to treat last 4 bytes as Ethernet FCS
  *     @csum_not_inet: use CRC32c to resolve CHECKSUM_PARTIAL
  *     @dst_pending_confirm: need to confirm neighbour
+ *     @decrypted: Decrypted SKB
   *    @napi_id: id of the NAPI struct this skb came from
  *     @secmark: security marking
  *     @mark: Generic packet mark
@@ -678,7 +679,8 @@ struct sk_buff {
                                int                     ip_defrag_offset;
                        };
                };
-               struct rb_node  rbnode; /* used in netem & tcp stack */
+               struct rb_node          rbnode; /* used in netem & tcp stack */
+               struct list_head        list;
        };
        struct sock             *sk;
 
@@ -791,6 +793,9 @@ struct sk_buff {
        __u8                    tc_redirected:1;
        __u8                    tc_from_ingress:1;
 #endif
+#ifdef CONFIG_TLS_DEVICE
+       __u8                    decrypted:1;
+#endif
 
 #ifdef CONFIG_NET_SCHED
        __u16                   tc_index;       /* traffic control index */
index 72705eaf4b84060a45bf04d5170f389a18010eac..263e37271afda18f3d61c99272d34da15dfdca29 100644 (file)
@@ -89,7 +89,7 @@ struct tcp_sack_block {
 
 struct tcp_options_received {
 /*     PAWS/RTTM data  */
-       long    ts_recent_stamp;/* Time we stored ts_recent (for aging) */
+       int     ts_recent_stamp;/* Time we stored ts_recent (for aging) */
        u32     ts_recent;      /* Time stamp to echo next              */
        u32     rcv_tsval;      /* Time stamp value                     */
        u32     rcv_tsecr;      /* Time stamp echo reply                */
@@ -181,10 +181,16 @@ struct tcp_sock {
        u32     data_segs_out;  /* RFC4898 tcpEStatsPerfDataSegsOut
                                 * total number of data segments sent.
                                 */
+       u64     bytes_sent;     /* RFC4898 tcpEStatsPerfHCDataOctetsOut
+                                * total number of data bytes sent.
+                                */
        u64     bytes_acked;    /* RFC4898 tcpEStatsAppHCThruOctetsAcked
                                 * sum(delta(snd_una)), or how many bytes
                                 * were acked.
                                 */
+       u32     dsack_dups;     /* RFC4898 tcpEStatsStackDSACKDups
+                                * total number of DSACK blocks received
+                                */
        u32     snd_una;        /* First byte we want an ack for        */
        u32     snd_sml;        /* Last byte of the most recently transmitted small packet */
        u32     rcv_tstamp;     /* timestamp of last received ACK (for keepalives) */
@@ -214,8 +220,7 @@ struct tcp_sock {
 #define TCP_RACK_RECOVERY_THRESH 16
                u8 reo_wnd_persist:5, /* No. of recovery since last adj */
                   dsack_seen:1, /* Whether DSACK seen after last adj */
-                  advanced:1,   /* mstamp advanced since last lost marking */
-                  reord:1;      /* reordering detected */
+                  advanced:1;   /* mstamp advanced since last lost marking */
        } rack;
        u16     advmss;         /* Advertised MSS                       */
        u8      compressed_ack;
@@ -261,6 +266,7 @@ struct tcp_sock {
        u8      ecn_flags;      /* ECN status bits.                     */
        u8      keepalive_probes; /* num of allowed keep alive probes   */
        u32     reordering;     /* Packet reordering metric.            */
+       u32     reord_seen;     /* number of data packet reordering events */
        u32     snd_up;         /* Urgent pointer               */
 
 /*
@@ -330,6 +336,9 @@ struct tcp_sock {
                                 * the first SYN. */
        u32     undo_marker;    /* snd_una upon a new recovery episode. */
        int     undo_retrans;   /* number of undoable retransmissions. */
+       u64     bytes_retrans;  /* RFC4898 tcpEStatsPerfOctetsRetrans
+                                * Total data bytes retransmitted
+                                */
        u32     total_retrans;  /* Total retransmits for entire connection */
 
        u32     urg_seq;        /* Seq of received urgent pointer */
@@ -350,6 +359,7 @@ struct tcp_sock {
 #endif
 
 /* Receiver side RTT estimation */
+       u32 rcv_rtt_last_tsecr;
        struct {
                u32     rtt_us;
                u32     seq;
@@ -425,7 +435,7 @@ struct tcp_timewait_sock {
        /* The time we sent the last out-of-window ACK: */
        u32                       tw_last_oow_ack_time;
 
-       long                      tw_ts_recent_stamp;
+       int                       tw_ts_recent_stamp;
 #ifdef CONFIG_TCP_MD5SIG
        struct tcp_md5sig_key     *tw_md5_key;
 #endif
index ca840345571bf6cf9253647c11112252fa7b6241..320d49d85484d4ac2d0fdbbe365ad339bf949c0b 100644 (file)
@@ -74,8 +74,8 @@ struct udp_sock {
        void (*encap_destroy)(struct sock *sk);
 
        /* GRO functions for UDP socket */
-       struct sk_buff **       (*gro_receive)(struct sock *sk,
-                                              struct sk_buff **head,
+       struct sk_buff *        (*gro_receive)(struct sock *sk,
+                                              struct list_head *head,
                                               struct sk_buff *skb);
        int                     (*gro_complete)(struct sock *sk,
                                                struct sk_buff *skb,
index 9e59ebfded625426c2c3764225d58081b704cf0a..8c9bc02d05e1c9997d17f6b0ecaaabc716e4abe9 100644 (file)
@@ -6,6 +6,7 @@
  * Public action API for classifiers/qdiscs
  */
 
+#include <linux/refcount.h>
 #include <net/sch_generic.h>
 #include <net/pkt_sched.h>
 #include <net/net_namespace.h>
@@ -26,8 +27,8 @@ struct tc_action {
        struct tcf_idrinfo              *idrinfo;
 
        u32                             tcfa_index;
-       int                             tcfa_refcnt;
-       int                             tcfa_bindcnt;
+       refcount_t                      tcfa_refcnt;
+       atomic_t                        tcfa_bindcnt;
        u32                             tcfa_capab;
        int                             tcfa_action;
        struct tcf_t                    tcfa_tm;
@@ -37,7 +38,7 @@ struct tc_action {
        spinlock_t                      tcfa_lock;
        struct gnet_stats_basic_cpu __percpu *cpu_bstats;
        struct gnet_stats_queue __percpu *cpu_qstats;
-       struct tc_cookie        *act_cookie;
+       struct tc_cookie        __rcu *act_cookie;
        struct tcf_chain        *goto_chain;
 };
 #define tcf_index      common.tcfa_index
@@ -84,14 +85,15 @@ struct tc_action_ops {
        size_t  size;
        struct module           *owner;
        int     (*act)(struct sk_buff *, const struct tc_action *,
-                      struct tcf_result *);
+                      struct tcf_result *); /* called under RCU BH lock */
        int     (*dump)(struct sk_buff *, struct tc_action *, int, int);
        void    (*cleanup)(struct tc_action *);
        int     (*lookup)(struct net *net, struct tc_action **a, u32 index,
                          struct netlink_ext_ack *extack);
        int     (*init)(struct net *net, struct nlattr *nla,
                        struct nlattr *est, struct tc_action **act, int ovr,
-                       int bind, struct netlink_ext_ack *extack);
+                       int bind, bool rtnl_held,
+                       struct netlink_ext_ack *extack);
        int     (*walk)(struct net *, struct sk_buff *,
                        struct netlink_callback *, int,
                        const struct tc_action_ops *,
@@ -99,6 +101,7 @@ struct tc_action_ops {
        void    (*stats_update)(struct tc_action *, u64, u32, u64);
        size_t  (*get_fill_size)(const struct tc_action *act);
        struct net_device *(*get_dev)(const struct tc_action *a);
+       int     (*delete)(struct net *net, u32 index);
 };
 
 struct tc_action_net {
@@ -151,6 +154,10 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
                   int bind, bool cpustats);
 void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a);
 
+void tcf_idr_cleanup(struct tc_action_net *tn, u32 index);
+int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index,
+                       struct tc_action **a, int bind);
+int tcf_idr_delete_index(struct tc_action_net *tn, u32 index);
 int __tcf_idr_release(struct tc_action *a, bool bind, bool strict);
 
 static inline int tcf_idr_release(struct tc_action *a, bool bind)
@@ -161,18 +168,20 @@ static inline int tcf_idr_release(struct tc_action *a, bool bind)
 int tcf_register_action(struct tc_action_ops *a, struct pernet_operations *ops);
 int tcf_unregister_action(struct tc_action_ops *a,
                          struct pernet_operations *ops);
-int tcf_action_destroy(struct list_head *actions, int bind);
+int tcf_action_destroy(struct tc_action *actions[], int bind);
 int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
                    int nr_actions, struct tcf_result *res);
 int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
                    struct nlattr *est, char *name, int ovr, int bind,
-                   struct list_head *actions, size_t *attr_size,
-                   struct netlink_ext_ack *extack);
+                   struct tc_action *actions[], size_t *attr_size,
+                   bool rtnl_held, struct netlink_ext_ack *extack);
 struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
                                    struct nlattr *nla, struct nlattr *est,
                                    char *name, int ovr, int bind,
+                                   bool rtnl_held,
                                    struct netlink_ext_ack *extack);
-int tcf_action_dump(struct sk_buff *skb, struct list_head *, int, int);
+int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[], int bind,
+                   int ref);
 int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int);
 int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int, int);
 int tcf_action_copy_stats(struct sk_buff *, struct tc_action *, int);
@@ -190,9 +199,6 @@ static inline void tcf_action_stats_update(struct tc_action *a, u64 bytes,
 #endif
 }
 
-typedef int tc_setup_cb_t(enum tc_setup_type type,
-                         void *type_data, void *cb_priv);
-
 #ifdef CONFIG_NET_CLS_ACT
 int tc_setup_cb_egdev_register(const struct net_device *dev,
                               tc_setup_cb_t *cb, void *cb_priv);
index 8ae8ee004258417b6db619379741df77ea1057f3..f53edb3754bc4fe5203794dc444bc328dafc9c38 100644 (file)
@@ -61,7 +61,7 @@ int rxrpc_kernel_send_data(struct socket *, struct rxrpc_call *,
                           struct msghdr *, size_t,
                           rxrpc_notify_end_tx_t);
 int rxrpc_kernel_recv_data(struct socket *, struct rxrpc_call *,
-                          void *, size_t, size_t *, bool, u32 *, u16 *);
+                          struct iov_iter *, bool, u32 *, u16 *);
 bool rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *,
                             u32, int, const char *);
 void rxrpc_kernel_end_call(struct socket *, struct rxrpc_call *);
index f358ad5e421457b0312f7ffff2f766004ebca663..fc3111515f5cbec0a230cfd242c5a8ba9dc24571 100644 (file)
@@ -283,7 +283,7 @@ static inline const char *bond_3ad_churn_desc(churn_state_t state)
                "none",
                "unknown"
        };
-       int max_size = sizeof(churn_description) / sizeof(churn_description[0]);
+       int max_size = ARRAY_SIZE(churn_description);
 
        if (state >= max_size)
                state = max_size - 1;
index 808f1d1673494d3e09b1c42f229308371f4b2ad9..a2d058170ea3c38739263570bcf14f2a0935e16f 100644 (file)
@@ -411,6 +411,19 @@ static inline bool bond_slave_can_tx(struct slave *slave)
               bond_is_active_slave(slave);
 }
 
+static inline bool bond_is_active_slave_dev(const struct net_device *slave_dev)
+{
+       struct slave *slave;
+       bool active;
+
+       rcu_read_lock();
+       slave = bond_slave_get_rcu(slave_dev);
+       active = bond_is_active_slave(slave);
+       rcu_read_unlock();
+
+       return active;
+}
+
 static inline void bond_hw_addr_copy(u8 *dst, const u8 *src, unsigned int len)
 {
        if (len == ETH_ALEN) {
index c5187438af38fe4b4c309ea327757b316288c4c9..ba61cdd09eaa811c8b4a529c47a9dde45909a143 100644 (file)
@@ -121,21 +121,6 @@ static inline void sk_busy_loop(struct sock *sk, int nonblock)
 #endif
 }
 
-static inline void sock_poll_busy_loop(struct socket *sock, __poll_t events)
-{
-       if (sk_can_busy_loop(sock->sk) &&
-           events && (events & POLL_BUSY_LOOP)) {
-               /* once, only if requested by syscall */
-               sk_busy_loop(sock->sk, 1);
-       }
-}
-
-/* if this socket can poll_ll, tell the system call */
-static inline __poll_t sock_poll_busy_flag(struct socket *sock)
-{
-       return sk_can_busy_loop(sock->sk) ? POLL_BUSY_LOOP : 0;
-}
-
 /* used in the NIC receive handler to mark the skb */
 static inline void skb_mark_napi_id(struct sk_buff *skb,
                                    struct napi_struct *napi)
@@ -151,6 +136,7 @@ static inline void sk_mark_napi_id(struct sock *sk, const struct sk_buff *skb)
 #ifdef CONFIG_NET_RX_BUSY_POLL
        sk->sk_napi_id = skb->napi_id;
 #endif
+       sk_rx_queue_set(sk, skb);
 }
 
 /* variant used for unconnected sockets */
index 1beb3ead038561d84c618757919871393f8c80c4..9a850973e09a739aaa72d5be951436640bc9c131 100644 (file)
@@ -285,6 +285,41 @@ struct ieee80211_sta_vht_cap {
        struct ieee80211_vht_mcs_info vht_mcs;
 };
 
+#define IEEE80211_HE_PPE_THRES_MAX_LEN         25
+
+/**
+ * struct ieee80211_sta_he_cap - STA's HE capabilities
+ *
+ * This structure describes most essential parameters needed
+ * to describe 802.11ax HE capabilities for a STA.
+ *
+ * @has_he: true iff HE data is valid.
+ * @he_cap_elem: Fixed portion of the HE capabilities element.
+ * @he_mcs_nss_supp: The supported NSS/MCS combinations.
+ * @ppe_thres: Holds the PPE Thresholds data.
+ */
+struct ieee80211_sta_he_cap {
+       bool has_he;
+       struct ieee80211_he_cap_elem he_cap_elem;
+       struct ieee80211_he_mcs_nss_supp he_mcs_nss_supp;
+       u8 ppe_thres[IEEE80211_HE_PPE_THRES_MAX_LEN];
+};
+
+/**
+ * struct ieee80211_sband_iftype_data
+ *
+ * This structure encapsulates sband data that is relevant for the
+ * interface types defined in @types_mask.  Each type in the
+ * @types_mask must be unique across all instances of iftype_data.
+ *
+ * @types_mask: interface types mask
+ * @he_cap: holds the HE capabilities
+ */
+struct ieee80211_sband_iftype_data {
+       u16 types_mask;
+       struct ieee80211_sta_he_cap he_cap;
+};
+
 /**
  * struct ieee80211_supported_band - frequency band definition
  *
@@ -301,6 +336,11 @@ struct ieee80211_sta_vht_cap {
  * @n_bitrates: Number of bitrates in @bitrates
  * @ht_cap: HT capabilities in this band
  * @vht_cap: VHT capabilities in this band
+ * @n_iftype_data: number of iftype data entries
+ * @iftype_data: interface type data entries.  Note that the bits in
+ *     @types_mask inside this structure cannot overlap (i.e. only
+ *     one occurrence of each type is allowed across all instances of
+ *     iftype_data).
  */
 struct ieee80211_supported_band {
        struct ieee80211_channel *channels;
@@ -310,8 +350,55 @@ struct ieee80211_supported_band {
        int n_bitrates;
        struct ieee80211_sta_ht_cap ht_cap;
        struct ieee80211_sta_vht_cap vht_cap;
+       u16 n_iftype_data;
+       const struct ieee80211_sband_iftype_data *iftype_data;
 };
 
+/**
+ * ieee80211_get_sband_iftype_data - return sband data for a given iftype
+ * @sband: the sband to search for the STA on
+ * @iftype: enum nl80211_iftype
+ *
+ * Return: pointer to struct ieee80211_sband_iftype_data, or NULL if none found
+ */
+static inline const struct ieee80211_sband_iftype_data *
+ieee80211_get_sband_iftype_data(const struct ieee80211_supported_band *sband,
+                               u8 iftype)
+{
+       int i;
+
+       if (WARN_ON(iftype >= NL80211_IFTYPE_MAX))
+               return NULL;
+
+       for (i = 0; i < sband->n_iftype_data; i++)  {
+               const struct ieee80211_sband_iftype_data *data =
+                       &sband->iftype_data[i];
+
+               if (data->types_mask & BIT(iftype))
+                       return data;
+       }
+
+       return NULL;
+}
+
+/**
+ * ieee80211_get_he_sta_cap - return HE capabilities for an sband's STA
+ * @sband: the sband to search for the STA on
+ *
+ * Return: pointer to the struct ieee80211_sta_he_cap, or NULL if none found
+ */
+static inline const struct ieee80211_sta_he_cap *
+ieee80211_get_he_sta_cap(const struct ieee80211_supported_band *sband)
+{
+       const struct ieee80211_sband_iftype_data *data =
+               ieee80211_get_sband_iftype_data(sband, NL80211_IFTYPE_STATION);
+
+       if (data && data->he_cap.has_he)
+               return &data->he_cap;
+
+       return NULL;
+}
+
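/* Editor's sketch (assumption): a driver probing whether a band offers HE
 * for station interfaces before advertising 802.11ax features.
 */
static bool example_band_has_he(const struct ieee80211_supported_band *sband)
{
        return ieee80211_get_he_sta_cap(sband) != NULL;
}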
 /**
  * wiphy_read_of_freq_limits - read frequency limits from device tree
  *
@@ -899,6 +986,8 @@ enum station_parameters_apply_mask {
  * @opmode_notif: operating mode field from Operating Mode Notification
  * @opmode_notif_used: information if operating mode field is used
  * @support_p2p_ps: information if station supports P2P PS mechanism
+ * @he_capa: HE capabilities of station
+ * @he_capa_len: the length of the HE capabilities
  */
 struct station_parameters {
        const u8 *supported_rates;
@@ -926,6 +1015,8 @@ struct station_parameters {
        u8 opmode_notif;
        bool opmode_notif_used;
        int support_p2p_ps;
+       const struct ieee80211_he_cap_elem *he_capa;
+       u8 he_capa_len;
 };
 
 /**
@@ -1000,12 +1091,14 @@ int cfg80211_check_station_change(struct wiphy *wiphy,
  * @RATE_INFO_FLAGS_VHT_MCS: mcs field filled with VHT MCS
  * @RATE_INFO_FLAGS_SHORT_GI: 400ns guard interval
  * @RATE_INFO_FLAGS_60G: 60GHz MCS
+ * @RATE_INFO_FLAGS_HE_MCS: HE MCS information
  */
 enum rate_info_flags {
        RATE_INFO_FLAGS_MCS                     = BIT(0),
        RATE_INFO_FLAGS_VHT_MCS                 = BIT(1),
        RATE_INFO_FLAGS_SHORT_GI                = BIT(2),
        RATE_INFO_FLAGS_60G                     = BIT(3),
+       RATE_INFO_FLAGS_HE_MCS                  = BIT(4),
 };
 
 /**
@@ -1019,6 +1112,7 @@ enum rate_info_flags {
  * @RATE_INFO_BW_40: 40 MHz bandwidth
  * @RATE_INFO_BW_80: 80 MHz bandwidth
  * @RATE_INFO_BW_160: 160 MHz bandwidth
+ * @RATE_INFO_BW_HE_RU: bandwidth determined by HE RU allocation
  */
 enum rate_info_bw {
        RATE_INFO_BW_20 = 0,
@@ -1027,6 +1121,7 @@ enum rate_info_bw {
        RATE_INFO_BW_40,
        RATE_INFO_BW_80,
        RATE_INFO_BW_160,
+       RATE_INFO_BW_HE_RU,
 };
 
 /**
@@ -1035,10 +1130,14 @@ enum rate_info_bw {
  * Information about a receiving or transmitting bitrate
  *
  * @flags: bitflag of flags from &enum rate_info_flags
- * @mcs: mcs index if struct describes a 802.11n bitrate
+ * @mcs: mcs index if struct describes an HT/VHT/HE rate
  * @legacy: bitrate in 100kbit/s for 802.11abg
- * @nss: number of streams (VHT only)
+ * @nss: number of streams (VHT & HE only)
  * @bw: bandwidth (from &enum rate_info_bw)
+ * @he_gi: HE guard interval (from &enum nl80211_he_gi)
+ * @he_dcm: HE DCM value
+ * @he_ru_alloc: HE RU allocation (from &enum nl80211_he_ru_alloc,
+ *     only valid if bw is %RATE_INFO_BW_HE_RU)
  */
 struct rate_info {
        u8 flags;
@@ -1046,6 +1145,9 @@ struct rate_info {
        u16 legacy;
        u8 nss;
        u8 bw;
+       u8 he_gi;
+       u8 he_dcm;
+       u8 he_ru_alloc;
 };
 
 /**
index 0e5e91be2d30cf59e3e856e4e3dca2960971a39d..e22a8a3c089b8c8484e0b0195fbfaed246458d78 100644 (file)
@@ -34,6 +34,19 @@ int dcb_ieee_setapp(struct net_device *, struct dcb_app *);
 int dcb_ieee_delapp(struct net_device *, struct dcb_app *);
 u8 dcb_ieee_getapp_mask(struct net_device *, struct dcb_app *);
 
+struct dcb_ieee_app_prio_map {
+       u64 map[IEEE_8021QAZ_MAX_TCS];
+};
+void dcb_ieee_getapp_prio_dscp_mask_map(const struct net_device *dev,
+                                       struct dcb_ieee_app_prio_map *p_map);
+
+struct dcb_ieee_app_dscp_map {
+       u8 map[64];
+};
+void dcb_ieee_getapp_dscp_prio_mask_map(const struct net_device *dev,
+                                       struct dcb_ieee_app_dscp_map *p_map);
+u8 dcb_ieee_getapp_default_prio_mask(const struct net_device *dev);
+
 int dcbnl_ieee_notify(struct net_device *dev, int event, int cmd,
                      u32 seq, u32 pid);
 int dcbnl_cee_notify(struct net_device *dev, int event, int cmd,
index e336ea9c73df31df038f5905e5524e57d1652894..b9b89d6604d402eb7ab83f1273526eba7fec42d8 100644 (file)
@@ -27,6 +27,9 @@ struct devlink {
        struct list_head sb_list;
        struct list_head dpipe_table_list;
        struct list_head resource_list;
+       struct list_head param_list;
+       struct list_head region_list;
+       u32 snapshot_id;
        struct devlink_dpipe_headers *dpipe_headers;
        const struct devlink_ops *ops;
        struct device *dev;
@@ -295,6 +298,115 @@ struct devlink_resource {
 
 #define DEVLINK_RESOURCE_ID_PARENT_TOP 0
 
+#define DEVLINK_PARAM_MAX_STRING_VALUE 32
+enum devlink_param_type {
+       DEVLINK_PARAM_TYPE_U8,
+       DEVLINK_PARAM_TYPE_U16,
+       DEVLINK_PARAM_TYPE_U32,
+       DEVLINK_PARAM_TYPE_STRING,
+       DEVLINK_PARAM_TYPE_BOOL,
+};
+
+union devlink_param_value {
+       u8 vu8;
+       u16 vu16;
+       u32 vu32;
+       const char *vstr;
+       bool vbool;
+};
+
+struct devlink_param_gset_ctx {
+       union devlink_param_value val;
+       enum devlink_param_cmode cmode;
+};
+
+/**
+ * struct devlink_param - devlink configuration parameter data
+ * @id: parameter ID
+ * @name: name of the parameter
+ * @generic: indicates if the parameter is generic or driver specific
+ * @type: parameter type
+ * @supported_cmodes: bitmap of supported configuration modes
+ * @get: get parameter value, used for runtime and permanent
+ *       configuration modes
+ * @set: set parameter value, used for runtime and permanent
+ *       configuration modes
+ * @validate: validate input value is applicable (within value range, etc.)
+ *
+ * This struct should be used by the driver to fill the data for
+ * a parameter it registers.
+ */
+struct devlink_param {
+       u32 id;
+       const char *name;
+       bool generic;
+       enum devlink_param_type type;
+       unsigned long supported_cmodes;
+       int (*get)(struct devlink *devlink, u32 id,
+                  struct devlink_param_gset_ctx *ctx);
+       int (*set)(struct devlink *devlink, u32 id,
+                  struct devlink_param_gset_ctx *ctx);
+       int (*validate)(struct devlink *devlink, u32 id,
+                       union devlink_param_value val,
+                       struct netlink_ext_ack *extack);
+};
+
+struct devlink_param_item {
+       struct list_head list;
+       const struct devlink_param *param;
+       union devlink_param_value driverinit_value;
+       bool driverinit_value_valid;
+};
+
+enum devlink_param_generic_id {
+       DEVLINK_PARAM_GENERIC_ID_INT_ERR_RESET,
+       DEVLINK_PARAM_GENERIC_ID_MAX_MACS,
+       DEVLINK_PARAM_GENERIC_ID_ENABLE_SRIOV,
+       DEVLINK_PARAM_GENERIC_ID_REGION_SNAPSHOT,
+
+       /* add new param generic ids above here */
+       __DEVLINK_PARAM_GENERIC_ID_MAX,
+       DEVLINK_PARAM_GENERIC_ID_MAX = __DEVLINK_PARAM_GENERIC_ID_MAX - 1,
+};
+
+#define DEVLINK_PARAM_GENERIC_INT_ERR_RESET_NAME "internal_error_reset"
+#define DEVLINK_PARAM_GENERIC_INT_ERR_RESET_TYPE DEVLINK_PARAM_TYPE_BOOL
+
+#define DEVLINK_PARAM_GENERIC_MAX_MACS_NAME "max_macs"
+#define DEVLINK_PARAM_GENERIC_MAX_MACS_TYPE DEVLINK_PARAM_TYPE_U32
+
+#define DEVLINK_PARAM_GENERIC_ENABLE_SRIOV_NAME "enable_sriov"
+#define DEVLINK_PARAM_GENERIC_ENABLE_SRIOV_TYPE DEVLINK_PARAM_TYPE_BOOL
+
+#define DEVLINK_PARAM_GENERIC_REGION_SNAPSHOT_NAME "region_snapshot_enable"
+#define DEVLINK_PARAM_GENERIC_REGION_SNAPSHOT_TYPE DEVLINK_PARAM_TYPE_BOOL
+
+#define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate)     \
+{                                                                      \
+       .id = DEVLINK_PARAM_GENERIC_ID_##_id,                           \
+       .name = DEVLINK_PARAM_GENERIC_##_id##_NAME,                     \
+       .type = DEVLINK_PARAM_GENERIC_##_id##_TYPE,                     \
+       .generic = true,                                                \
+       .supported_cmodes = _cmodes,                                    \
+       .get = _get,                                                    \
+       .set = _set,                                                    \
+       .validate = _validate,                                          \
+}
+
+#define DEVLINK_PARAM_DRIVER(_id, _name, _type, _cmodes, _get, _set, _validate)        \
+{                                                                      \
+       .id = _id,                                                      \
+       .name = _name,                                                  \
+       .type = _type,                                                  \
+       .supported_cmodes = _cmodes,                                    \
+       .get = _get,                                                    \
+       .set = _set,                                                    \
+       .validate = _validate,                                          \
+}
+
+struct devlink_region;
+
+typedef void devlink_snapshot_data_dest_t(const void *data);
+
 struct devlink_ops {
        int (*reload)(struct devlink *devlink, struct netlink_ext_ack *extack);
        int (*port_type_set)(struct devlink_port *devlink_port,
@@ -430,6 +542,26 @@ void devlink_resource_occ_get_register(struct devlink *devlink,
                                       void *occ_get_priv);
 void devlink_resource_occ_get_unregister(struct devlink *devlink,
                                         u64 resource_id);
+int devlink_params_register(struct devlink *devlink,
+                           const struct devlink_param *params,
+                           size_t params_count);
+void devlink_params_unregister(struct devlink *devlink,
+                              const struct devlink_param *params,
+                              size_t params_count);
+int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id,
+                                      union devlink_param_value *init_val);
+int devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id,
+                                      union devlink_param_value init_val);
+void devlink_param_value_changed(struct devlink *devlink, u32 param_id);
+struct devlink_region *devlink_region_create(struct devlink *devlink,
+                                            const char *region_name,
+                                            u32 region_max_snapshots,
+                                            u64 region_size);
+void devlink_region_destroy(struct devlink_region *region);
+u32 devlink_region_shapshot_id_get(struct devlink *devlink);
+int devlink_region_snapshot_create(struct devlink_region *region, u64 data_len,
+                                  u8 *data, u32 snapshot_id,
+                                  devlink_snapshot_data_dest_t *data_destructor);
 
 #else
 
@@ -622,6 +754,69 @@ devlink_resource_occ_get_unregister(struct devlink *devlink,
 {
 }
 
+static inline int
+devlink_params_register(struct devlink *devlink,
+                       const struct devlink_param *params,
+                       size_t params_count)
+{
+       return 0;
+}
+
+static inline void
+devlink_params_unregister(struct devlink *devlink,
+                         const struct devlink_param *params,
+                         size_t params_count)
+{
+}
+
+static inline int
+devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id,
+                                  union devlink_param_value *init_val)
+{
+       return -EOPNOTSUPP;
+}
+
+static inline int
+devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id,
+                                  union devlink_param_value init_val)
+{
+       return -EOPNOTSUPP;
+}
+
+static inline void
+devlink_param_value_changed(struct devlink *devlink, u32 param_id)
+{
+}
+
+static inline struct devlink_region *
+devlink_region_create(struct devlink *devlink,
+                     const char *region_name,
+                     u32 region_max_snapshots,
+                     u64 region_size)
+{
+       return NULL;
+}
+
+static inline void
+devlink_region_destroy(struct devlink_region *region)
+{
+}
+
+static inline u32
+devlink_region_snapshot_id_get(struct devlink *devlink)
+{
+       return 0;
+}
+
+static inline int
+devlink_region_snapshot_create(struct devlink_region *region, u64 data_len,
+                              u8 *data, u32 snapshot_id,
+                              devlink_snapshot_data_dest_t *data_destructor)
+{
+       return 0;
+}
+
 #endif
 
 #endif /* _NET_DEVLINK_H_ */
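
The devlink additions above introduce driver parameters and address regions with snapshots. A minimal usage sketch, assuming the DEVLINK_PARAM_GENERIC() helper and cmode bits from the full header (the region name, size and error handling here are illustrative):

    static const struct devlink_param my_params[] = {
            /* MAX_MACS is one of the generic parameter ids */
            DEVLINK_PARAM_GENERIC(MAX_MACS,
                                  BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
                                  NULL, NULL, NULL),
    };

    static int my_devlink_setup(struct devlink *devlink, u8 *buf, u64 len)
    {
            struct devlink_region *region;
            int err;

            err = devlink_params_register(devlink, my_params,
                                          ARRAY_SIZE(my_params));
            if (err)
                    return err;

            /* a region that may hold up to 8 snapshots of len bytes */
            region = devlink_region_create(devlink, "fw-health", 8, len);
            if (IS_ERR(region))
                    return PTR_ERR(region);

            /* buf must be kmalloc'ed: devlink frees it via the destructor */
            return devlink_region_snapshot_create(region, len, buf,
                            devlink_region_snapshot_id_get(devlink), kfree);
    }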
index fdbd6082945d6448b58c7c4cd52169e2606b3fd2..461e8a7661b7855ca2ee7bec2e4ffb7e9bb2bc2a 100644 (file)
@@ -259,6 +259,9 @@ struct dsa_switch {
        /* Number of switch port queues */
        unsigned int            num_tx_queues;
 
+       unsigned long           *bitmap;
+       unsigned long           _bitmap;
+
        /* Dynamically allocated ports, keep last */
        size_t num_ports;
        struct dsa_port ports[];
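
The new bitmap pair gives the switch core per-port scratch state without a separate allocation on small devices; a sketch of the setup this implies at allocation time (assumed, not part of this hunk):

    if (ds->num_ports <= sizeof(ds->_bitmap) * 8) {
            ds->_bitmap = 0;
            ds->bitmap = &ds->_bitmap;      /* embedded word is enough */
    } else {
            ds->bitmap = devm_kcalloc(dev, BITS_TO_LONGS(ds->num_ports),
                                      sizeof(unsigned long), GFP_KERNEL);
            if (!ds->bitmap)
                    return NULL;
    }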
index b3219cd8a5a1eef9e1f19004397aa1cd6ae0948e..7f735e76ca7391f07de28b33a2cf63eabda475bf 100644 (file)
@@ -475,6 +475,14 @@ static inline struct dst_entry *xfrm_lookup(struct net *net,
        return dst_orig;
 }
 
+static inline struct dst_entry *
+xfrm_lookup_with_ifid(struct net *net, struct dst_entry *dst_orig,
+                     const struct flowi *fl, const struct sock *sk,
+                     int flags, u32 if_id)
+{
+       return dst_orig;
+}
+
 static inline struct dst_entry *xfrm_lookup_route(struct net *net,
                                                  struct dst_entry *dst_orig,
                                                  const struct flowi *fl,
@@ -494,6 +502,12 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
                              const struct flowi *fl, const struct sock *sk,
                              int flags);
 
+struct dst_entry *xfrm_lookup_with_ifid(struct net *net,
+                                       struct dst_entry *dst_orig,
+                                       const struct flowi *fl,
+                                       const struct sock *sk, int flags,
+                                       u32 if_id);
+
 struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig,
                                    const struct flowi *fl, const struct sock *sk,
                                    int flags);
index adc24df56b907d4598a1b08d4ab5da01eddc0c85..2a17f041f7a1742e069e7c6d49658251e45519e3 100644 (file)
@@ -47,7 +47,7 @@ struct flow_dissector_key_tags {
 struct flow_dissector_key_vlan {
        u16     vlan_id:12,
                vlan_priority:3;
-       u16     padding;
+       __be16  vlan_tpid;
 };
 
 struct flow_dissector_key_mpls {
@@ -206,7 +206,8 @@ enum flow_dissector_key_id {
        FLOW_DISSECTOR_KEY_MPLS, /* struct flow_dissector_key_mpls */
        FLOW_DISSECTOR_KEY_TCP, /* struct flow_dissector_key_tcp */
        FLOW_DISSECTOR_KEY_IP, /* struct flow_dissector_key_ip */
-
+       FLOW_DISSECTOR_KEY_CVLAN, /* struct flow_dissector_key_vlan */
+       FLOW_DISSECTOR_KEY_ENC_IP, /* struct flow_dissector_key_ip */
        FLOW_DISSECTOR_KEY_MAX,
 };
 
@@ -237,6 +238,7 @@ struct flow_keys {
        struct flow_dissector_key_basic basic;
        struct flow_dissector_key_tags tags;
        struct flow_dissector_key_vlan vlan;
+       struct flow_dissector_key_vlan cvlan;
        struct flow_dissector_key_keyid keyid;
        struct flow_dissector_key_ports ports;
        struct flow_dissector_key_addrs addrs;
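
With the new CVLAN key and the vlan_tpid field replacing the old padding, the dissector can report both tags of a QinQ (802.1ad) packet. A hedged sketch of reading them back:

    struct flow_keys keys;

    if (skb_flow_dissect_flow_keys(skb, &keys, 0)) {
            /* outer tag lands in keys.vlan, inner tag in keys.cvlan */
            if (keys.cvlan.vlan_tpid)
                    pr_debug("QinQ: outer vid %u, inner vid %u\n",
                             keys.vlan.vlan_id, keys.cvlan.vlan_id);
    }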
index 960236fb168184325dff962c116d305ff7541469..feef706e1158256569b08aa0fb1b24534ceebaac 100644 (file)
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2017          Intel Deutschland GmbH
+ * Copyright (c) 2018          Intel Corporation
  *
  * Permission to use, copy, modify, and/or distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -72,6 +73,8 @@ enum ieee80211_radiotap_presence {
        IEEE80211_RADIOTAP_AMPDU_STATUS = 20,
        IEEE80211_RADIOTAP_VHT = 21,
        IEEE80211_RADIOTAP_TIMESTAMP = 22,
+       IEEE80211_RADIOTAP_HE = 23,
+       IEEE80211_RADIOTAP_HE_MU = 24,
 
        /* valid in every it_present bitmap, even vendor namespaces */
        IEEE80211_RADIOTAP_RADIOTAP_NAMESPACE = 29,
@@ -202,6 +205,126 @@ enum ieee80211_radiotap_timestamp_flags {
        IEEE80211_RADIOTAP_TIMESTAMP_FLAG_ACCURACY = 0x02,
 };
 
+struct ieee80211_radiotap_he {
+       __le16 data1, data2, data3, data4, data5, data6;
+};
+
+enum ieee80211_radiotap_he_bits {
+       IEEE80211_RADIOTAP_HE_DATA1_FORMAT_MASK         = 3,
+       IEEE80211_RADIOTAP_HE_DATA1_FORMAT_SU           = 0,
+       IEEE80211_RADIOTAP_HE_DATA1_FORMAT_EXT_SU       = 1,
+       IEEE80211_RADIOTAP_HE_DATA1_FORMAT_MU           = 2,
+       IEEE80211_RADIOTAP_HE_DATA1_FORMAT_TRIG         = 3,
+
+       IEEE80211_RADIOTAP_HE_DATA1_BSS_COLOR_KNOWN     = 0x0004,
+       IEEE80211_RADIOTAP_HE_DATA1_BEAM_CHANGE_KNOWN   = 0x0008,
+       IEEE80211_RADIOTAP_HE_DATA1_UL_DL_KNOWN         = 0x0010,
+       IEEE80211_RADIOTAP_HE_DATA1_DATA_MCS_KNOWN      = 0x0020,
+       IEEE80211_RADIOTAP_HE_DATA1_DATA_DCM_KNOWN      = 0x0040,
+       IEEE80211_RADIOTAP_HE_DATA1_CODING_KNOWN        = 0x0080,
+       IEEE80211_RADIOTAP_HE_DATA1_LDPC_XSYMSEG_KNOWN  = 0x0100,
+       IEEE80211_RADIOTAP_HE_DATA1_STBC_KNOWN          = 0x0200,
+       IEEE80211_RADIOTAP_HE_DATA1_SPTL_REUSE_KNOWN    = 0x0400,
+       IEEE80211_RADIOTAP_HE_DATA1_SPTL_REUSE2_KNOWN   = 0x0800,
+       IEEE80211_RADIOTAP_HE_DATA1_SPTL_REUSE3_KNOWN   = 0x1000,
+       IEEE80211_RADIOTAP_HE_DATA1_SPTL_REUSE4_KNOWN   = 0x2000,
+       IEEE80211_RADIOTAP_HE_DATA1_BW_RU_ALLOC_KNOWN   = 0x4000,
+       IEEE80211_RADIOTAP_HE_DATA1_DOPPLER_KNOWN       = 0x8000,
+
+       IEEE80211_RADIOTAP_HE_DATA2_PRISEC_80_KNOWN     = 0x0001,
+       IEEE80211_RADIOTAP_HE_DATA2_GI_KNOWN            = 0x0002,
+       IEEE80211_RADIOTAP_HE_DATA2_NUM_LTF_SYMS_KNOWN  = 0x0004,
+       IEEE80211_RADIOTAP_HE_DATA2_PRE_FEC_PAD_KNOWN   = 0x0008,
+       IEEE80211_RADIOTAP_HE_DATA2_TXBF_KNOWN          = 0x0010,
+       IEEE80211_RADIOTAP_HE_DATA2_PE_DISAMBIG_KNOWN   = 0x0020,
+       IEEE80211_RADIOTAP_HE_DATA2_TXOP_KNOWN          = 0x0040,
+       IEEE80211_RADIOTAP_HE_DATA2_MIDAMBLE_KNOWN      = 0x0080,
+       IEEE80211_RADIOTAP_HE_DATA2_RU_OFFSET           = 0x3f00,
+       IEEE80211_RADIOTAP_HE_DATA2_RU_OFFSET_KNOWN     = 0x4000,
+       IEEE80211_RADIOTAP_HE_DATA2_PRISEC_80_SEC       = 0x8000,
+
+       IEEE80211_RADIOTAP_HE_DATA3_BSS_COLOR           = 0x003f,
+       IEEE80211_RADIOTAP_HE_DATA3_BEAM_CHANGE         = 0x0040,
+       IEEE80211_RADIOTAP_HE_DATA3_UL_DL               = 0x0080,
+       IEEE80211_RADIOTAP_HE_DATA3_DATA_MCS            = 0x0f00,
+       IEEE80211_RADIOTAP_HE_DATA3_DATA_DCM            = 0x1000,
+       IEEE80211_RADIOTAP_HE_DATA3_CODING              = 0x2000,
+       IEEE80211_RADIOTAP_HE_DATA3_LDPC_XSYMSEG        = 0x4000,
+       IEEE80211_RADIOTAP_HE_DATA3_STBC                = 0x8000,
+
+       IEEE80211_RADIOTAP_HE_DATA4_SU_MU_SPTL_REUSE    = 0x000f,
+       IEEE80211_RADIOTAP_HE_DATA4_MU_STA_ID           = 0x7ff0,
+       IEEE80211_RADIOTAP_HE_DATA4_TB_SPTL_REUSE1      = 0x000f,
+       IEEE80211_RADIOTAP_HE_DATA4_TB_SPTL_REUSE2      = 0x00f0,
+       IEEE80211_RADIOTAP_HE_DATA4_TB_SPTL_REUSE3      = 0x0f00,
+       IEEE80211_RADIOTAP_HE_DATA4_TB_SPTL_REUSE4      = 0xf000,
+
+       IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC    = 0x000f,
+               IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_20MHZ      = 0,
+               IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_40MHZ      = 1,
+               IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_80MHZ      = 2,
+               IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_160MHZ     = 3,
+               IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_26T        = 4,
+               IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_52T        = 5,
+               IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_106T       = 6,
+               IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_242T       = 7,
+               IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_484T       = 8,
+               IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_996T       = 9,
+               IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_2x996T     = 10,
+
+       IEEE80211_RADIOTAP_HE_DATA5_GI                  = 0x0030,
+               IEEE80211_RADIOTAP_HE_DATA5_GI_0_8                      = 0,
+               IEEE80211_RADIOTAP_HE_DATA5_GI_1_6                      = 1,
+               IEEE80211_RADIOTAP_HE_DATA5_GI_3_2                      = 2,
+
+       IEEE80211_RADIOTAP_HE_DATA5_LTF_SIZE            = 0x00c0,
+               IEEE80211_RADIOTAP_HE_DATA5_LTF_SIZE_UNKNOWN            = 0,
+               IEEE80211_RADIOTAP_HE_DATA5_LTF_SIZE_1X                 = 1,
+               IEEE80211_RADIOTAP_HE_DATA5_LTF_SIZE_2X                 = 2,
+               IEEE80211_RADIOTAP_HE_DATA5_LTF_SIZE_4X                 = 3,
+       IEEE80211_RADIOTAP_HE_DATA5_NUM_LTF_SYMS        = 0x0700,
+       IEEE80211_RADIOTAP_HE_DATA5_PRE_FEC_PAD         = 0x3000,
+       IEEE80211_RADIOTAP_HE_DATA5_TXBF                = 0x4000,
+       IEEE80211_RADIOTAP_HE_DATA5_PE_DISAMBIG         = 0x8000,
+
+       IEEE80211_RADIOTAP_HE_DATA6_NSTS                = 0x000f,
+       IEEE80211_RADIOTAP_HE_DATA6_DOPPLER             = 0x0010,
+       IEEE80211_RADIOTAP_HE_DATA6_TXOP                = 0x7f00,
+       IEEE80211_RADIOTAP_HE_DATA6_MIDAMBLE_PDCTY      = 0x8000,
+};
+
+struct ieee80211_radiotap_he_mu {
+       __le16 flags1, flags2;
+       u8 ru_ch1[4];
+       u8 ru_ch2[4];
+};
+
+enum ieee80211_radiotap_he_mu_bits {
+       IEEE80211_RADIOTAP_HE_MU_FLAGS1_SIG_B_MCS               = 0x000f,
+       IEEE80211_RADIOTAP_HE_MU_FLAGS1_SIG_B_MCS_KNOWN         = 0x0010,
+       IEEE80211_RADIOTAP_HE_MU_FLAGS1_SIG_B_DCM               = 0x0020,
+       IEEE80211_RADIOTAP_HE_MU_FLAGS1_SIG_B_DCM_KNOWN         = 0x0040,
+       IEEE80211_RADIOTAP_HE_MU_FLAGS1_CH2_CTR_26T_RU_KNOWN    = 0x0080,
+       IEEE80211_RADIOTAP_HE_MU_FLAGS1_CH1_RU_KNOWN            = 0x0100,
+       IEEE80211_RADIOTAP_HE_MU_FLAGS1_CH2_RU_KNOWN            = 0x0200,
+       IEEE80211_RADIOTAP_HE_MU_FLAGS1_CH1_CTR_26T_RU_KNOWN    = 0x1000,
+       IEEE80211_RADIOTAP_HE_MU_FLAGS1_CH1_CTR_26T_RU          = 0x2000,
+       IEEE80211_RADIOTAP_HE_MU_FLAGS1_SIG_B_COMP_KNOWN        = 0x4000,
+       IEEE80211_RADIOTAP_HE_MU_FLAGS1_SIG_B_SYMS_USERS_KNOWN  = 0x8000,
+
+       IEEE80211_RADIOTAP_HE_MU_FLAGS2_BW_FROM_SIG_A_BW        = 0x0003,
+               IEEE80211_RADIOTAP_HE_MU_FLAGS2_BW_FROM_SIG_A_BW_20MHZ  = 0x0000,
+               IEEE80211_RADIOTAP_HE_MU_FLAGS2_BW_FROM_SIG_A_BW_40MHZ  = 0x0001,
+               IEEE80211_RADIOTAP_HE_MU_FLAGS2_BW_FROM_SIG_A_BW_80MHZ  = 0x0002,
+               IEEE80211_RADIOTAP_HE_MU_FLAGS2_BW_FROM_SIG_A_BW_160MHZ = 0x0003,
+       IEEE80211_RADIOTAP_HE_MU_FLAGS2_BW_FROM_SIG_A_BW_KNOWN  = 0x0004,
+       IEEE80211_RADIOTAP_HE_MU_FLAGS2_SIG_B_COMP              = 0x0008,
+       IEEE80211_RADIOTAP_HE_MU_FLAGS2_SIG_B_SYMS_USERS        = 0x00f0,
+       IEEE80211_RADIOTAP_HE_MU_FLAGS2_PUNC_FROM_SIG_A_BW      = 0x0300,
+       IEEE80211_RADIOTAP_HE_MU_FLAGS2_PUNC_FROM_SIG_A_BW_KNOWN= 0x0400,
+       IEEE80211_RADIOTAP_HE_MU_FLAGS2_CH2_CTR_26T_RU          = 0x0800,
+};
+
 /**
  * ieee80211_get_radiotap_len - get radiotap header length
  */
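
A sketch of how an RX path could fill the new HE struct, packing values into the masked fields with le16_encode_bits() from <linux/bitfield.h>; which fields a real driver marks known depends on its hardware:

    static void fill_he_radiotap(struct ieee80211_radiotap_he *he,
                                 u8 mcs, u8 bss_color)
    {
            he->data1 = cpu_to_le16(IEEE80211_RADIOTAP_HE_DATA1_FORMAT_SU |
                                    IEEE80211_RADIOTAP_HE_DATA1_DATA_MCS_KNOWN |
                                    IEEE80211_RADIOTAP_HE_DATA1_BSS_COLOR_KNOWN);
            he->data3 = le16_encode_bits(mcs,
                                         IEEE80211_RADIOTAP_HE_DATA3_DATA_MCS) |
                        le16_encode_bits(bss_color,
                                         IEEE80211_RADIOTAP_HE_DATA3_BSS_COLOR);
    }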
index 384b90c62c0bce9b60d74383a2c68287a3f30d95..3ca969cbd16117fe15b1521333c5d7a28c8709f7 100644 (file)
@@ -43,7 +43,7 @@ int inet_ctl_sock_create(struct sock **sk, unsigned short family,
 int inet_recv_error(struct sock *sk, struct msghdr *msg, int len,
                    int *addr_len);
 
-struct sk_buff **inet_gro_receive(struct sk_buff **head, struct sk_buff *skb);
+struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb);
 int inet_gro_complete(struct sk_buff *skb, int nhoff);
 struct sk_buff *inet_gso_segment(struct sk_buff *skb,
                                 netdev_features_t features);
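
This is part of the tree-wide conversion of GRO from the struct sk_buff **head list to a list_head. A callback under the new signature walks held packets roughly like this (sketch; the header comparison is elided):

    static struct sk_buff *foo_gro_receive(struct list_head *head,
                                           struct sk_buff *skb)
    {
            struct sk_buff *p;

            list_for_each_entry(p, head, list) {
                    if (!NAPI_GRO_CB(p)->same_flow)
                            continue;
                    /* compare protocol headers here; clear
                     * NAPI_GRO_CB(p)->same_flow on mismatch
                     */
            }
            return NULL;    /* a non-NULL return asks the core to flush it */
    }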
index ed07e3786d98614898bd5ec9804afb425ba82cec..f4272a29dc445ec9745b943578f8288bf92a2e8e 100644 (file)
@@ -2,7 +2,7 @@
 #ifndef __NET_FRAG_H__
 #define __NET_FRAG_H__
 
-#include <linux/rhashtable.h>
+#include <linux/rhashtable-types.h>
 
 struct netns_frags {
        /* sysctls */
index 83d5b3c2ac421ca29e8ed654dcbf054379e3001b..e03b93360f332b3e3232873ac1cbd0ee7478fabb 100644 (file)
@@ -148,6 +148,7 @@ struct inet_cork {
        __s16                   tos;
        char                    priority;
        __u16                   gso_size;
+       u64                     transmit_time;
 };
 
 struct inet_cork_full {
@@ -358,4 +359,12 @@ static inline bool inet_get_convert_csum(struct sock *sk)
        return !!inet_sk(sk)->convert_csum;
 }
 
+static inline bool inet_can_nonlocal_bind(struct net *net,
+                                         struct inet_sock *inet)
+{
+       return net->ipv4.sysctl_ip_nonlocal_bind ||
+               inet->freebind || inet->transparent;
+}
+
 #endif /* _INET_SOCK_H */
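
The helper folds the three ways a socket may bind to a non-local address into one test. Roughly how a bind path uses it, along the lines of __inet_bind():

    if (!inet_can_nonlocal_bind(net, inet) &&
        addr->sin_addr.s_addr != htonl(INADDR_ANY) &&
        chk_addr_ret != RTN_LOCAL &&
        chk_addr_ret != RTN_MULTICAST &&
        chk_addr_ret != RTN_BROADCAST)
            return -EADDRNOTAVAIL;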
index 0d2281b4b27ac0804176c063de830663762ab980..e44b1a44f67ad447528f1c59f05915157e016154 100644 (file)
@@ -72,13 +72,27 @@ struct ipcm_cookie {
        __be32                  addr;
        int                     oif;
        struct ip_options_rcu   *opt;
-       __u8                    tx_flags;
        __u8                    ttl;
        __s16                   tos;
        char                    priority;
        __u16                   gso_size;
 };
 
+static inline void ipcm_init(struct ipcm_cookie *ipcm)
+{
+       *ipcm = (struct ipcm_cookie) { .tos = -1 };
+}
+
+static inline void ipcm_init_sk(struct ipcm_cookie *ipcm,
+                               const struct inet_sock *inet)
+{
+       ipcm_init(ipcm);
+
+       ipcm->sockc.tsflags = inet->sk.sk_tsflags;
+       ipcm->oif = inet->sk.sk_bound_dev_if;
+       ipcm->addr = inet->inet_saddr;
+}
+
 #define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb))
 #define PKTINFO_SKB_CB(skb) ((struct in_pktinfo *)((skb)->cb))
 
@@ -138,6 +152,8 @@ int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk,
                          struct ip_options_rcu *opt);
 int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
           struct net_device *orig_dev);
+void ip_list_rcv(struct list_head *head, struct packet_type *pt,
+                struct net_device *orig_dev);
 int ip_local_deliver(struct sk_buff *skb);
 int ip_mr_input(struct sk_buff *skb);
 int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb);
@@ -148,7 +164,8 @@ void ip_send_check(struct iphdr *ip);
 int __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb);
 int ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb);
 
-int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl);
+int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
+                   __u8 tos);
 void ip_init(void);
 int ip_append_data(struct sock *sk, struct flowi4 *fl4,
                   int getfrag(void *from, char *to, int offset, int len,
@@ -174,6 +191,12 @@ struct sk_buff *ip_make_skb(struct sock *sk, struct flowi4 *fl4,
                            struct ipcm_cookie *ipc, struct rtable **rtp,
                            struct inet_cork *cork, unsigned int flags);
 
+static inline int ip_queue_xmit(struct sock *sk, struct sk_buff *skb,
+                               struct flowi *fl)
+{
+       return __ip_queue_xmit(sk, skb, fl, inet_sk(sk)->tos);
+}
+
 static inline struct sk_buff *ip_finish_skb(struct sock *sk, struct flowi4 *fl4)
 {
        return __ip_make_skb(sk, fl4, &sk->sk_write_queue, &inet_sk(sk)->cork.base);
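
ipcm_init()/ipcm_init_sk() replace open-coded cookie setup in the sendmsg paths; tx_flags is gone because timestamping flags now travel in the embedded sockcm cookie. Typical use (sketch):

    struct ipcm_cookie ipc;

    ipcm_init_sk(&ipc, inet);       /* tos = -1; oif/addr/tsflags from sk */
    if (msg->msg_controllen) {
            err = ip_cmsg_send(sk, msg, &ipc, false);
            if (unlikely(err))
                    return err;
    }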
index 90ff430f5e9d04b1899ccadbc888f6f1376921b4..b0d022ff6ea1702037b84a038f3c81ce56540aa4 100644 (file)
@@ -466,10 +466,12 @@ static inline void ip_tunnel_info_opts_get(void *to,
 }
 
 static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info,
-                                          const void *from, int len)
+                                          const void *from, int len,
+                                          __be16 flags)
 {
        memcpy(ip_tunnel_info_opts(info), from, len);
        info->options_len = len;
+       info->key.tun_flags |= flags;
 }
 
 static inline struct ip_tunnel_info *lwt_tun_info(struct lwtunnel_state *lwtstate)
@@ -511,9 +513,11 @@ static inline void ip_tunnel_info_opts_get(void *to,
 }
 
 static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info,
-                                          const void *from, int len)
+                                          const void *from, int len,
+                                          __be16 flags)
 {
        info->options_len = 0;
+       info->key.tun_flags |= flags;
 }
 
 #endif /* CONFIG_INET */
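
ip_tunnel_info_opts_set() now also ORs the caller's option flag into tun_flags, so metadata-mode tunnels set both in one call; e.g. (sketch, using the flag the geneve code passes):

    ip_tunnel_info_opts_set(info, opts, opts_len, TUNNEL_GENEVE_OPT);
    /* previously the caller had to follow up with:
     * info->key.tun_flags |= TUNNEL_GENEVE_OPT;
     */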
index a0bec23c6d5e4cd6a01bcec5104e12c27c895b62..a0d2e0bb9a94b90da24b22a327253dc6e9c0e8ff 100644 (file)
@@ -335,6 +335,11 @@ enum ip_vs_sctp_states {
        IP_VS_SCTP_S_LAST
 };
 
+/* Connection templates use bits from state */
+#define IP_VS_CTPL_S_NONE              0x0000
+#define IP_VS_CTPL_S_ASSURED           0x0001
+#define IP_VS_CTPL_S_LAST              0x0002
+
 /* Delta sequence info structure
  * Each ip_vs_conn has 2 (output AND input seq. changes).
  * Only used in the VS/NAT.
@@ -1221,7 +1226,7 @@ struct ip_vs_conn *ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af,
                                  struct ip_vs_dest *dest, __u32 fwmark);
 void ip_vs_conn_expire_now(struct ip_vs_conn *cp);
 
-const char *ip_vs_state_name(__u16 proto, int state);
+const char *ip_vs_state_name(const struct ip_vs_conn *cp);
 
 void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp);
 int ip_vs_check_template(struct ip_vs_conn *ct, struct ip_vs_dest *cdest);
@@ -1289,6 +1294,17 @@ ip_vs_control_add(struct ip_vs_conn *cp, struct ip_vs_conn *ctl_cp)
        atomic_inc(&ctl_cp->n_control);
 }
 
+/* Mark our template as assured */
+static inline void
+ip_vs_control_assure_ct(struct ip_vs_conn *cp)
+{
+       struct ip_vs_conn *ct = cp->control;
+
+       if (ct && !(ct->state & IP_VS_CTPL_S_ASSURED) &&
+           (ct->flags & IP_VS_CONN_F_TEMPLATE))
+               ct->state |= IP_VS_CTPL_S_ASSURED;
+}
+
 /* IPVS netns init & cleanup functions */
 int ip_vs_estimator_net_init(struct netns_ipvs *ipvs);
 int ip_vs_control_net_init(struct netns_ipvs *ipvs);
index 8f73be4945037c6d0997ec8ab7c3e9da3980a6e4..ff33f498c1373c86886ea579532412fa4eb0285d 100644 (file)
@@ -294,6 +294,7 @@ struct ipv6_fl_socklist {
 };
 
 struct ipcm6_cookie {
+       struct sockcm_cookie sockc;
        __s16 hlimit;
        __s16 tclass;
        __s8  dontfrag;
@@ -301,6 +302,25 @@ struct ipcm6_cookie {
        __u16 gso_size;
 };
 
+static inline void ipcm6_init(struct ipcm6_cookie *ipc6)
+{
+       *ipc6 = (struct ipcm6_cookie) {
+               .hlimit = -1,
+               .tclass = -1,
+               .dontfrag = -1,
+       };
+}
+
+static inline void ipcm6_init_sk(struct ipcm6_cookie *ipc6,
+                                const struct ipv6_pinfo *np)
+{
+       *ipc6 = (struct ipcm6_cookie) {
+               .hlimit = -1,
+               .tclass = np->tclass,
+               .dontfrag = np->dontfrag,
+       };
+}
+
 static inline struct ipv6_txoptions *txopt_get(const struct ipv6_pinfo *np)
 {
        struct ipv6_txoptions *opt;
@@ -554,34 +574,6 @@ static inline bool ipv6_prefix_equal(const struct in6_addr *addr1,
 }
 #endif
 
-struct inet_frag_queue;
-
-enum ip6_defrag_users {
-       IP6_DEFRAG_LOCAL_DELIVER,
-       IP6_DEFRAG_CONNTRACK_IN,
-       __IP6_DEFRAG_CONNTRACK_IN       = IP6_DEFRAG_CONNTRACK_IN + USHRT_MAX,
-       IP6_DEFRAG_CONNTRACK_OUT,
-       __IP6_DEFRAG_CONNTRACK_OUT      = IP6_DEFRAG_CONNTRACK_OUT + USHRT_MAX,
-       IP6_DEFRAG_CONNTRACK_BRIDGE_IN,
-       __IP6_DEFRAG_CONNTRACK_BRIDGE_IN = IP6_DEFRAG_CONNTRACK_BRIDGE_IN + USHRT_MAX,
-};
-
-void ip6_frag_init(struct inet_frag_queue *q, const void *a);
-extern const struct rhashtable_params ip6_rhash_params;
-
-/*
- *     Equivalent of ipv4 struct ip
- */
-struct frag_queue {
-       struct inet_frag_queue  q;
-
-       int                     iif;
-       __u16                   nhoffset;
-       u8                      ecn;
-};
-
-void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq);
-
 static inline bool ipv6_addr_any(const struct in6_addr *a)
 {
 #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64
@@ -790,6 +782,13 @@ static inline void iph_to_flow_copy_v6addrs(struct flow_keys *flow,
 
 #if IS_ENABLED(CONFIG_IPV6)
 
+static inline bool ipv6_can_nonlocal_bind(struct net *net,
+                                         struct inet_sock *inet)
+{
+       return net->ipv6.sysctl.ip_nonlocal_bind ||
+               inet->freebind || inet->transparent;
+}
+
 /* Sysctl settings for net ipv6.auto_flowlabels */
 #define IP6_AUTO_FLOW_LABEL_OFF                0
 #define IP6_AUTO_FLOW_LABEL_OPTOUT     1
@@ -915,6 +914,8 @@ static inline __be32 flowi6_get_flowlabel(const struct flowi6 *fl6)
 
 int ipv6_rcv(struct sk_buff *skb, struct net_device *dev,
             struct packet_type *pt, struct net_device *orig_dev);
+void ipv6_list_rcv(struct list_head *head, struct packet_type *pt,
+                  struct net_device *orig_dev);
 
 int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb);
 
@@ -931,8 +932,7 @@ int ip6_append_data(struct sock *sk,
                                int odd, struct sk_buff *skb),
                    void *from, int length, int transhdrlen,
                    struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
-                   struct rt6_info *rt, unsigned int flags,
-                   const struct sockcm_cookie *sockc);
+                   struct rt6_info *rt, unsigned int flags);
 
 int ip6_push_pending_frames(struct sock *sk);
 
@@ -949,8 +949,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
                             void *from, int length, int transhdrlen,
                             struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
                             struct rt6_info *rt, unsigned int flags,
-                            struct inet_cork_full *cork,
-                            const struct sockcm_cookie *sockc);
+                            struct inet_cork_full *cork);
 
 static inline struct sk_buff *ip6_finish_skb(struct sock *sk)
 {
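
Mirroring the IPv4 side, ipcm6_init()/ipcm6_init_sk() seed the cookie, and the separate sockcm_cookie argument drops out of ip6_append_data()/ip6_make_skb() since ipcm6_cookie now embeds it. A sendmsg-path sketch after the change:

    struct ipcm6_cookie ipc6;

    ipcm6_init_sk(&ipc6, np);       /* hlimit = -1; tclass/dontfrag from sk */
    ipc6.sockc.tsflags = sk->sk_tsflags;

    err = ip6_append_data(sk, getfrag, msg, len, 0 /* transhdrlen */,
                          &ipc6, &fl6, rt, MSG_DONTWAIT);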
diff --git a/include/net/ipv6_frag.h b/include/net/ipv6_frag.h
new file mode 100644 (file)
index 0000000..6ced1e6
--- /dev/null
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _IPV6_FRAG_H
+#define _IPV6_FRAG_H
+#include <linux/kernel.h>
+#include <net/addrconf.h>
+#include <net/ipv6.h>
+#include <net/inet_frag.h>
+
+enum ip6_defrag_users {
+       IP6_DEFRAG_LOCAL_DELIVER,
+       IP6_DEFRAG_CONNTRACK_IN,
+       __IP6_DEFRAG_CONNTRACK_IN       = IP6_DEFRAG_CONNTRACK_IN + USHRT_MAX,
+       IP6_DEFRAG_CONNTRACK_OUT,
+       __IP6_DEFRAG_CONNTRACK_OUT      = IP6_DEFRAG_CONNTRACK_OUT + USHRT_MAX,
+       IP6_DEFRAG_CONNTRACK_BRIDGE_IN,
+       __IP6_DEFRAG_CONNTRACK_BRIDGE_IN = IP6_DEFRAG_CONNTRACK_BRIDGE_IN + USHRT_MAX,
+};
+
+/*
+ *     Equivalent of ipv4's struct ipq
+ */
+struct frag_queue {
+       struct inet_frag_queue  q;
+
+       int                     iif;
+       __u16                   nhoffset;
+       u8                      ecn;
+};
+
+#if IS_ENABLED(CONFIG_IPV6)
+static inline void ip6frag_init(struct inet_frag_queue *q, const void *a)
+{
+       struct frag_queue *fq = container_of(q, struct frag_queue, q);
+       const struct frag_v6_compare_key *key = a;
+
+       q->key.v6 = *key;
+       fq->ecn = 0;
+}
+
+static inline u32 ip6frag_key_hashfn(const void *data, u32 len, u32 seed)
+{
+       return jhash2(data,
+                     sizeof(struct frag_v6_compare_key) / sizeof(u32), seed);
+}
+
+static inline u32 ip6frag_obj_hashfn(const void *data, u32 len, u32 seed)
+{
+       const struct inet_frag_queue *fq = data;
+
+       return jhash2((const u32 *)&fq->key.v6,
+                     sizeof(struct frag_v6_compare_key) / sizeof(u32), seed);
+}
+
+static inline int
+ip6frag_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr)
+{
+       const struct frag_v6_compare_key *key = arg->key;
+       const struct inet_frag_queue *fq = ptr;
+
+       return !!memcmp(&fq->key, key, sizeof(*key));
+}
+
+static inline void
+ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq)
+{
+       struct net_device *dev = NULL;
+       struct sk_buff *head;
+
+       rcu_read_lock();
+       spin_lock(&fq->q.lock);
+
+       if (fq->q.flags & INET_FRAG_COMPLETE)
+               goto out;
+
+       inet_frag_kill(&fq->q);
+
+       dev = dev_get_by_index_rcu(net, fq->iif);
+       if (!dev)
+               goto out;
+
+       __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
+       __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);
+
+       /* Don't send error if the first segment did not arrive. */
+       head = fq->q.fragments;
+       if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !head)
+               goto out;
+
+       head->dev = dev;
+       skb_get(head);
+       spin_unlock(&fq->q.lock);
+
+       icmpv6_send(head, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0);
+       kfree_skb(head);
+       goto out_rcu_unlock;
+
+out:
+       spin_unlock(&fq->q.lock);
+out_rcu_unlock:
+       rcu_read_unlock();
+       inet_frag_put(&fq->q);
+}
+#endif /* IS_ENABLED(CONFIG_IPV6) */
+#endif /* _IPV6_FRAG_H */
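
The header exists so the core reassembly and the netfilter/openvswitch defrag paths can share these callbacks; they slot directly into an rhashtable_params (sketch; the node member comes from struct inet_frag_queue):

    static const struct rhashtable_params ip6_rhash_params = {
            .head_offset            = offsetof(struct inet_frag_queue, node),
            .hashfn                 = ip6frag_key_hashfn,
            .obj_hashfn             = ip6frag_obj_hashfn,
            .obj_cmpfn              = ip6frag_obj_cmpfn,
            .automatic_shrinking    = true,
    };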
diff --git a/include/net/lag.h b/include/net/lag.h
new file mode 100644 (file)
index 0000000..95b880e
--- /dev/null
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_IF_LAG_H
+#define _LINUX_IF_LAG_H
+
+#include <linux/netdevice.h>
+#include <linux/if_team.h>
+#include <net/bonding.h>
+
+static inline bool net_lag_port_dev_txable(const struct net_device *port_dev)
+{
+       if (netif_is_team_port(port_dev))
+               return team_port_dev_txable(port_dev);
+       else
+               return bond_is_active_slave_dev(port_dev);
+}
+
+#endif /* _LINUX_IF_LAG_H */
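
net_lag_port_dev_txable() gives drivers one predicate for "can this bonding/team member transmit right now", e.g. when deciding whether to offload via a LAG port (sketch with a hypothetical wrapper):

    static bool my_port_usable(const struct net_device *port_dev)
    {
            if (!netif_is_lag_port(port_dev))
                    return true;            /* not a LAG member at all */
            return net_lag_port_dev_txable(port_dev);
    }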
index 851a5e19ae320e02044c40cb6c810d6cd0454d4d..5790f55c241df838384145428cc7305cb83904f7 100644 (file)
@@ -23,6 +23,7 @@
 #include <linux/ieee80211.h>
 #include <net/cfg80211.h>
 #include <net/codel.h>
+#include <net/ieee80211_radiotap.h>
 #include <asm/unaligned.h>
 
 /**
@@ -162,6 +163,8 @@ enum ieee80211_ac_numbers {
  * @txop: maximum burst time in units of 32 usecs, 0 meaning disabled
  * @acm: is mandatory admission control required for the access category
  * @uapsd: is U-APSD mode enabled for the queue
+ * @mu_edca: is the MU EDCA configured
+ * @mu_edca_param_rec: MU EDCA Parameter Record for HE
  */
 struct ieee80211_tx_queue_params {
        u16 txop;
@@ -170,6 +173,8 @@ struct ieee80211_tx_queue_params {
        u8 aifs;
        bool acm;
        bool uapsd;
+       bool mu_edca;
+       struct ieee80211_he_mu_edca_param_ac_rec mu_edca_param_rec;
 };
 
 struct ieee80211_low_level_stats {
@@ -463,6 +468,15 @@ struct ieee80211_mu_group_data {
  * This structure keeps information about a BSS (and an association
  * to that BSS) that can change during the lifetime of the BSS.
  *
+ * @bss_color: 6-bit value to mark inter-BSS frame, if BSS supports HE
+ * @htc_trig_based_pkt_ext: default PE in 4us units, if BSS supports HE
+ * @multi_sta_back_32bit: supports BA bitmap of 32-bits in Multi-STA BACK
+ * @uora_exists: is the UORA element advertised by AP
+ * @ack_enabled: indicates support for receiving a multi-TID frame that
+ *     solicits either ACK, BACK or both
+ * @uora_ocw_range: UORA element's OCW Range field
+ * @frame_time_rts_th: HE duration RTS threshold, in units of 32us
+ * @he_support: does this BSS support HE
  * @assoc: association status
  * @ibss_joined: indicates whether this station is part of an IBSS
  *     or not
@@ -550,6 +564,14 @@ struct ieee80211_mu_group_data {
  */
 struct ieee80211_bss_conf {
        const u8 *bssid;
+       u8 bss_color;
+       u8 htc_trig_based_pkt_ext;
+       bool multi_sta_back_32bit;
+       bool uora_exists;
+       bool ack_enabled;
+       u8 uora_ocw_range;
+       u16 frame_time_rts_th;
+       bool he_support;
        /* association related data */
        bool assoc, ibss_joined;
        bool ibss_creator;
@@ -1106,6 +1128,18 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info)
  * @RX_FLAG_AMPDU_EOF_BIT: Value of the EOF bit in the A-MPDU delimiter for this
  *     frame
  * @RX_FLAG_AMPDU_EOF_BIT_KNOWN: The EOF value is known
+ * @RX_FLAG_RADIOTAP_HE: HE radiotap data is present
+ *     (&struct ieee80211_radiotap_he, mac80211 will fill in
+ *      - DATA3_DATA_MCS
+ *      - DATA3_DATA_DCM
+ *      - DATA3_CODING
+ *      - DATA5_GI
+ *      - DATA5_DATA_BW_RU_ALLOC
+ *      - DATA6_NSTS
+ *      - DATA3_STBC
+ *     from the RX info data, so leave those zeroed when building this data)
+ * @RX_FLAG_RADIOTAP_HE_MU: HE MU radiotap data is present
+ *     (&struct ieee80211_radiotap_he_mu)
  */
 enum mac80211_rx_flags {
        RX_FLAG_MMIC_ERROR              = BIT(0),
@@ -1134,6 +1168,8 @@ enum mac80211_rx_flags {
        RX_FLAG_ICV_STRIPPED            = BIT(23),
        RX_FLAG_AMPDU_EOF_BIT           = BIT(24),
        RX_FLAG_AMPDU_EOF_BIT_KNOWN     = BIT(25),
+       RX_FLAG_RADIOTAP_HE             = BIT(26),
+       RX_FLAG_RADIOTAP_HE_MU          = BIT(27),
 };
 
 /**
@@ -1164,6 +1200,7 @@ enum mac80211_rx_encoding {
        RX_ENC_LEGACY = 0,
        RX_ENC_HT,
        RX_ENC_VHT,
+       RX_ENC_HE,
 };
 
 /**
@@ -1198,6 +1235,9 @@ enum mac80211_rx_encoding {
  * @encoding: &enum mac80211_rx_encoding
  * @bw: &enum rate_info_bw
  * @enc_flags: uses bits from &enum mac80211_rx_encoding_flags
+ * @he_ru: HE RU, from &enum nl80211_he_ru_alloc
+ * @he_gi: HE GI, from &enum nl80211_he_gi
+ * @he_dcm: HE DCM value
  * @rx_flags: internal RX flags for mac80211
  * @ampdu_reference: A-MPDU reference number, must be a different value for
  *     each A-MPDU but the same for each subframe within one A-MPDU
@@ -1211,7 +1251,8 @@ struct ieee80211_rx_status {
        u32 flag;
        u16 freq;
        u8 enc_flags;
-       u8 encoding:2, bw:3;
+       u8 encoding:2, bw:3, he_ru:3;
+       u8 he_gi:2, he_dcm:1;
        u8 rate_idx;
        u8 nss;
        u8 rx_flags;
@@ -1770,6 +1811,7 @@ struct ieee80211_sta_rates {
  * @supp_rates: Bitmap of supported rates (per band)
  * @ht_cap: HT capabilities of this STA; restricted to our own capabilities
  * @vht_cap: VHT capabilities of this STA; restricted to our own capabilities
+ * @he_cap: HE capabilities of this STA
 * @max_rx_aggregation_subframes: maximum number of frames in a single AMPDU
  *     that this station is allowed to transmit to us.
  *     Can be modified by driver.
@@ -1805,7 +1847,8 @@ struct ieee80211_sta {
        u16 aid;
        struct ieee80211_sta_ht_cap ht_cap;
        struct ieee80211_sta_vht_cap vht_cap;
-       u8 max_rx_aggregation_subframes;
+       struct ieee80211_sta_he_cap he_cap;
+       u16 max_rx_aggregation_subframes;
        bool wme;
        u8 uapsd_queues;
        u8 max_sp;
@@ -2196,10 +2239,11 @@ enum ieee80211_hw_flags {
  *     it shouldn't be set.
  *
  * @max_tx_aggregation_subframes: maximum number of subframes in an
- *     aggregate an HT driver will transmit. Though ADDBA will advertise
- *     a constant value of 64 as some older APs can crash if the window
- *     size is smaller (an example is LinkSys WRT120N with FW v1.0.07
- *     build 002 Jun 18 2012).
+ *     aggregate an HT/HE device will transmit. In HT AddBA we'll
+ *     advertise a constant value of 64 as some older APs crash if
+ *     the window size is smaller (an example is LinkSys WRT120N
+ *     with FW v1.0.07 build 002 Jun 18 2012).
+ *     For AddBA to HE capable peers this value will be used.
  *
  * @max_tx_fragments: maximum number of tx buffers per (A)-MSDU, sum
  *     of 1 + skb_shinfo(skb)->nr_frags for each skb in the frag_list.
@@ -2216,6 +2260,8 @@ enum ieee80211_hw_flags {
  *     the default is _GI | _BANDWIDTH.
  *     Use the %IEEE80211_RADIOTAP_VHT_KNOWN_\* values.
  *
+ * @radiotap_he: HE radiotap validity flags
+ *
  * @radiotap_timestamp: Information for the radiotap timestamp field; if the
  *     'units_pos' member is set to a non-negative value it must be set to
  *     a combination of a IEEE80211_RADIOTAP_TIMESTAMP_UNIT_* and a
@@ -2263,8 +2309,8 @@ struct ieee80211_hw {
        u8 max_rates;
        u8 max_report_rates;
        u8 max_rate_tries;
-       u8 max_rx_aggregation_subframes;
-       u8 max_tx_aggregation_subframes;
+       u16 max_rx_aggregation_subframes;
+       u16 max_tx_aggregation_subframes;
        u8 max_tx_fragments;
        u8 offchannel_tx_hw_queue;
        u8 radiotap_mcs_details;
@@ -2904,7 +2950,7 @@ struct ieee80211_ampdu_params {
        struct ieee80211_sta *sta;
        u16 tid;
        u16 ssn;
-       u8 buf_size;
+       u16 buf_size;
        bool amsdu;
        u16 timeout;
 };
index a71264d75d7f98d28f92dfd861ffe6e0d39c0198..9b5fdc50519acfbc6ab9219579e742fe79193194 100644 (file)
@@ -10,6 +10,7 @@
 #include <linux/workqueue.h>
 #include <linux/list.h>
 #include <linux/sysctl.h>
+#include <linux/uidgid.h>
 
 #include <net/flow.h>
 #include <net/netns/core.h>
@@ -170,6 +171,8 @@ extern struct net init_net;
 struct net *copy_net_ns(unsigned long flags, struct user_namespace *user_ns,
                        struct net *old_net);
 
+void net_ns_get_ownership(const struct net *net, kuid_t *uid, kgid_t *gid);
+
 void net_ns_barrier(void);
 #else /* CONFIG_NET_NS */
 #include <linux/sched.h>
@@ -182,6 +185,13 @@ static inline struct net *copy_net_ns(unsigned long flags,
        return old_net;
 }
 
+static inline void net_ns_get_ownership(const struct net *net,
+                                       kuid_t *uid, kgid_t *gid)
+{
+       *uid = GLOBAL_ROOT_UID;
+       *gid = GLOBAL_ROOT_GID;
+}
+
 static inline void net_ns_barrier(void) {}
 #endif /* CONFIG_NET_NS */
 
index d9918261701c95ca1a2f598268549afdc286c6d6..4107016c3bb46d3eb6b31dcf16b3a2b76b75b2e9 100644 (file)
@@ -28,6 +28,7 @@ enum netevent_notif_type {
        NETEVENT_DELAY_PROBE_TIME_UPDATE, /* arg is struct neigh_parms ptr */
        NETEVENT_IPV4_MPATH_HASH_UPDATE, /* arg is struct net ptr */
        NETEVENT_IPV6_MPATH_HASH_UPDATE, /* arg is struct net ptr */
+       NETEVENT_IPV4_FWD_UPDATE_PRIORITY_UPDATE, /* arg is struct net ptr */
 };
 
 int register_netevent_notifier(struct notifier_block *nb);
index 73f8257323262e6dc8a4ce5f690355b42ef71c17..c84b51682f08c68b7dbddcdb9f8b82b2465cefa6 100644 (file)
@@ -10,9 +10,6 @@
 #ifndef _NF_CONNTRACK_IPV4_H
 #define _NF_CONNTRACK_IPV4_H
 
-
-const extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4;
-
 extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4;
 extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4;
 extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp;
index 062dc19b5840654c49b45a53f887f0990ee31d9a..a2b0ed025908f2bce9a8632285b64275f8f56635 100644 (file)
@@ -41,6 +41,11 @@ union nf_conntrack_expect_proto {
        /* insert expect proto private data here */
 };
 
+struct nf_conntrack_net {
+       unsigned int users4;
+       unsigned int users6;
+};
+
 #include <linux/types.h>
 #include <linux/skbuff.h>
 
index 9b5e7634713e4125b983dd5964c6f196c17f5fdf..2a3e0974a6af4029ecba39cf4bb41d2c46d77282 100644 (file)
@@ -14,7 +14,6 @@
 #define _NF_CONNTRACK_CORE_H
 
 #include <linux/netfilter.h>
-#include <net/netfilter/nf_conntrack_l3proto.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_ecache.h>
 
@@ -40,16 +39,8 @@ void nf_conntrack_cleanup_start(void);
 void nf_conntrack_init_end(void);
 void nf_conntrack_cleanup_end(void);
 
-bool nf_ct_get_tuple(const struct sk_buff *skb, unsigned int nhoff,
-                    unsigned int dataoff, u_int16_t l3num, u_int8_t protonum,
-                    struct net *net,
-                    struct nf_conntrack_tuple *tuple,
-                    const struct nf_conntrack_l3proto *l3proto,
-                    const struct nf_conntrack_l4proto *l4proto);
-
 bool nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
                        const struct nf_conntrack_tuple *orig,
-                       const struct nf_conntrack_l3proto *l3proto,
                        const struct nf_conntrack_l4proto *l4proto);
 
 /* Find a connection corresponding to a tuple. */
@@ -75,10 +66,8 @@ static inline int nf_conntrack_confirm(struct sk_buff *skb)
        return ret;
 }
 
-void
-print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
-            const struct nf_conntrack_l3proto *l3proto,
-            const struct nf_conntrack_l4proto *proto);
+void print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
+                const struct nf_conntrack_l4proto *proto);
 
 #define CONNTRACK_LOCKS 1024
 
index 3a188a0923a38189f010ebf7d99766d1bd6cea54..4b2b2baf8ab4bbddd7f7cc104b42b15434aee18b 100644 (file)
@@ -1,8 +1,23 @@
 #ifndef _NF_CONNTRACK_COUNT_H
 #define _NF_CONNTRACK_COUNT_H
 
+#include <linux/list.h>
+
 struct nf_conncount_data;
 
+enum nf_conncount_list_add {
+       NF_CONNCOUNT_ADDED,     /* list add was ok */
+       NF_CONNCOUNT_ERR,       /* -ENOMEM, must drop skb */
+       NF_CONNCOUNT_SKIP,      /* list is already reclaimed by gc */
+};
+
+struct nf_conncount_list {
+       spinlock_t list_lock;
+       struct list_head head;  /* connections with the same filtering key */
+       unsigned int count;     /* length of list */
+       bool dead;
+};
+
 struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family,
                                            unsigned int keylen);
 void nf_conncount_destroy(struct net *net, unsigned int family,
@@ -14,15 +29,21 @@ unsigned int nf_conncount_count(struct net *net,
                                const struct nf_conntrack_tuple *tuple,
                                const struct nf_conntrack_zone *zone);
 
-unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head,
-                                const struct nf_conntrack_tuple *tuple,
-                                const struct nf_conntrack_zone *zone,
-                                bool *addit);
+void nf_conncount_lookup(struct net *net, struct nf_conncount_list *list,
+                        const struct nf_conntrack_tuple *tuple,
+                        const struct nf_conntrack_zone *zone,
+                        bool *addit);
+
+void nf_conncount_list_init(struct nf_conncount_list *list);
+
+enum nf_conncount_list_add
+nf_conncount_add(struct nf_conncount_list *list,
+                const struct nf_conntrack_tuple *tuple,
+                const struct nf_conntrack_zone *zone);
 
-bool nf_conncount_add(struct hlist_head *head,
-                     const struct nf_conntrack_tuple *tuple,
-                     const struct nf_conntrack_zone *zone);
+bool nf_conncount_gc_list(struct net *net,
+                         struct nf_conncount_list *list);
 
-void nf_conncount_cache_free(struct hlist_head *hhead);
+void nf_conncount_cache_free(struct nf_conncount_list *list);
 
 #endif
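
The conncount rework replaces the bare hlist with a self-locking list object and an explicit add result. Sketch of the lookup-then-add pattern the new API implies:

    struct nf_conncount_list list;
    bool addit;

    nf_conncount_list_init(&list);
    nf_conncount_lookup(net, &list, tuple, zone, &addit);
    if (addit) {
            switch (nf_conncount_add(&list, tuple, zone)) {
            case NF_CONNCOUNT_ERR:
                    return NF_DROP;         /* -ENOMEM */
            case NF_CONNCOUNT_SKIP:         /* reclaimed by gc; caller retries */
            case NF_CONNCOUNT_ADDED:
                    break;
            }
    }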
index 32c2a94a219d603dda33a2247f00a36b56fb786a..2492120b809720d8a685ef80c7d9c496ba5558a7 100644 (file)
@@ -103,9 +103,7 @@ int nf_conntrack_helpers_register(struct nf_conntrack_helper *, unsigned int);
 void nf_conntrack_helpers_unregister(struct nf_conntrack_helper *,
                                     unsigned int);
 
-struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct,
-                                         struct nf_conntrack_helper *helper,
-                                         gfp_t gfp);
+struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp);
 
 int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
                              gfp_t flags);
diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h
deleted file mode 100644 (file)
index d5808f3..0000000
+++ /dev/null
@@ -1,84 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C)2003,2004 USAGI/WIDE Project
- *
- * Header for use in defining a given L3 protocol for connection tracking.
- *
- * Author:
- *     Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- *
- * Derived from include/netfilter_ipv4/ip_conntrack_protocol.h
- */
-
-#ifndef _NF_CONNTRACK_L3PROTO_H
-#define _NF_CONNTRACK_L3PROTO_H
-#include <linux/netlink.h>
-#include <net/netlink.h>
-#include <linux/seq_file.h>
-#include <net/netfilter/nf_conntrack.h>
-
-struct nf_conntrack_l3proto {
-       /* L3 Protocol Family number. ex) PF_INET */
-       u_int16_t l3proto;
-
-       /* size of tuple nlattr, fills a hole */
-       u16 nla_size;
-
-       /*
-        * Try to fill in the third arg: nhoff is offset of l3 proto
-         * hdr.  Return true if possible.
-        */
-       bool (*pkt_to_tuple)(const struct sk_buff *skb, unsigned int nhoff,
-                            struct nf_conntrack_tuple *tuple);
-
-       /*
-        * Invert the per-proto part of the tuple: ie. turn xmit into reply.
-        * Some packets can't be inverted: return 0 in that case.
-        */
-       bool (*invert_tuple)(struct nf_conntrack_tuple *inverse,
-                            const struct nf_conntrack_tuple *orig);
-
-       /*
-        * Called before tracking. 
-        *      *dataoff: offset of protocol header (TCP, UDP,...) in skb
-        *      *protonum: protocol number
-        */
-       int (*get_l4proto)(const struct sk_buff *skb, unsigned int nhoff,
-                          unsigned int *dataoff, u_int8_t *protonum);
-
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
-       int (*tuple_to_nlattr)(struct sk_buff *skb,
-                              const struct nf_conntrack_tuple *t);
-       int (*nlattr_to_tuple)(struct nlattr *tb[],
-                              struct nf_conntrack_tuple *t);
-       const struct nla_policy *nla_policy;
-#endif
-
-       /* Called when netns wants to use connection tracking */
-       int (*net_ns_get)(struct net *);
-       void (*net_ns_put)(struct net *);
-
-       /* Module (if any) which this is connected to. */
-       struct module *me;
-};
-
-extern struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[NFPROTO_NUMPROTO];
-
-/* Protocol global registration. */
-int nf_ct_l3proto_register(const struct nf_conntrack_l3proto *proto);
-void nf_ct_l3proto_unregister(const struct nf_conntrack_l3proto *proto);
-
-const struct nf_conntrack_l3proto *nf_ct_l3proto_find_get(u_int16_t l3proto);
-
-/* Existing built-in protocols */
-extern struct nf_conntrack_l3proto nf_conntrack_l3proto_generic;
-
-static inline struct nf_conntrack_l3proto *
-__nf_ct_l3proto_find(u_int16_t l3proto)
-{
-       if (unlikely(l3proto >= NFPROTO_NUMPROTO))
-               return &nf_conntrack_l3proto_generic;
-       return rcu_dereference(nf_ct_l3protos[l3proto]);
-}
-
-#endif /*_NF_CONNTRACK_L3PROTO_H*/
index a7220eef9aeed424357e5439622368f5dd4dc845..6068c6da3eaca03703f3fe08f36b8649ae4e0cc4 100644 (file)
@@ -36,7 +36,7 @@ struct nf_conntrack_l4proto {
                             struct net *net, struct nf_conntrack_tuple *tuple);
 
        /* Invert the per-proto part of the tuple: ie. turn xmit into reply.
-        * Some packets can't be inverted: return 0 in that case.
+        * Only used by icmp, most protocols use a generic version.
         */
        bool (*invert_tuple)(struct nf_conntrack_tuple *inverse,
                             const struct nf_conntrack_tuple *orig);
@@ -45,13 +45,12 @@ struct nf_conntrack_l4proto {
        int (*packet)(struct nf_conn *ct,
                      const struct sk_buff *skb,
                      unsigned int dataoff,
-                     enum ip_conntrack_info ctinfo,
-                     unsigned int *timeouts);
+                     enum ip_conntrack_info ctinfo);
 
        /* Called when a new connection for this protocol found;
         * returns TRUE if it's OK.  If so, packet() called next. */
        bool (*new)(struct nf_conn *ct, const struct sk_buff *skb,
-                   unsigned int dataoff, unsigned int *timeouts);
+                   unsigned int dataoff);
 
        /* Called when a conntrack entry is destroyed */
        void (*destroy)(struct nf_conn *ct);
@@ -63,9 +62,6 @@ struct nf_conntrack_l4proto {
        /* called by gc worker if table is full */
        bool (*can_early_drop)(const struct nf_conn *ct);
 
-       /* Return the array of timeouts for this protocol. */
-       unsigned int *(*get_timeouts)(struct net *net);
-
        /* convert protoinfo to nfnetink attributes */
        int (*to_nlattr)(struct sk_buff *skb, struct nlattr *nla,
                         struct nf_conn *ct);
@@ -134,10 +130,6 @@ void nf_ct_l4proto_pernet_unregister(struct net *net,
 /* Protocol global registration. */
 int nf_ct_l4proto_register_one(const struct nf_conntrack_l4proto *proto);
 void nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *proto);
-int nf_ct_l4proto_register(const struct nf_conntrack_l4proto * const proto[],
-                          unsigned int num_proto);
-void nf_ct_l4proto_unregister(const struct nf_conntrack_l4proto * const proto[],
-                             unsigned int num_proto);
 
 /* Generic netlink helpers */
 int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb,
index 9468ab4ad12dd78c907206a5a69bf245320e8b04..80ceb3d0291dfb0769a0814474d55cceda522ee7 100644 (file)
@@ -67,27 +67,17 @@ struct nf_conn_timeout *nf_ct_timeout_ext_add(struct nf_conn *ct,
 #endif
 };
 
-static inline unsigned int *
-nf_ct_timeout_lookup(struct net *net, struct nf_conn *ct,
-                    const struct nf_conntrack_l4proto *l4proto)
+static inline unsigned int *nf_ct_timeout_lookup(const struct nf_conn *ct)
 {
+       unsigned int *timeouts = NULL;
 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
        struct nf_conn_timeout *timeout_ext;
-       unsigned int *timeouts;
 
        timeout_ext = nf_ct_timeout_find(ct);
-       if (timeout_ext) {
+       if (timeout_ext)
                timeouts = nf_ct_timeout_data(timeout_ext);
-               if (unlikely(!timeouts))
-                       timeouts = l4proto->get_timeouts(net);
-       } else {
-               timeouts = l4proto->get_timeouts(net);
-       }
-
-       return timeouts;
-#else
-       return l4proto->get_timeouts(net);
 #endif
+       return timeouts;
 }
 
 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
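
With get_timeouts() gone from nf_conntrack_l4proto, nf_ct_timeout_lookup() simply returns NULL when no timeout extension is attached and each tracker falls back to its own defaults, e.g. (sketch; udp_get_timeouts() stands in for the per-protocol helper):

    unsigned int *timeouts = nf_ct_timeout_lookup(ct);

    if (!timeouts)
            timeouts = udp_get_timeouts(nf_ct_net(ct));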
index ba9fa4592f2b238fb642cbefce130d7a100bb0ef..0e355f4a3d76365e2cfab25b0febfd586b245493 100644 (file)
@@ -4,7 +4,7 @@
 #include <linux/in.h>
 #include <linux/in6.h>
 #include <linux/netdevice.h>
-#include <linux/rhashtable.h>
+#include <linux/rhashtable-types.h>
 #include <linux/rcupdate.h>
 #include <linux/netfilter/nf_conntrack_tuple_common.h>
 #include <net/dst.h>
index e811ac07ea94a2c32dd1d36fe59028ec94405571..0d3920896d5023647b9182e8c477b4073950e806 100644 (file)
@@ -106,7 +106,8 @@ int nf_log_dump_udp_header(struct nf_log_buf *m, const struct sk_buff *skb,
 int nf_log_dump_tcp_header(struct nf_log_buf *m, const struct sk_buff *skb,
                           u8 proto, int fragment, unsigned int offset,
                           unsigned int logflags);
-void nf_log_dump_sk_uid_gid(struct nf_log_buf *m, struct sock *sk);
+void nf_log_dump_sk_uid_gid(struct net *net, struct nf_log_buf *m,
+                           struct sock *sk);
 void nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf,
                               unsigned int hooknum, const struct sk_buff *skb,
                               const struct net_device *in,
index 4cc64c8446eb94f1c122cf15d4bf74c7e3f2275d..82d0e41b76f224f02a5b8fb3fec6d4c64721066a 100644 (file)
@@ -17,6 +17,14 @@ static inline bool nf_tproxy_sk_is_transparent(struct sock *sk)
        return false;
 }
 
+/* assign a socket to the skb -- consumes sk */
+static inline void nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk)
+{
+       skb_orphan(skb);
+       skb->sk = sk;
+       skb->destructor = sock_edemux;
+}
+
 __be32 nf_tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr);
 
 /**
index 24c78183a4c262e086c29cde1e61b7f397d8bab0..16a842456189f2fc1a3363685b5dd4310a32b2b8 100644 (file)
@@ -9,12 +9,7 @@ struct net;
 static inline u32 net_hash_mix(const struct net *net)
 {
 #ifdef CONFIG_NET_NS
-       /*
-        * shift this right to eliminate bits, that are
-        * always zeroed
-        */
-
-       return (u32)(((unsigned long)net) >> L1_CACHE_SHIFT);
+       return (u32)(((unsigned long)net) >> ilog2(sizeof(*net)));
 #else
        return 0;
 #endif
index 661348f23ea5a3a9320b2cafcd17e23960214771..e47503b4e4d178e1ef334f4eb11378a9432bfbf8 100644 (file)
@@ -98,6 +98,7 @@ struct netns_ipv4 {
        int sysctl_ip_default_ttl;
        int sysctl_ip_no_pmtu_disc;
        int sysctl_ip_fwd_use_pmtu;
+       int sysctl_ip_fwd_update_priority;
        int sysctl_ip_nonlocal_bind;
        /* Shall we try to damage output packets if routing dev changes? */
        int sysctl_ip_dynaddr;
index 94767ea3a490660cfe23da891c9a3ba8255da4e1..286fd960896fdd8fa79b9598cdc8081fc424867d 100644 (file)
@@ -7,6 +7,7 @@
 struct netns_nftables {
        struct list_head        tables;
        struct list_head        commit_list;
+       struct mutex            commit_mutex;
        unsigned int            base_seq;
        u8                      gencursor;
        u8                      validate_state;
index 20b059574e600e64838b0bdecfaf6a76e6629d4a..ef727f71336e7e5f89d92000194bcfab4bacc5a8 100644 (file)
@@ -7,12 +7,16 @@
 #include <net/sch_generic.h>
 #include <net/act_api.h>
 
+/* TC action not accessible from user space */
+#define TC_ACT_REINSERT                (TC_ACT_VALUE_MAX + 1)
+
 /* Basic packet classifier frontend definitions. */
 
 struct tcf_walker {
        int     stop;
        int     skip;
        int     count;
+       unsigned long cookie;
        int     (*fn)(struct tcf_proto *, void *node, struct tcf_walker *);
 };
 
@@ -36,9 +40,9 @@ struct tcf_block_cb;
 bool tcf_queue_work(struct rcu_work *rwork, work_func_t func);
 
 #ifdef CONFIG_NET_CLS
-struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
-                               bool create);
-void tcf_chain_put(struct tcf_chain *chain);
+struct tcf_chain *tcf_chain_get_by_act(struct tcf_block *block,
+                                      u32 chain_index);
+void tcf_chain_put_by_act(struct tcf_chain *chain);
 void tcf_block_netif_keep_dst(struct tcf_block *block);
 int tcf_block_get(struct tcf_block **p_block,
                  struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q,
@@ -73,11 +77,13 @@ void tcf_block_cb_incref(struct tcf_block_cb *block_cb);
 unsigned int tcf_block_cb_decref(struct tcf_block_cb *block_cb);
 struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block,
                                             tc_setup_cb_t *cb, void *cb_ident,
-                                            void *cb_priv);
+                                            void *cb_priv,
+                                            struct netlink_ext_ack *extack);
 int tcf_block_cb_register(struct tcf_block *block,
                          tc_setup_cb_t *cb, void *cb_ident,
-                         void *cb_priv);
-void __tcf_block_cb_unregister(struct tcf_block_cb *block_cb);
+                         void *cb_priv, struct netlink_ext_ack *extack);
+void __tcf_block_cb_unregister(struct tcf_block *block,
+                              struct tcf_block_cb *block_cb);
 void tcf_block_cb_unregister(struct tcf_block *block,
                             tc_setup_cb_t *cb, void *cb_ident);
 
@@ -111,11 +117,6 @@ void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
 {
 }
 
-static inline bool tcf_block_shared(struct tcf_block *block)
-{
-       return false;
-}
-
 static inline struct Qdisc *tcf_block_q(struct tcf_block *block)
 {
        return NULL;
@@ -166,7 +167,8 @@ unsigned int tcf_block_cb_decref(struct tcf_block_cb *block_cb)
 static inline
 struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block,
                                             tc_setup_cb_t *cb, void *cb_ident,
-                                            void *cb_priv)
+                                            void *cb_priv,
+                                            struct netlink_ext_ack *extack)
 {
        return NULL;
 }
@@ -174,13 +176,14 @@ struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block,
 static inline
 int tcf_block_cb_register(struct tcf_block *block,
                          tc_setup_cb_t *cb, void *cb_ident,
-                         void *cb_priv)
+                         void *cb_priv, struct netlink_ext_ack *extack)
 {
        return 0;
 }
 
 static inline
-void __tcf_block_cb_unregister(struct tcf_block_cb *block_cb)
+void __tcf_block_cb_unregister(struct tcf_block *block,
+                              struct tcf_block_cb *block_cb)
 {
 }
 
@@ -601,6 +604,7 @@ struct tc_block_offload {
        enum tc_block_command command;
        enum tcf_block_binder_type binder_type;
        struct tcf_block *block;
+       struct netlink_ext_ack *extack;
 };
 
 struct tc_cls_common_offload {
@@ -720,6 +724,8 @@ enum tc_fl_command {
        TC_CLSFLOWER_REPLACE,
        TC_CLSFLOWER_DESTROY,
        TC_CLSFLOWER_STATS,
+       TC_CLSFLOWER_TMPLT_CREATE,
+       TC_CLSFLOWER_TMPLT_DESTROY,
 };
 
 struct tc_cls_flower_offload {
@@ -776,6 +782,7 @@ struct tc_mqprio_qopt_offload {
 struct tc_cookie {
        u8  *data;
        u32 len;
+       struct rcu_head rcu;
 };
 
 struct tc_qopt_offload_stats {
index 815b92a23936f8f8ce49432b1e5c4c7827580918..7dc769e5452ba6e5eaa92f10808bf2cbfb1d152a 100644 (file)
@@ -72,6 +72,8 @@ struct qdisc_watchdog {
        struct Qdisc    *qdisc;
 };
 
+void qdisc_watchdog_init_clockid(struct qdisc_watchdog *wd, struct Qdisc *qdisc,
+                                clockid_t clockid);
 void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc);
 void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires);
 
@@ -153,4 +155,9 @@ struct tc_cbs_qopt_offload {
        s32 sendslope;
 };
 
+struct tc_etf_qopt_offload {
+       u8 enable;
+       s32 queue;
+};
+
 #endif
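
qdisc_watchdog_init_clockid() lets a qdisc arm its watchdog on a clock other than the scheduler default, which the new ETF (earliest txtime first) offload struct pairs with. Sketch of an init path, with my_sched_data as a stand-in private struct:

    static int my_etf_init(struct Qdisc *sch, clockid_t clockid)
    {
            struct my_sched_data *q = qdisc_priv(sch);

            /* e.g. CLOCK_TAI for time-based transmission */
            qdisc_watchdog_init_clockid(&q->watchdog, sch, clockid);
            return 0;
    }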
index 6488daa32f829563c3d26ffe075022f3c9ededf5..a6d00093f35e70d9b5da920be22ae4fd1db0bb6a 100644 (file)
@@ -20,6 +20,9 @@ struct qdisc_walker;
 struct tcf_walker;
 struct module;
 
+typedef int tc_setup_cb_t(enum tc_setup_type type,
+                         void *type_data, void *cb_priv);
+
 struct qdisc_rate_table {
        struct tc_ratespec rate;
        u32             data[256];
@@ -232,9 +235,17 @@ struct tcf_result {
                        u32             classid;
                };
                const struct tcf_proto *goto_tp;
+
+               /* used by the TC_ACT_REINSERT action */
+               struct {
+                       bool            ingress;
+                       struct gnet_stats_queue *qstats;
+               };
        };
 };
 
+struct tcf_chain;
+
 struct tcf_proto_ops {
        struct list_head        head;
        char                    kind[IFNAMSIZ];
@@ -256,11 +267,22 @@ struct tcf_proto_ops {
                                          bool *last,
                                          struct netlink_ext_ack *);
        void                    (*walk)(struct tcf_proto*, struct tcf_walker *arg);
+       int                     (*reoffload)(struct tcf_proto *tp, bool add,
+                                            tc_setup_cb_t *cb, void *cb_priv,
+                                            struct netlink_ext_ack *extack);
        void                    (*bind_class)(void *, u32, unsigned long);
+       void *                  (*tmplt_create)(struct net *net,
+                                               struct tcf_chain *chain,
+                                               struct nlattr **tca,
+                                               struct netlink_ext_ack *extack);
+       void                    (*tmplt_destroy)(void *tmplt_priv);
 
        /* rtnetlink specific */
        int                     (*dump)(struct net*, struct tcf_proto*, void *,
                                        struct sk_buff *skb, struct tcmsg*);
+       int                     (*tmplt_dump)(struct sk_buff *skb,
+                                             struct net *net,
+                                             void *tmplt_priv);
 
        struct module           *owner;
 };
@@ -269,6 +291,8 @@ struct tcf_proto {
        /* Fast access part */
        struct tcf_proto __rcu  *next;
        void __rcu              *root;
+
+       /* called under RCU BH lock */
        int                     (*classify)(struct sk_buff *,
                                            const struct tcf_proto *,
                                            struct tcf_result *);
@@ -294,11 +318,14 @@ typedef void tcf_chain_head_change_t(struct tcf_proto *tp_head, void *priv);
 
 struct tcf_chain {
        struct tcf_proto __rcu *filter_chain;
-       struct list_head filter_chain_list;
        struct list_head list;
        struct tcf_block *block;
        u32 index; /* chain index */
        unsigned int refcnt;
+       unsigned int action_refcnt;
+       bool explicitly_created;
+       const struct tcf_proto_ops *tmplt_ops;
+       void *tmplt_priv;
 };
 
 struct tcf_block {
@@ -312,6 +339,10 @@ struct tcf_block {
        bool keep_dst;
        unsigned int offloadcnt; /* Number of offloaded filters */
        unsigned int nooffloaddevcnt; /* Number of devs unable to do offload */
+       struct {
+               struct tcf_chain *chain;
+               struct list_head filter_chain_list;
+       } chain0;
 };
 
 static inline void tcf_block_offload_inc(struct tcf_block *block, u32 *flags)
@@ -330,6 +361,21 @@ static inline void tcf_block_offload_dec(struct tcf_block *block, u32 *flags)
        block->offloadcnt--;
 }
 
+static inline void
+tc_cls_offload_cnt_update(struct tcf_block *block, unsigned int *cnt,
+                         u32 *flags, bool add)
+{
+       if (add) {
+               if (!*cnt)
+                       tcf_block_offload_inc(block, flags);
+               (*cnt)++;
+       } else {
+               (*cnt)--;
+               if (!*cnt)
+                       tcf_block_offload_dec(block, flags);
+       }
+}
+
 static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz)
 {
        struct qdisc_skb_cb *qcb;
@@ -529,6 +575,15 @@ static inline void skb_reset_tc(struct sk_buff *skb)
 #endif
 }
 
+static inline bool skb_is_tc_redirected(const struct sk_buff *skb)
+{
+#ifdef CONFIG_NET_CLS_ACT
+       return skb->tc_redirected;
+#else
+       return false;
+#endif
+}
+
 static inline bool skb_at_tc_ingress(const struct sk_buff *skb)
 {
 #ifdef CONFIG_NET_CLS_ACT
@@ -1068,4 +1123,17 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
 void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc,
                          struct mini_Qdisc __rcu **p_miniq);
 
+static inline void skb_tc_reinsert(struct sk_buff *skb, struct tcf_result *res)
+{
+       struct gnet_stats_queue *stats = res->qstats;
+       int ret;
+
+       if (res->ingress)
+               ret = netif_receive_skb(skb);
+       else
+               ret = dev_queue_xmit(skb);
+       if (ret && stats)
+               qstats_overlimit_inc(res->qstats);
+}
+
 #endif
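
The tc_setup_cb_t typedef and the new ->reoffload op above let the core replay installed filters through a driver's block callback. A hedged sketch of a callback matching that signature; the foo_* names are illustrative, not part of this patch:

	static int foo_setup_tc_block_cb(enum tc_setup_type type,
					 void *type_data, void *cb_priv)
	{
		struct foo_priv *priv = cb_priv;

		switch (type) {
		case TC_SETUP_CLSFLOWER:
			/* type_data is a struct tc_cls_flower_offload here */
			return foo_setup_flower(priv, type_data);
		default:
			return -EOPNOTSUPP;
		}
	}
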
index dbe1b911a24d31e920f4c31d3c945857b760424b..ab869e0d83267b730d7b0758dbf90f233962c714 100644 (file)
@@ -48,7 +48,7 @@
 #define __sctp_structs_h__
 
 #include <linux/ktime.h>
-#include <linux/rhashtable.h>
+#include <linux/rhashtable-types.h>
 #include <linux/socket.h>      /* linux/in.h needs this!!    */
 #include <linux/in.h>          /* We get struct sockaddr_in. */
 #include <linux/in6.h>         /* We get struct in6_addr     */
@@ -193,6 +193,9 @@ struct sctp_sock {
        /* This is the max_retrans value for new associations. */
        __u16 pathmaxrxt;
 
+       __u32 flowlabel;
+       __u8  dscp;
+
        /* The initial Path MTU to use for new associations. */
        __u32 pathmtu;
 
@@ -220,6 +223,7 @@ struct sctp_sock {
        __u32 adaptation_ind;
        __u32 pd_point;
        __u16   nodelay:1,
+               reuse:1,
                disable_fragments:1,
                v4mapped:1,
                frag_interleave:1,
@@ -894,6 +898,9 @@ struct sctp_transport {
         */
        __u16 pathmaxrxt;
 
+       __u32 flowlabel;
+       __u8  dscp;
+
        /* This is the partially failed retrans value for the transport
         * and will be initialized from the assocs value.  This can be changed
         * using the SCTP_PEER_ADDR_THLDS socket option
@@ -1771,6 +1778,9 @@ struct sctp_association {
         */
        __u16 pathmaxrxt;
 
+       __u32 flowlabel;
+       __u8  dscp;
+
        /* Flag that path mtu update is pending */
        __u8   pmtu_pending;
 
index e029e301faa51f45d3fc3c6b87daf79243d6ad44..2567941a2f32ff4896380296463462e43c5d760f 100644 (file)
@@ -18,7 +18,7 @@
 #include <linux/ipv6.h>
 #include <net/lwtunnel.h>
 #include <linux/seg6.h>
-#include <linux/rhashtable.h>
+#include <linux/rhashtable-types.h>
 
 static inline void update_csum_diff4(struct sk_buff *skb, __be32 from,
                                     __be32 to)
index 69c3a106056b4dbe9db91a5295431190aa31c4d5..7fda469e27583a5bf9adfb48230cff385fc9d065 100644 (file)
@@ -22,7 +22,7 @@
 #include <linux/route.h>
 #include <net/seg6.h>
 #include <linux/seg6_hmac.h>
-#include <linux/rhashtable.h>
+#include <linux/rhashtable-types.h>
 
 #define SEG6_HMAC_MAX_DIGESTSIZE       160
 #define SEG6_HMAC_RING_SIZE            256
index 8381d163fefad5b2dd299d5bbc1fd67900bd40e4..9ef49f8b1002a42044e3c70fb86666aacb46a074 100644 (file)
@@ -11,6 +11,8 @@
 #ifndef _SMC_H
 #define _SMC_H
 
+#define SMC_MAX_PNETID_LEN     16      /* Max. length of PNET id */
+
 struct smc_hashinfo {
        rwlock_t lock;
        struct hlist_head ht;
@@ -18,4 +20,67 @@ struct smc_hashinfo {
 
 int smc_hash_sk(struct sock *sk);
 void smc_unhash_sk(struct sock *sk);
+
+/* SMCD/ISM device driver interface */
+struct smcd_dmb {
+       u64 dmb_tok;
+       u64 rgid;
+       u32 dmb_len;
+       u32 sba_idx;
+       u32 vlan_valid;
+       u32 vlan_id;
+       void *cpu_addr;
+       dma_addr_t dma_addr;
+};
+
+#define ISM_EVENT_DMB  0
+#define ISM_EVENT_GID  1
+#define ISM_EVENT_SWR  2
+
+struct smcd_event {
+       u32 type;
+       u32 code;
+       u64 tok;
+       u64 time;
+       u64 info;
+};
+
+struct smcd_dev;
+
+struct smcd_ops {
+       int (*query_remote_gid)(struct smcd_dev *dev, u64 rgid, u32 vid_valid,
+                               u32 vid);
+       int (*register_dmb)(struct smcd_dev *dev, struct smcd_dmb *dmb);
+       int (*unregister_dmb)(struct smcd_dev *dev, struct smcd_dmb *dmb);
+       int (*add_vlan_id)(struct smcd_dev *dev, u64 vlan_id);
+       int (*del_vlan_id)(struct smcd_dev *dev, u64 vlan_id);
+       int (*set_vlan_required)(struct smcd_dev *dev);
+       int (*reset_vlan_required)(struct smcd_dev *dev);
+       int (*signal_event)(struct smcd_dev *dev, u64 rgid, u32 trigger_irq,
+                           u32 event_code, u64 info);
+       int (*move_data)(struct smcd_dev *dev, u64 dmb_tok, unsigned int idx,
+                        bool sf, unsigned int offset, void *data,
+                        unsigned int size);
+};
+
+struct smcd_dev {
+       const struct smcd_ops *ops;
+       struct device dev;
+       void *priv;
+       u64 local_gid;
+       struct list_head list;
+       spinlock_t lock;
+       struct smc_connection **conn;
+       struct list_head vlan;
+       struct workqueue_struct *event_wq;
+       u8 pnetid[SMC_MAX_PNETID_LEN];
+};
+
+struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name,
+                               const struct smcd_ops *ops, int max_dmbs);
+int smcd_register_dev(struct smcd_dev *smcd);
+void smcd_unregister_dev(struct smcd_dev *smcd);
+void smcd_free_dev(struct smcd_dev *smcd);
+void smcd_handle_event(struct smcd_dev *dev, struct smcd_event *event);
+void smcd_handle_irq(struct smcd_dev *dev, unsigned int bit);
 #endif /* _SMC_H */
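
Illustrative only: how an ISM device driver might wire up the SMCD interface declared above. The foo_* names and the max_dmbs value are hypothetical; only the smcd_* calls come from this patch:

	static const struct smcd_ops foo_smcd_ops = {
		.register_dmb	= foo_register_dmb,
		.unregister_dmb	= foo_unregister_dmb,
		.move_data	= foo_move_data,
		/* remaining ops elided for brevity */
	};

	static int foo_probe(struct device *parent)
	{
		struct smcd_dev *smcd;
		int rc;

		smcd = smcd_alloc_dev(parent, "foo_ism", &foo_smcd_ops, 64);
		if (!smcd)
			return -ENOMEM;
		rc = smcd_register_dev(smcd);
		if (rc)
			smcd_free_dev(smcd);
		return rc;
	}
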
index b3b75419eafecb5cf170830059f0dc667141646d..433f45fc2d6836a2965d09a123475fcbeb7c0bb2 100644 (file)
@@ -139,6 +139,7 @@ typedef __u64 __bitwise __addrpair;
  *     @skc_node: main hash linkage for various protocol lookup tables
  *     @skc_nulls_node: main hash linkage for TCP/UDP/UDP-Lite protocol
  *     @skc_tx_queue_mapping: tx queue number for this connection
+ *     @skc_rx_queue_mapping: rx queue number for this connection
  *     @skc_flags: place holder for sk_flags
  *             %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE,
  *             %SO_OOBINLINE settings, %SO_TIMESTAMPING settings
@@ -214,7 +215,10 @@ struct sock_common {
                struct hlist_node       skc_node;
                struct hlist_nulls_node skc_nulls_node;
        };
-       int                     skc_tx_queue_mapping;
+       unsigned short          skc_tx_queue_mapping;
+#ifdef CONFIG_XPS
+       unsigned short          skc_rx_queue_mapping;
+#endif
        union {
                int             skc_incoming_cpu;
                u32             skc_rcv_wnd;
@@ -315,6 +319,9 @@ struct sock_common {
   *    @sk_destruct: called at sock freeing time, i.e. when all refcnt == 0
   *    @sk_reuseport_cb: reuseport group container
   *    @sk_rcu: used during RCU grace period
+  *    @sk_clockid: clockid used by time-based scheduling (SO_TXTIME)
+  *    @sk_txtime_deadline_mode: set deadline mode for SO_TXTIME
+  *    @sk_txtime_unused: unused txtime flags
   */
 struct sock {
        /*
@@ -326,6 +333,9 @@ struct sock {
 #define sk_nulls_node          __sk_common.skc_nulls_node
 #define sk_refcnt              __sk_common.skc_refcnt
 #define sk_tx_queue_mapping    __sk_common.skc_tx_queue_mapping
+#ifdef CONFIG_XPS
+#define sk_rx_queue_mapping    __sk_common.skc_rx_queue_mapping
+#endif
 
 #define sk_dontcopy_begin      __sk_common.skc_dontcopy_begin
 #define sk_dontcopy_end                __sk_common.skc_dontcopy_end
@@ -468,6 +478,12 @@ struct sock {
        u8                      sk_shutdown;
        u32                     sk_tskey;
        atomic_t                sk_zckey;
+
+       u8                      sk_clockid;
+       u8                      sk_txtime_deadline_mode : 1,
+                               sk_txtime_report_errors : 1,
+                               sk_txtime_unused : 6;
+
        struct socket           *sk_socket;
        void                    *sk_user_data;
 #ifdef CONFIG_SECURITY
@@ -783,6 +799,7 @@ enum sock_flags {
        SOCK_FILTER_LOCKED, /* Filter cannot be changed anymore */
        SOCK_SELECT_ERR_QUEUE, /* Wake select on error queue */
        SOCK_RCU_FREE, /* wait rcu grace period in sk_destruct() */
+       SOCK_TXTIME,
 };
 
 #define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE))
@@ -1578,10 +1595,17 @@ void sock_kzfree_s(struct sock *sk, void *mem, int size);
 void sk_send_sigurg(struct sock *sk);
 
 struct sockcm_cookie {
+       u64 transmit_time;
        u32 mark;
        u16 tsflags;
 };
 
+static inline void sockcm_init(struct sockcm_cookie *sockc,
+                              const struct sock *sk)
+{
+       *sockc = (struct sockcm_cookie) { .tsflags = sk->sk_tsflags };
+}
+
 int __sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct cmsghdr *cmsg,
                     struct sockcm_cookie *sockc);
 int sock_cmsg_send(struct sock *sk, struct msghdr *msg,
@@ -1681,19 +1705,58 @@ static inline int sk_receive_skb(struct sock *sk, struct sk_buff *skb,
 
 static inline void sk_tx_queue_set(struct sock *sk, int tx_queue)
 {
+       /* sk_tx_queue_mapping accepts only up to a 16-bit value */
+       if (WARN_ON_ONCE((unsigned short)tx_queue >= USHRT_MAX))
+               return;
        sk->sk_tx_queue_mapping = tx_queue;
 }
 
+#define NO_QUEUE_MAPPING       USHRT_MAX
+
 static inline void sk_tx_queue_clear(struct sock *sk)
 {
-       sk->sk_tx_queue_mapping = -1;
+       sk->sk_tx_queue_mapping = NO_QUEUE_MAPPING;
 }
 
 static inline int sk_tx_queue_get(const struct sock *sk)
 {
-       return sk ? sk->sk_tx_queue_mapping : -1;
+       if (sk && sk->sk_tx_queue_mapping != NO_QUEUE_MAPPING)
+               return sk->sk_tx_queue_mapping;
+
+       return -1;
 }
 
+static inline void sk_rx_queue_set(struct sock *sk, const struct sk_buff *skb)
+{
+#ifdef CONFIG_XPS
+       if (skb_rx_queue_recorded(skb)) {
+               u16 rx_queue = skb_get_rx_queue(skb);
+
+               if (WARN_ON_ONCE(rx_queue == NO_QUEUE_MAPPING))
+                       return;
+
+               sk->sk_rx_queue_mapping = rx_queue;
+       }
+#endif
+}
+
+static inline void sk_rx_queue_clear(struct sock *sk)
+{
+#ifdef CONFIG_XPS
+       sk->sk_rx_queue_mapping = NO_QUEUE_MAPPING;
+#endif
+}
+
+#ifdef CONFIG_XPS
+static inline int sk_rx_queue_get(const struct sock *sk)
+{
+       if (sk && sk->sk_rx_queue_mapping != NO_QUEUE_MAPPING)
+               return sk->sk_rx_queue_mapping;
+
+       return -1;
+}
+#endif
+
 static inline void sk_set_socket(struct sock *sk, struct socket *sock)
 {
        sk_tx_queue_clear(sk);
@@ -1725,7 +1788,7 @@ static inline void sock_graft(struct sock *sk, struct socket *parent)
 {
        WARN_ON(parent->sk);
        write_lock_bh(&sk->sk_callback_lock);
-       sk->sk_wq = parent->wq;
+       rcu_assign_pointer(sk->sk_wq, parent->wq);
        parent->sk = sk;
        sk_set_socket(sk, parent);
        sk->sk_uid = SOCK_INODE(parent)->i_uid;
@@ -1994,16 +2057,16 @@ static inline bool skwq_has_sleeper(struct socket_wq *wq)
 /**
  * sock_poll_wait - place memory barrier behind the poll_wait call.
  * @filp:           file
- * @wait_address:   socket wait queue
  * @p:              poll_table
  *
  * See the comments in the wq_has_sleeper function.
  */
-static inline void sock_poll_wait(struct file *filp,
-               wait_queue_head_t *wait_address, poll_table *p)
+static inline void sock_poll_wait(struct file *filp, poll_table *p)
 {
-       if (!poll_does_not_wait(p) && wait_address) {
-               poll_wait(filp, wait_address, p);
+       struct socket *sock = filp->private_data;
+
+       if (!poll_does_not_wait(p)) {
+               poll_wait(filp, &sock->wq->wait, p);
                /* We need to be sure we are in sync with the
                 * socket flags modification.
                 *
index 227a6f1d02f4c4a2c401648071aecd733aa1a5a7..fac3ad4a86de96ed1acb1b2747f043471b5f4fd3 100644 (file)
@@ -17,6 +17,7 @@ struct tcf_pedit {
        struct tc_pedit_key     *tcfp_keys;
        struct tcf_pedit_key_ex *tcfp_keys_ex;
 };
+
 #define to_pedit(a) ((struct tcf_pedit *)a)
 
 static inline bool is_tcf_pedit(const struct tc_action *a)
index 19cd3d3458049a2fe6d317bc48ff9e5c0524587b..911bbac838a271b5d3690fc890338f19201c521b 100644 (file)
 #include <net/act_api.h>
 #include <linux/tc_act/tc_skbedit.h>
 
+struct tcf_skbedit_params {
+       u32 flags;
+       u32 priority;
+       u32 mark;
+       u32 mask;
+       u16 queue_mapping;
+       u16 ptype;
+       struct rcu_head rcu;
+};
+
 struct tcf_skbedit {
-       struct tc_action        common;
-       u32             flags;
-       u32             priority;
-       u32             mark;
-       u32             mask;
-       u16             queue_mapping;
-       u16             ptype;
+       struct tc_action common;
+       struct tcf_skbedit_params __rcu *params;
 };
 #define to_skbedit(a) ((struct tcf_skbedit *)a)
 
@@ -37,15 +42,27 @@ struct tcf_skbedit {
 static inline bool is_tcf_skbedit_mark(const struct tc_action *a)
 {
 #ifdef CONFIG_NET_CLS_ACT
-       if (a->ops && a->ops->type == TCA_ACT_SKBEDIT)
-               return to_skbedit(a)->flags == SKBEDIT_F_MARK;
+       u32 flags;
+
+       if (a->ops && a->ops->type == TCA_ACT_SKBEDIT) {
+               rcu_read_lock();
+               flags = rcu_dereference(to_skbedit(a)->params)->flags;
+               rcu_read_unlock();
+               return flags == SKBEDIT_F_MARK;
+       }
 #endif
        return false;
 }
 
 static inline u32 tcf_skbedit_mark(const struct tc_action *a)
 {
-       return to_skbedit(a)->mark;
+       u32 mark;
+
+       rcu_read_lock();
+       mark = rcu_dereference(to_skbedit(a)->params)->mark;
+       rcu_read_unlock();
+
+       return mark;
 }
 
 #endif /* __NET_TC_SKBEDIT_H */
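
Moving the skbedit fields behind an __rcu params pointer pairs the rcu_dereference() readers above with a writer that swaps the pointer and defers the free. A sketch of the writer side, with locking and allocation checks elided; params_new is assumed fully initialized:

	struct tcf_skbedit *d = to_skbedit(a);
	struct tcf_skbedit_params *old;

	old = rtnl_dereference(d->params);
	rcu_assign_pointer(d->params, params_new);
	if (old)
		kfree_rcu(old, rcu);	/* uses the rcu_head added above */
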
index cd3ecda9386a680e009ca261ea535feaa1349d74..d769dc20359b21d7247c548ddbe9c6d4e2357277 100644 (file)
@@ -473,19 +473,20 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb);
  */
 static inline void tcp_synq_overflow(const struct sock *sk)
 {
-       unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
-       unsigned long now = jiffies;
+       unsigned int last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
+       unsigned int now = jiffies;
 
-       if (time_after(now, last_overflow + HZ))
+       if (time_after32(now, last_overflow + HZ))
                tcp_sk(sk)->rx_opt.ts_recent_stamp = now;
 }
 
 /* syncookies: no recent synqueue overflow on this listening socket? */
 static inline bool tcp_synq_no_recent_overflow(const struct sock *sk)
 {
-       unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
+       unsigned int last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
+       unsigned int now = jiffies;
 
-       return time_after(jiffies, last_overflow + TCP_SYNCOOKIE_VALID);
+       return time_after32(now, last_overflow + TCP_SYNCOOKIE_VALID);
 }
 
 static inline u32 tcp_cookie_time(void)
@@ -963,6 +964,8 @@ struct rate_sample {
        u32  prior_delivered;   /* tp->delivered at "prior_mstamp" */
        s32  delivered;         /* number of packets delivered over interval */
        long interval_us;       /* time for tp->delivered to incr "delivered" */
+       u32 snd_interval_us;    /* snd interval for delivered packets */
+       u32 rcv_interval_us;    /* rcv interval for delivered packets */
        long rtt_us;            /* RTT of last (S)ACKed packet (or -1) */
        int  losses;            /* number of packets marked lost upon ACK */
        u32  acked_sacked;      /* number of packets newly (S)ACKed upon ACK */
@@ -1194,6 +1197,17 @@ static inline bool tcp_is_cwnd_limited(const struct sock *sk)
        return tp->is_cwnd_limited;
 }
 
+/* BBR congestion control needs pacing.
+ * The same applies to SO_MAX_PACING_RATE.
+ * The sch_fq packet scheduler handles pacing efficiently,
+ * but it is not always installed/used.
+ * Return true if the TCP stack should pace packets itself.
+ */
+static inline bool tcp_needs_internal_pacing(const struct sock *sk)
+{
+       return smp_load_acquire(&sk->sk_pacing_status) == SK_PACING_NEEDED;
+}
+
 /* Something is really bad, we could not queue an additional packet,
  * because qdisc is full or receiver sent a 0 window.
  * We do not want to add fuel to the fire, or abort too early,
@@ -1371,7 +1385,8 @@ static inline bool tcp_paws_check(const struct tcp_options_received *rx_opt,
 {
        if ((s32)(rx_opt->ts_recent - rx_opt->rcv_tsval) <= paws_win)
                return true;
-       if (unlikely(get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS))
+       if (unlikely(!time_before32(ktime_get_seconds(),
+                                   rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS)))
                return true;
        /*
         * Some OSes send SYN and SYNACK messages with tsval=0 tsecr=0,
@@ -1401,7 +1416,8 @@ static inline bool tcp_paws_reject(const struct tcp_options_received *rx_opt,
 
           However, we can relax time bounds for RST segments to MSL.
         */
-       if (rst && get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_MSL)
+       if (rst && !time_before32(ktime_get_seconds(),
+                                 rx_opt->ts_recent_stamp + TCP_PAWS_MSL))
                return false;
        return true;
 }
@@ -1787,7 +1803,7 @@ void tcp_v4_destroy_sock(struct sock *sk);
 
 struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
                                netdev_features_t features);
-struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb);
+struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb);
 int tcp_gro_complete(struct sk_buff *skb);
 
 void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr);
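
Narrowing the syncookie overflow stamp to 32 bits works because time_after32() compares via signed subtraction, which stays correct across jiffies wraparound where a plain '>' would not. An illustration with hypothetical values:

	u32 last_overflow = 0xfffffff0;	/* stamped just before wrap */
	u32 now = 0x00000010;		/* 0x20 jiffies later, post-wrap */

	/* time_after32(now, last_overflow) is true because
	 * (s32)(last_overflow - now) == (s32)0xffffffe0 < 0,
	 * whereas the naive test now > last_overflow is false.
	 */
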
index 70c273777fe9fe27b2ef1ba7c2c80970da8ea5c4..d8b3b6578c010e46972af88b52d9bee0bc109140 100644 (file)
@@ -83,6 +83,16 @@ struct tls_device {
        void (*unhash)(struct tls_device *device, struct sock *sk);
 };
 
+enum {
+       TLS_BASE,
+       TLS_SW,
+#ifdef CONFIG_TLS_DEVICE
+       TLS_HW,
+#endif
+       TLS_HW_RECORD,
+       TLS_NUM_CONFIG,
+};
+
 struct tls_sw_context_tx {
        struct crypto_aead *aead_send;
        struct crypto_wait async_wait;
@@ -128,7 +138,7 @@ struct tls_record_info {
        skb_frag_t frags[MAX_SKB_FRAGS];
 };
 
-struct tls_offload_context {
+struct tls_offload_context_tx {
        struct crypto_aead *aead_send;
        spinlock_t lock;        /* protects records list */
        struct list_head records_list;
@@ -147,8 +157,8 @@ struct tls_offload_context {
 #define TLS_DRIVER_STATE_SIZE (max_t(size_t, 8, sizeof(void *)))
 };
 
-#define TLS_OFFLOAD_CONTEXT_SIZE                                               \
-       (ALIGN(sizeof(struct tls_offload_context), sizeof(void *)) +           \
+#define TLS_OFFLOAD_CONTEXT_SIZE_TX                                            \
+       (ALIGN(sizeof(struct tls_offload_context_tx), sizeof(void *)) +        \
         TLS_DRIVER_STATE_SIZE)
 
 enum {
@@ -197,6 +207,7 @@ struct tls_context {
        int (*push_pending_record)(struct sock *sk, int flags);
 
        void (*sk_write_space)(struct sock *sk);
+       void (*sk_destruct)(struct sock *sk);
        void (*sk_proto_close)(struct sock *sk, long timeout);
 
        int  (*setsockopt)(struct sock *sk, int level,
@@ -209,13 +220,27 @@ struct tls_context {
        void (*unhash)(struct sock *sk);
 };
 
+struct tls_offload_context_rx {
+       /* sw must be the first member of tls_offload_context_rx */
+       struct tls_sw_context_rx sw;
+       atomic64_t resync_req;
+       u8 driver_state[];
+       /* The TLS layer reserves room for driver specific state
+        * Currently the belief is that there is not enough
+        * driver specific state to justify another layer of indirection
+        */
+};
+
+#define TLS_OFFLOAD_CONTEXT_SIZE_RX                                    \
+       (ALIGN(sizeof(struct tls_offload_context_rx), sizeof(void *)) + \
+        TLS_DRIVER_STATE_SIZE)
+
 int wait_on_pending_writer(struct sock *sk, long *timeo);
 int tls_sk_query(struct sock *sk, int optname, char __user *optval,
                int __user *optlen);
 int tls_sk_attach(struct sock *sk, int optname, char __user *optval,
                  unsigned int optlen);
 
-
 int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx);
 int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
 int tls_sw_sendpage(struct sock *sk, struct page *page,
@@ -223,6 +248,7 @@ int tls_sw_sendpage(struct sock *sk, struct page *page,
 void tls_sw_close(struct sock *sk, long timeout);
 void tls_sw_free_resources_tx(struct sock *sk);
 void tls_sw_free_resources_rx(struct sock *sk);
+void tls_sw_release_resources_rx(struct sock *sk);
 int tls_sw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
                   int nonblock, int flags, int *addr_len);
 unsigned int tls_sw_poll(struct file *file, struct socket *sock,
@@ -239,7 +265,7 @@ void tls_device_sk_destruct(struct sock *sk);
 void tls_device_init(void);
 void tls_device_cleanup(void);
 
-struct tls_record_info *tls_get_record(struct tls_offload_context *context,
+struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context,
                                       u32 seq, u64 *p_record_sn);
 
 static inline bool tls_record_is_start_marker(struct tls_record_info *rec)
@@ -289,11 +315,19 @@ static inline bool tls_is_pending_open_record(struct tls_context *tls_ctx)
        return tls_ctx->pending_open_record_frags;
 }
 
+struct sk_buff *
+tls_validate_xmit_skb(struct sock *sk, struct net_device *dev,
+                     struct sk_buff *skb);
+
 static inline bool tls_is_sk_tx_device_offloaded(struct sock *sk)
 {
-       return sk_fullsock(sk) &&
-              /* matches smp_store_release in tls_set_device_offload */
-              smp_load_acquire(&sk->sk_destruct) == &tls_device_sk_destruct;
+#ifdef CONFIG_SOCK_VALIDATE_XMIT
+       return sk_fullsock(sk) &
+              (smp_load_acquire(&sk->sk_validate_xmit_skb) ==
+              &tls_validate_xmit_skb);
+#else
+       return false;
+#endif
 }
 
 static inline void tls_err_abort(struct sock *sk, int err)
@@ -380,23 +414,47 @@ static inline struct tls_sw_context_tx *tls_sw_ctx_tx(
        return (struct tls_sw_context_tx *)tls_ctx->priv_ctx_tx;
 }
 
-static inline struct tls_offload_context *tls_offload_ctx(
-               const struct tls_context *tls_ctx)
+static inline struct tls_offload_context_tx *
+tls_offload_ctx_tx(const struct tls_context *tls_ctx)
 {
-       return (struct tls_offload_context *)tls_ctx->priv_ctx_tx;
+       return (struct tls_offload_context_tx *)tls_ctx->priv_ctx_tx;
 }
 
+static inline struct tls_offload_context_rx *
+tls_offload_ctx_rx(const struct tls_context *tls_ctx)
+{
+       return (struct tls_offload_context_rx *)tls_ctx->priv_ctx_rx;
+}
+
+/* The TLS context is valid until sk_destruct is called */
+static inline void tls_offload_rx_resync_request(struct sock *sk, __be32 seq)
+{
+       struct tls_context *tls_ctx = tls_get_ctx(sk);
+       struct tls_offload_context_rx *rx_ctx = tls_offload_ctx_rx(tls_ctx);
+
+       atomic64_set(&rx_ctx->resync_req, ((((uint64_t)seq) << 32) | 1));
+}
+
+
 int tls_proccess_cmsg(struct sock *sk, struct msghdr *msg,
                      unsigned char *record_type);
 void tls_register_device(struct tls_device *device);
 void tls_unregister_device(struct tls_device *device);
+int tls_device_decrypted(struct sock *sk, struct sk_buff *skb);
+int decrypt_skb(struct sock *sk, struct sk_buff *skb,
+               struct scatterlist *sgout);
 
 struct sk_buff *tls_validate_xmit_skb(struct sock *sk,
                                      struct net_device *dev,
                                      struct sk_buff *skb);
 
 int tls_sw_fallback_init(struct sock *sk,
-                        struct tls_offload_context *offload_ctx,
+                        struct tls_offload_context_tx *offload_ctx,
                         struct tls_crypto_info *crypto_info);
 
+int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx);
+
+void tls_device_offload_cleanup_rx(struct sock *sk);
+void handle_device_resync(struct sock *sk, u32 seq, u64 rcd_sn);
+
 #endif /* _TLS_OFFLOAD_H */
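
tls_offload_rx_resync_request() above packs the TCP sequence into the high 32 bits of resync_req and uses bit 0 as a "request pending" flag. A sketch of the consumer side; the real handling lives elsewhere in this series:

	u64 req = atomic64_read(&rx_ctx->resync_req);
	bool pending = req & 1;		/* bit 0: resync requested */
	u32 seq = req >> 32;		/* high word: TCP sequence */
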
index f6a3543e52477d0b3ec6c883fff554c4e924e0d6..a8f6020f1196edc9940cbb6c605a06279db4fd36 100644 (file)
@@ -42,8 +42,7 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg,
                                    struct sk_buff *skb);
 
 int ip6_datagram_send_ctl(struct net *net, struct sock *sk, struct msghdr *msg,
-                         struct flowi6 *fl6, struct ipcm6_cookie *ipc6,
-                         struct sockcm_cookie *sockc);
+                         struct flowi6 *fl6, struct ipcm6_cookie *ipc6);
 
 void __ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp,
                               __u16 srcp, __u16 destp, int rqueue, int bucket);
index 81afdacd4fff04bd05335da85a7a06b1996282f8..8482a990b0bb8e781883d3e09ea84a6f345863ed 100644 (file)
@@ -170,8 +170,8 @@ static inline void udp_csum_pull_header(struct sk_buff *skb)
 typedef struct sock *(*udp_lookup_t)(struct sk_buff *skb, __be16 sport,
                                     __be16 dport);
 
-struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb,
-                                struct udphdr *uh, udp_lookup_t lookup);
+struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
+                               struct udphdr *uh, udp_lookup_t lookup);
 int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup);
 
 struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
index b95a6927c7185eae2481d21235fa106e8a7ddb94..fe680ab6b15a18aba378d161d57461176b75d8cb 100644 (file)
@@ -65,9 +65,9 @@ static inline int udp_sock_create(struct net *net,
 
 typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb);
 typedef void (*udp_tunnel_encap_destroy_t)(struct sock *sk);
-typedef struct sk_buff **(*udp_tunnel_gro_receive_t)(struct sock *sk,
-                                                    struct sk_buff **head,
-                                                    struct sk_buff *skb);
+typedef struct sk_buff *(*udp_tunnel_gro_receive_t)(struct sock *sk,
+                                                   struct list_head *head,
+                                                   struct sk_buff *skb);
 typedef int (*udp_tunnel_gro_complete_t)(struct sock *sk, struct sk_buff *skb,
                                         int nhoff);
 
index 2deea7166a3486f66a2fd954acc3fcfe2b9d6440..fcb033f51d8c3d00945e3ad6e42d9128fb8bf9d2 100644 (file)
@@ -144,4 +144,17 @@ xdp_data_meta_unsupported(const struct xdp_buff *xdp)
        return unlikely(xdp->data_meta > xdp->data);
 }
 
+struct xdp_attachment_info {
+       struct bpf_prog *prog;
+       u32 flags;
+};
+
+struct netdev_bpf;
+int xdp_attachment_query(struct xdp_attachment_info *info,
+                        struct netdev_bpf *bpf);
+bool xdp_attachment_flags_ok(struct xdp_attachment_info *info,
+                            struct netdev_bpf *bpf);
+void xdp_attachment_setup(struct xdp_attachment_info *info,
+                         struct netdev_bpf *bpf);
+
 #endif /* __LINUX_NET_XDP_H__ */
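
A hedged sketch of a driver ndo_bpf handler built on the xdp_attachment helpers declared above. The foo_* names are hypothetical, and a real driver would load the program into hardware before calling xdp_attachment_setup():

	static int foo_xdp(struct net_device *dev, struct netdev_bpf *bpf)
	{
		struct foo_priv *priv = netdev_priv(dev);

		switch (bpf->command) {
		case XDP_QUERY_PROG:
			return xdp_attachment_query(&priv->xdp_info, bpf);
		case XDP_SETUP_PROG:
			if (!xdp_attachment_flags_ok(&priv->xdp_info, bpf))
				return -EBUSY;
			xdp_attachment_setup(&priv->xdp_info, bpf);
			return 0;
		default:
			return -EINVAL;
		}
	}
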
index 557122846e0e470ddd998ed972a84be46fae0ef7..ca820945f30c18370797f2fc881e686b18a780db 100644 (file)
@@ -23,6 +23,7 @@
 #include <net/ipv6.h>
 #include <net/ip6_fib.h>
 #include <net/flow.h>
+#include <net/gro_cells.h>
 
 #include <linux/interrupt.h>
 
@@ -147,6 +148,7 @@ struct xfrm_state {
        struct xfrm_id          id;
        struct xfrm_selector    sel;
        struct xfrm_mark        mark;
+       u32                     if_id;
        u32                     tfcpad;
 
        u32                     genid;
@@ -166,7 +168,7 @@ struct xfrm_state {
                int             header_len;
                int             trailer_len;
                u32             extra_flags;
-               u32             output_mark;
+               struct xfrm_mark        smark;
        } props;
 
        struct xfrm_lifetime_cfg lft;
@@ -225,7 +227,7 @@ struct xfrm_state {
        long            saved_tmo;
 
        /* Last used time */
-       unsigned long           lastused;
+       time64_t                lastused;
 
        struct page_frag xfrag;
 
@@ -292,6 +294,13 @@ struct xfrm_replay {
        int     (*overflow)(struct xfrm_state *x, struct sk_buff *skb);
 };
 
+struct xfrm_if_cb {
+       struct xfrm_if  *(*decode_session)(struct sk_buff *skb);
+};
+
+void xfrm_if_register_cb(const struct xfrm_if_cb *ifcb);
+void xfrm_if_unregister_cb(void);
+
 struct net_device;
 struct xfrm_type;
 struct xfrm_dst;
@@ -323,7 +332,6 @@ int xfrm_policy_register_afinfo(const struct xfrm_policy_afinfo *afinfo, int fam
 void xfrm_policy_unregister_afinfo(const struct xfrm_policy_afinfo *afinfo);
 void km_policy_notify(struct xfrm_policy *xp, int dir,
                      const struct km_event *c);
-void xfrm_policy_cache_flush(void);
 void km_state_notify(struct xfrm_state *x, const struct km_event *c);
 
 struct xfrm_tmpl;
@@ -574,6 +582,7 @@ struct xfrm_policy {
        atomic_t                genid;
        u32                     priority;
        u32                     index;
+       u32                     if_id;
        struct xfrm_mark        mark;
        struct xfrm_selector    selector;
        struct xfrm_lifetime_cfg lft;
@@ -1037,6 +1046,22 @@ static inline void xfrm_dst_destroy(struct xfrm_dst *xdst)
 
 void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev);
 
+struct xfrm_if_parms {
+       char name[IFNAMSIZ];    /* name of XFRM device */
+       int link;               /* ifindex of underlying L2 interface */
+       u32 if_id;              /* interface identifier */
+};
+
+struct xfrm_if {
+       struct xfrm_if __rcu *next;     /* next interface in list */
+       struct net_device *dev;         /* virtual device associated with interface */
+       struct net_device *phydev;      /* physical device */
+       struct net *net;                /* netns for packet i/o */
+       struct xfrm_if_parms p;         /* interface parms */
+
+       struct gro_cells gro_cells;
+};
+
 struct xfrm_offload {
        /* Output sequence number for replay protection on offloading. */
        struct {
@@ -1532,8 +1557,8 @@ struct xfrm_state *xfrm_state_find(const xfrm_address_t *daddr,
                                   const struct flowi *fl,
                                   struct xfrm_tmpl *tmpl,
                                   struct xfrm_policy *pol, int *err,
-                                  unsigned short family);
-struct xfrm_state *xfrm_stateonly_find(struct net *net, u32 mark,
+                                  unsigned short family, u32 if_id);
+struct xfrm_state *xfrm_stateonly_find(struct net *net, u32 mark, u32 if_id,
                                       xfrm_address_t *daddr,
                                       xfrm_address_t *saddr,
                                       unsigned short family,
@@ -1690,20 +1715,20 @@ int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk,
                     void *);
 void xfrm_policy_walk_done(struct xfrm_policy_walk *walk, struct net *net);
 int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl);
-struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark,
+struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u32 if_id,
                                          u8 type, int dir,
                                          struct xfrm_selector *sel,
                                          struct xfrm_sec_ctx *ctx, int delete,
                                          int *err);
-struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8, int dir,
-                                    u32 id, int delete, int *err);
+struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u32 if_id, u8,
+                                    int dir, u32 id, int delete, int *err);
 int xfrm_policy_flush(struct net *net, u8 type, bool task_valid);
 void xfrm_policy_hash_rebuild(struct net *net);
 u32 xfrm_get_acqseq(void);
 int verify_spi_info(u8 proto, u32 min, u32 max);
 int xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi);
 struct xfrm_state *xfrm_find_acq(struct net *net, const struct xfrm_mark *mark,
-                                u8 mode, u32 reqid, u8 proto,
+                                u8 mode, u32 reqid, u32 if_id, u8 proto,
                                 const xfrm_address_t *daddr,
                                 const xfrm_address_t *saddr, int create,
                                 unsigned short family);
@@ -2012,6 +2037,22 @@ static inline int xfrm_mark_put(struct sk_buff *skb, const struct xfrm_mark *m)
        return ret;
 }
 
+static inline __u32 xfrm_smark_get(__u32 mark, struct xfrm_state *x)
+{
+       struct xfrm_mark *m = &x->props.smark;
+
+       return (m->v & m->m) | (mark & ~m->m);
+}
+
+static inline int xfrm_if_id_put(struct sk_buff *skb, __u32 if_id)
+{
+       int ret = 0;
+
+       if (if_id)
+               ret = nla_put_u32(skb, XFRMA_IF_ID, if_id);
+       return ret;
+}
+
 static inline int xfrm_tunnel_check(struct sk_buff *skb, struct xfrm_state *x,
                                    unsigned int family)
 {
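
xfrm_smark_get() above merges the state's configured mark into the packet mark under the state's mask. A worked example with illustrative values:

	/* smark: v = 0x00aa0000, m = 0x00ff0000; packet mark = 0x12345678
	 *
	 * (v & m) | (mark & ~m)
	 *   = 0x00aa0000 | (0x12345678 & 0xff00ffff)
	 *   = 0x00aa0000 | 0x12005678
	 *   = 0x12aa5678
	 */
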
index 9c886739246ae4e91d8369f4c690bccd56f27912..00aa72ce0e7c79b18b4f597cd1287df1e2bd4aff 100644 (file)
@@ -223,6 +223,13 @@ DEFINE_EVENT(net_dev_rx_verbose_template, netif_receive_skb_entry,
        TP_ARGS(skb)
 );
 
+DEFINE_EVENT(net_dev_rx_verbose_template, netif_receive_skb_list_entry,
+
+       TP_PROTO(const struct sk_buff *skb),
+
+       TP_ARGS(skb)
+);
+
 DEFINE_EVENT(net_dev_rx_verbose_template, netif_rx_entry,
 
        TP_PROTO(const struct sk_buff *skb),
index 4fff00e9da8ac43b581fa95a318a59db43832611..196587b8f204de13da0529c3cce46b68df75b4ac 100644 (file)
@@ -211,18 +211,18 @@ enum rxrpc_congest_change {
        rxrpc_cong_saw_nack,
 };
 
-enum rxrpc_tx_fail_trace {
-       rxrpc_tx_fail_call_abort,
-       rxrpc_tx_fail_call_ack,
-       rxrpc_tx_fail_call_data_frag,
-       rxrpc_tx_fail_call_data_nofrag,
-       rxrpc_tx_fail_call_final_resend,
-       rxrpc_tx_fail_conn_abort,
-       rxrpc_tx_fail_conn_challenge,
-       rxrpc_tx_fail_conn_response,
-       rxrpc_tx_fail_reject,
-       rxrpc_tx_fail_version_keepalive,
-       rxrpc_tx_fail_version_reply,
+enum rxrpc_tx_point {
+       rxrpc_tx_point_call_abort,
+       rxrpc_tx_point_call_ack,
+       rxrpc_tx_point_call_data_frag,
+       rxrpc_tx_point_call_data_nofrag,
+       rxrpc_tx_point_call_final_resend,
+       rxrpc_tx_point_conn_abort,
+       rxrpc_tx_point_rxkad_challenge,
+       rxrpc_tx_point_rxkad_response,
+       rxrpc_tx_point_reject,
+       rxrpc_tx_point_version_keepalive,
+       rxrpc_tx_point_version_reply,
 };
 
 #endif /* end __RXRPC_DECLARE_TRACE_ENUMS_ONCE_ONLY */
@@ -396,7 +396,7 @@ enum rxrpc_tx_fail_trace {
 #define rxrpc_propose_ack_outcomes \
        EM(rxrpc_propose_ack_subsume,           " Subsume") \
        EM(rxrpc_propose_ack_update,            " Update") \
-       E_(rxrpc_propose_ack_use,               "")
+       E_(rxrpc_propose_ack_use,               " New")
 
 #define rxrpc_congest_modes \
        EM(RXRPC_CALL_CONGEST_AVOIDANCE,        "CongAvoid") \
@@ -452,18 +452,18 @@ enum rxrpc_tx_fail_trace {
        EM(RXRPC_CALL_LOCAL_ERROR,              "LocalError") \
        E_(RXRPC_CALL_NETWORK_ERROR,            "NetError")
 
-#define rxrpc_tx_fail_traces \
-       EM(rxrpc_tx_fail_call_abort,            "CallAbort") \
-       EM(rxrpc_tx_fail_call_ack,              "CallAck") \
-       EM(rxrpc_tx_fail_call_data_frag,        "CallDataFrag") \
-       EM(rxrpc_tx_fail_call_data_nofrag,      "CallDataNofrag") \
-       EM(rxrpc_tx_fail_call_final_resend,     "CallFinalResend") \
-       EM(rxrpc_tx_fail_conn_abort,            "ConnAbort") \
-       EM(rxrpc_tx_fail_conn_challenge,        "ConnChall") \
-       EM(rxrpc_tx_fail_conn_response,         "ConnResp") \
-       EM(rxrpc_tx_fail_reject,                "Reject") \
-       EM(rxrpc_tx_fail_version_keepalive,     "VerKeepalive") \
-       E_(rxrpc_tx_fail_version_reply,         "VerReply")
+#define rxrpc_tx_points \
+       EM(rxrpc_tx_point_call_abort,           "CallAbort") \
+       EM(rxrpc_tx_point_call_ack,             "CallAck") \
+       EM(rxrpc_tx_point_call_data_frag,       "CallDataFrag") \
+       EM(rxrpc_tx_point_call_data_nofrag,     "CallDataNofrag") \
+       EM(rxrpc_tx_point_call_final_resend,    "CallFinalResend") \
+       EM(rxrpc_tx_point_conn_abort,           "ConnAbort") \
+       EM(rxrpc_tx_point_reject,               "Reject") \
+       EM(rxrpc_tx_point_rxkad_challenge,      "RxkadChall") \
+       EM(rxrpc_tx_point_rxkad_response,       "RxkadResp") \
+       EM(rxrpc_tx_point_version_keepalive,    "VerKeepalive") \
+       E_(rxrpc_tx_point_version_reply,        "VerReply")
 
 /*
  * Export enum symbols via userspace.
@@ -488,7 +488,7 @@ rxrpc_propose_ack_traces;
 rxrpc_propose_ack_outcomes;
 rxrpc_congest_modes;
 rxrpc_congest_changes;
-rxrpc_tx_fail_traces;
+rxrpc_tx_points;
 
 /*
  * Now redefine the EM() and E_() macros to map the enums to the strings that
@@ -801,7 +801,7 @@ TRACE_EVENT(rxrpc_transmit,
            );
 
 TRACE_EVENT(rxrpc_rx_data,
-           TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t seq,
+           TP_PROTO(unsigned int call, rxrpc_seq_t seq,
                     rxrpc_serial_t serial, u8 flags, u8 anno),
 
            TP_ARGS(call, seq, serial, flags, anno),
@@ -815,7 +815,7 @@ TRACE_EVENT(rxrpc_rx_data,
                             ),
 
            TP_fast_assign(
-                   __entry->call = call->debug_id;
+                   __entry->call = call;
                    __entry->seq = seq;
                    __entry->serial = serial;
                    __entry->flags = flags;
@@ -918,6 +918,37 @@ TRACE_EVENT(rxrpc_rx_rwind_change,
                      __entry->wake ? " wake" : "")
            );
 
+TRACE_EVENT(rxrpc_tx_packet,
+           TP_PROTO(unsigned int call_id, struct rxrpc_wire_header *whdr,
+                    enum rxrpc_tx_point where),
+
+           TP_ARGS(call_id, whdr, where),
+
+           TP_STRUCT__entry(
+                   __field(unsigned int,                       call    )
+                   __field(enum rxrpc_tx_point,                where   )
+                   __field_struct(struct rxrpc_wire_header,    whdr    )
+                            ),
+
+           TP_fast_assign(
+                   __entry->call = call_id;
+                   memcpy(&__entry->whdr, whdr, sizeof(__entry->whdr));
+                          ),
+
+           TP_printk("c=%08x %08x:%08x:%08x:%04x %08x %08x %02x %02x %s %s",
+                     __entry->call,
+                     ntohl(__entry->whdr.epoch),
+                     ntohl(__entry->whdr.cid),
+                     ntohl(__entry->whdr.callNumber),
+                     ntohs(__entry->whdr.serviceId),
+                     ntohl(__entry->whdr.serial),
+                     ntohl(__entry->whdr.seq),
+                     __entry->whdr.type, __entry->whdr.flags,
+                     __entry->whdr.type <= 15 ?
+                     __print_symbolic(__entry->whdr.type, rxrpc_pkts) : "?UNK",
+                     __print_symbolic(__entry->where, rxrpc_tx_points))
+           );
+
 TRACE_EVENT(rxrpc_tx_data,
            TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t seq,
                     rxrpc_serial_t serial, u8 flags, bool retrans, bool lose),
@@ -928,6 +959,8 @@ TRACE_EVENT(rxrpc_tx_data,
                    __field(unsigned int,               call            )
                    __field(rxrpc_seq_t,                seq             )
                    __field(rxrpc_serial_t,             serial          )
+                   __field(u32,                        cid             )
+                   __field(u32,                        call_id         )
                    __field(u8,                         flags           )
                    __field(bool,                       retrans         )
                    __field(bool,                       lose            )
@@ -935,6 +968,8 @@ TRACE_EVENT(rxrpc_tx_data,
 
            TP_fast_assign(
                    __entry->call = call->debug_id;
+                   __entry->cid = call->cid;
+                   __entry->call_id = call->call_id;
                    __entry->seq = seq;
                    __entry->serial = serial;
                    __entry->flags = flags;
@@ -942,8 +977,10 @@ TRACE_EVENT(rxrpc_tx_data,
                    __entry->lose = lose;
                           ),
 
-           TP_printk("c=%08x DATA %08x q=%08x fl=%02x%s%s",
+           TP_printk("c=%08x DATA %08x:%08x %08x q=%08x fl=%02x%s%s",
                      __entry->call,
+                     __entry->cid,
+                     __entry->call_id,
                      __entry->serial,
                      __entry->seq,
                      __entry->flags,
@@ -952,7 +989,7 @@ TRACE_EVENT(rxrpc_tx_data,
            );
 
 TRACE_EVENT(rxrpc_tx_ack,
-           TP_PROTO(struct rxrpc_call *call, rxrpc_serial_t serial,
+           TP_PROTO(unsigned int call, rxrpc_serial_t serial,
                     rxrpc_seq_t ack_first, rxrpc_serial_t ack_serial,
                     u8 reason, u8 n_acks),
 
@@ -968,7 +1005,7 @@ TRACE_EVENT(rxrpc_tx_ack,
                             ),
 
            TP_fast_assign(
-                   __entry->call = call ? call->debug_id : 0;
+                   __entry->call = call;
                    __entry->serial = serial;
                    __entry->ack_first = ack_first;
                    __entry->ack_serial = ack_serial;
@@ -1434,29 +1471,29 @@ TRACE_EVENT(rxrpc_rx_icmp,
 
 TRACE_EVENT(rxrpc_tx_fail,
            TP_PROTO(unsigned int debug_id, rxrpc_serial_t serial, int ret,
-                    enum rxrpc_tx_fail_trace what),
+                    enum rxrpc_tx_point where),
 
-           TP_ARGS(debug_id, serial, ret, what),
+           TP_ARGS(debug_id, serial, ret, where),
 
            TP_STRUCT__entry(
                    __field(unsigned int,               debug_id        )
                    __field(rxrpc_serial_t,             serial          )
                    __field(int,                        ret             )
-                   __field(enum rxrpc_tx_fail_trace,   what            )
+                   __field(enum rxrpc_tx_point,        where           )
                             ),
 
            TP_fast_assign(
                    __entry->debug_id = debug_id;
                    __entry->serial = serial;
                    __entry->ret = ret;
-                   __entry->what = what;
+                   __entry->where = where;
                           ),
 
            TP_printk("c=%08x r=%x ret=%d %s",
                      __entry->debug_id,
                      __entry->serial,
                      __entry->ret,
-                     __print_symbolic(__entry->what, rxrpc_tx_fail_traces))
+                     __print_symbolic(__entry->where, rxrpc_tx_points))
            );
 
 TRACE_EVENT(rxrpc_call_reset,
@@ -1491,6 +1528,26 @@ TRACE_EVENT(rxrpc_call_reset,
                      __entry->tx_seq, __entry->rx_seq)
            );
 
+TRACE_EVENT(rxrpc_notify_socket,
+           TP_PROTO(unsigned int debug_id, rxrpc_serial_t serial),
+
+           TP_ARGS(debug_id, serial),
+
+           TP_STRUCT__entry(
+                   __field(unsigned int,               debug_id        )
+                   __field(rxrpc_serial_t,             serial          )
+                            ),
+
+           TP_fast_assign(
+                   __entry->debug_id = debug_id;
+                   __entry->serial = serial;
+                          ),
+
+           TP_printk("c=%08x r=%08x",
+                     __entry->debug_id,
+                     __entry->serial)
+           );
+
 #endif /* _TRACE_RXRPC_H */
 
 /* This part must be outside protection */
index 3176a393110726c4ccea512ab572d4f827a302f7..a0c4b8a3096604a9817a0f78f58409123a300352 100644 (file)
                EM(TCP_CLOSING)                 \
                EMe(TCP_NEW_SYN_RECV)
 
+#define skmem_kind_names                       \
+               EM(SK_MEM_SEND)                 \
+               EMe(SK_MEM_RECV)
+
 /* enums need to be exported to user space */
 #undef EM
 #undef EMe
@@ -44,6 +48,7 @@
 family_names
 inet_protocol_names
 tcp_state_names
+skmem_kind_names
 
 #undef EM
 #undef EMe
@@ -59,6 +64,9 @@ tcp_state_names
 #define show_tcp_state_name(val)        \
        __print_symbolic(val, tcp_state_names)
 
+#define show_skmem_kind_names(val)     \
+       __print_symbolic(val, skmem_kind_names)
+
 TRACE_EVENT(sock_rcvqueue_full,
 
        TP_PROTO(struct sock *sk, struct sk_buff *skb),
@@ -83,9 +91,9 @@ TRACE_EVENT(sock_rcvqueue_full,
 
 TRACE_EVENT(sock_exceed_buf_limit,
 
-       TP_PROTO(struct sock *sk, struct proto *prot, long allocated),
+       TP_PROTO(struct sock *sk, struct proto *prot, long allocated, int kind),
 
-       TP_ARGS(sk, prot, allocated),
+       TP_ARGS(sk, prot, allocated, kind),
 
        TP_STRUCT__entry(
                __array(char, name, 32)
@@ -93,6 +101,10 @@ TRACE_EVENT(sock_exceed_buf_limit,
                __field(long, allocated)
                __field(int, sysctl_rmem)
                __field(int, rmem_alloc)
+               __field(int, sysctl_wmem)
+               __field(int, wmem_alloc)
+               __field(int, wmem_queued)
+               __field(int, kind)
        ),
 
        TP_fast_assign(
@@ -101,17 +113,25 @@ TRACE_EVENT(sock_exceed_buf_limit,
                __entry->allocated = allocated;
                __entry->sysctl_rmem = sk_get_rmem0(sk, prot);
                __entry->rmem_alloc = atomic_read(&sk->sk_rmem_alloc);
+               __entry->sysctl_wmem = sk_get_wmem0(sk, prot);
+               __entry->wmem_alloc = refcount_read(&sk->sk_wmem_alloc);
+               __entry->wmem_queued = sk->sk_wmem_queued;
+               __entry->kind = kind;
        ),
 
-       TP_printk("proto:%s sysctl_mem=%ld,%ld,%ld allocated=%ld "
-               "sysctl_rmem=%d rmem_alloc=%d",
+       TP_printk("proto:%s sysctl_mem=%ld,%ld,%ld allocated=%ld sysctl_rmem=%d rmem_alloc=%d sysctl_wmem=%d wmem_alloc=%d wmem_queued=%d kind=%s",
                __entry->name,
                __entry->sysctl_mem[0],
                __entry->sysctl_mem[1],
                __entry->sysctl_mem[2],
                __entry->allocated,
                __entry->sysctl_rmem,
-               __entry->rmem_alloc)
+               __entry->rmem_alloc,
+               __entry->sysctl_wmem,
+               __entry->wmem_alloc,
+               __entry->wmem_queued,
+               show_skmem_kind_names(__entry->kind)
+       )
 );
 
 TRACE_EVENT(inet_sock_set_state,
index 0ae758c90e546b84ff503f8aa1a84809454eccd1..a12692e5f7a8462b22791a09fcb0ef41d0450938 100644 (file)
 
 #define SO_ZEROCOPY            60
 
+#define SO_TXTIME              61
+#define SCM_TXTIME             SO_TXTIME
+
 #endif /* __ASM_GENERIC_SOCKET_H */
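
A hedged userspace sketch for the new SO_TXTIME option. struct sock_txtime and the SOF_TXTIME_* flags live in net_tstamp.h elsewhere in this series and are assumed here:

	#include <sys/socket.h>
	#include <linux/net_tstamp.h>
	#include <time.h>

	static int enable_txtime(int fd)
	{
		struct sock_txtime cfg = {
			.clockid = CLOCK_TAI,
			.flags	 = 0,	/* or SOF_TXTIME_DEADLINE_MODE,
					 * SOF_TXTIME_REPORT_ERRORS */
		};

		return setsockopt(fd, SOL_SOCKET, SO_TXTIME,
				  &cfg, sizeof(cfg));
	}
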
index b7db3261c62d124760e98d9c851c1b01e64bdb03..870113916caca5ef3acbad43c821d5b5111d0ffc 100644 (file)
@@ -1826,7 +1826,7 @@ union bpf_attr {
  *             A non-negative value equal to or less than *size* on success,
  *             or a negative error in case of failure.
  *
- * int skb_load_bytes_relative(const struct sk_buff *skb, u32 offset, void *to, u32 len, u32 start_header)
+ * int bpf_skb_load_bytes_relative(const struct sk_buff *skb, u32 offset, void *to, u32 len, u32 start_header)
  *     Description
  *             This helper is similar to **bpf_skb_load_bytes**\ () in that
  *             it provides an easy way to load *len* bytes from *offset*
@@ -1877,7 +1877,7 @@ union bpf_attr {
  *             * < 0 if any input argument is invalid
  *             *   0 on success (packet is forwarded, nexthop neighbor exists)
  *             * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the
- *             *     packet is not forwarded or needs assist from full stack
+ *               packet is not forwarded or needs assist from full stack
  *
  * int bpf_sock_hash_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags)
  *     Description
@@ -2033,7 +2033,6 @@ union bpf_attr {
 *             This helper is only available if the kernel was compiled with
  *             the **CONFIG_BPF_LIRC_MODE2** configuration option set to
  *             "**y**".
- *
  *     Return
  *             0
  *
@@ -2053,7 +2052,6 @@ union bpf_attr {
 *             This helper is only available if the kernel was compiled with
  *             the **CONFIG_BPF_LIRC_MODE2** configuration option set to
  *             "**y**".
- *
  *     Return
  *             0
  *
@@ -2557,6 +2555,9 @@ enum {
                                         * Arg1: old_state
                                         * Arg2: new_state
                                         */
+       BPF_SOCK_OPS_TCP_LISTEN_CB,     /* Called on listen(2), right after
+                                        * socket transition to LISTEN state.
+                                        */
 };
 
 /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
index d7f97ac197a92b2960041ddf9474d74e4f13b9ea..0afb7d8e867f70827138c273e302cb9899b4fd7b 100644 (file)
@@ -77,7 +77,7 @@ typedef __u32 canid_t;
 /*
  * Controller Area Network Error Message Frame Mask structure
  *
- * bit 0-28    : error class mask (see include/linux/can/error.h)
+ * bit 0-28    : error class mask (see include/uapi/linux/can/error.h)
  * bit 29-31   : set to zero
  */
 typedef __u32 can_err_mask_t;
index 60aa2e4466981ab63027b5092c81d4e4e2010d77..69df19aa8e72890919cdab4985b5c2361de2e710 100644 (file)
@@ -233,7 +233,8 @@ struct cee_pfc {
  *     2       Well known port number over TCP or SCTP
  *     3       Well known port number over UDP or DCCP
  *     4       Well known port number over TCP, SCTP, UDP, or DCCP
- *     5-7     Reserved
+ *     5       Differentiated Services Code Point (DSCP) value
+ *     6-7     Reserved
  *
  *  Selector field values for CEE
  *     0       Ethertype
index 75cb5450c851254764b55445384857b932f012a9..79407bbd296d8d78f113b34c34d515aaa5c188a4 100644 (file)
@@ -78,6 +78,17 @@ enum devlink_command {
         */
        DEVLINK_CMD_RELOAD,
 
+       DEVLINK_CMD_PARAM_GET,          /* can dump */
+       DEVLINK_CMD_PARAM_SET,
+       DEVLINK_CMD_PARAM_NEW,
+       DEVLINK_CMD_PARAM_DEL,
+
+       DEVLINK_CMD_REGION_GET,
+       DEVLINK_CMD_REGION_SET,
+       DEVLINK_CMD_REGION_NEW,
+       DEVLINK_CMD_REGION_DEL,
+       DEVLINK_CMD_REGION_READ,
+
        /* add new commands above here */
        __DEVLINK_CMD_MAX,
        DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1
@@ -142,6 +153,16 @@ enum devlink_port_flavour {
                                   */
 };
 
+enum devlink_param_cmode {
+       DEVLINK_PARAM_CMODE_RUNTIME,
+       DEVLINK_PARAM_CMODE_DRIVERINIT,
+       DEVLINK_PARAM_CMODE_PERMANENT,
+
+       /* Add new configuration modes above */
+       __DEVLINK_PARAM_CMODE_MAX,
+       DEVLINK_PARAM_CMODE_MAX = __DEVLINK_PARAM_CMODE_MAX - 1
+};
+
 enum devlink_attr {
        /* don't change the order or add anything between, this is ABI! */
        DEVLINK_ATTR_UNSPEC,
@@ -238,6 +259,27 @@ enum devlink_attr {
        DEVLINK_ATTR_PORT_NUMBER,               /* u32 */
        DEVLINK_ATTR_PORT_SPLIT_SUBPORT_NUMBER, /* u32 */
 
+       DEVLINK_ATTR_PARAM,                     /* nested */
+       DEVLINK_ATTR_PARAM_NAME,                /* string */
+       DEVLINK_ATTR_PARAM_GENERIC,             /* flag */
+       DEVLINK_ATTR_PARAM_TYPE,                /* u8 */
+       DEVLINK_ATTR_PARAM_VALUES_LIST,         /* nested */
+       DEVLINK_ATTR_PARAM_VALUE,               /* nested */
+       DEVLINK_ATTR_PARAM_VALUE_DATA,          /* dynamic */
+       DEVLINK_ATTR_PARAM_VALUE_CMODE,         /* u8 */
+
+       DEVLINK_ATTR_REGION_NAME,               /* string */
+       DEVLINK_ATTR_REGION_SIZE,               /* u64 */
+       DEVLINK_ATTR_REGION_SNAPSHOTS,          /* nested */
+       DEVLINK_ATTR_REGION_SNAPSHOT,           /* nested */
+       DEVLINK_ATTR_REGION_SNAPSHOT_ID,        /* u32 */
+
+       DEVLINK_ATTR_REGION_CHUNKS,             /* nested */
+       DEVLINK_ATTR_REGION_CHUNK,              /* nested */
+       DEVLINK_ATTR_REGION_CHUNK_DATA,         /* binary */
+       DEVLINK_ATTR_REGION_CHUNK_ADDR,         /* u64 */
+       DEVLINK_ATTR_REGION_CHUNK_LEN,          /* u64 */
+
        /* add new attributes above here, update the policy in devlink.c */
 
        __DEVLINK_ATTR_MAX,
index dc64cfaf13da08564a8271e50a4edb89d221b148..c0151200f7d1cf65e961a69e3b4fca23a9ff4785 100644 (file)
@@ -20,12 +20,16 @@ struct sock_extended_err {
 #define SO_EE_ORIGIN_ICMP6     3
 #define SO_EE_ORIGIN_TXSTATUS  4
 #define SO_EE_ORIGIN_ZEROCOPY  5
+#define SO_EE_ORIGIN_TXTIME    6
 #define SO_EE_ORIGIN_TIMESTAMPING SO_EE_ORIGIN_TXSTATUS
 
 #define SO_EE_OFFENDER(ee)     ((struct sockaddr*)((ee)+1))
 
 #define SO_EE_CODE_ZEROCOPY_COPIED     1
 
+#define SO_EE_CODE_TXTIME_INVALID_PARAM        1
+#define SO_EE_CODE_TXTIME_MISSED       2
+
 /**
  *     struct scm_timestamping - timestamps exposed through cmsg
  *
index 7363f18e65a553e12f4d1cc13844dfbf2bbe6f17..813282cc8af621bbead7b7250d81230b20e0616f 100644 (file)
@@ -902,13 +902,13 @@ struct ethtool_rx_flow_spec {
 static inline __u64 ethtool_get_flow_spec_ring(__u64 ring_cookie)
 {
        return ETHTOOL_RX_FLOW_SPEC_RING & ring_cookie;
-};
+}
 
 static inline __u64 ethtool_get_flow_spec_ring_vf(__u64 ring_cookie)
 {
        return (ETHTOOL_RX_FLOW_SPEC_RING_VF & ring_cookie) >>
                                ETHTOOL_RX_FLOW_SPEC_RING_VF_OFF;
-};
+}
 
 /**
  * struct ethtool_rxnfc - command to get or set RX flow classification rules
index cf01b68242448512416c1b1aa25f0904915aad0a..43391e2d1153adb701433d6794702b73f2d60297 100644 (file)
@@ -164,6 +164,8 @@ enum {
        IFLA_CARRIER_UP_COUNT,
        IFLA_CARRIER_DOWN_COUNT,
        IFLA_NEW_IFINDEX,
+       IFLA_MIN_MTU,
+       IFLA_MAX_MTU,
        __IFLA_MAX
 };
 
@@ -334,6 +336,7 @@ enum {
        IFLA_BRPORT_GROUP_FWD_MASK,
        IFLA_BRPORT_NEIGH_SUPPRESS,
        IFLA_BRPORT_ISOLATED,
+       IFLA_BRPORT_BACKUP_PORT,
        __IFLA_BRPORT_MAX
 };
 #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
@@ -459,6 +462,16 @@ enum {
 
 #define IFLA_MACSEC_MAX (__IFLA_MACSEC_MAX - 1)
 
+/* XFRM section */
+enum {
+       IFLA_XFRM_UNSPEC,
+       IFLA_XFRM_LINK,
+       IFLA_XFRM_IF_ID,
+       __IFLA_XFRM_MAX
+};
+
+#define IFLA_XFRM_MAX (__IFLA_XFRM_MAX - 1)
+
 enum macsec_validation_type {
        MACSEC_VALIDATE_DISABLED = 0,
        MACSEC_VALIDATE_CHECK = 1,
@@ -920,6 +933,7 @@ enum {
        XDP_ATTACHED_DRV,
        XDP_ATTACHED_SKB,
        XDP_ATTACHED_HW,
+       XDP_ATTACHED_MULTI,
 };
 
 enum {
@@ -928,6 +942,9 @@ enum {
        IFLA_XDP_ATTACHED,
        IFLA_XDP_FLAGS,
        IFLA_XDP_PROG_ID,
+       IFLA_XDP_DRV_PROG_ID,
+       IFLA_XDP_SKB_PROG_ID,
+       IFLA_XDP_HW_PROG_ID,
        __IFLA_XDP_MAX,
 };
 
index 483b77af4eb8bffe4a465337b85686374e192dc0..db45d3e49a12c73669c0063fe44e0e620fcc5218 100644 (file)
@@ -30,6 +30,7 @@ enum {
        ILA_CMD_ADD,
        ILA_CMD_DEL,
        ILA_CMD_GET,
+       ILA_CMD_FLUSH,
 
        __ILA_CMD_MAX,
 };
index b24a742beae58974722348255ebe239584661905..e42d13b55cf3acafd90f0ec6d13ebd685ad3f684 100644 (file)
@@ -168,6 +168,7 @@ enum
        IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN,
        IPV4_DEVCONF_DROP_UNICAST_IN_L2_MULTICAST,
        IPV4_DEVCONF_DROP_GRATUITOUS_ARP,
+       IPV4_DEVCONF_BC_FORWARDING,
        __IPV4_DEVCONF_MAX
 };
 
index 7d570c7bd117cf68510c9ee9e8b808ce90c0d89e..61158f5a1a5bc272743769adb382d609329a17db 100644 (file)
@@ -60,14 +60,14 @@ struct sockaddr_l2tpip6 {
 /*
  * Commands.
  * Valid TLVs of each command are:-
- * TUNNEL_CREATE       - CONN_ID, pw_type, netns, ifname, ipinfo, udpinfo, udpcsum, vlanid
+ * TUNNEL_CREATE       - CONN_ID, pw_type, netns, ifname, ipinfo, udpinfo, udpcsum
  * TUNNEL_DELETE       - CONN_ID
  * TUNNEL_MODIFY       - CONN_ID, udpcsum
  * TUNNEL_GETSTATS     - CONN_ID, (stats)
  * TUNNEL_GET          - CONN_ID, (...)
- * SESSION_CREATE      - SESSION_ID, PW_TYPE, data_seq, cookie, peer_cookie, l2spec
+ * SESSION_CREATE      - SESSION_ID, PW_TYPE, cookie, peer_cookie, l2spec
  * SESSION_DELETE      - SESSION_ID
- * SESSION_MODIFY      - SESSION_ID, data_seq
+ * SESSION_MODIFY      - SESSION_ID
  * SESSION_GET         - SESSION_ID, (...)
  * SESSION_GETSTATS    - SESSION_ID, (stats)
  *
@@ -95,7 +95,7 @@ enum {
        L2TP_ATTR_PW_TYPE,              /* u16, enum l2tp_pwtype */
        L2TP_ATTR_ENCAP_TYPE,           /* u16, enum l2tp_encap_type */
        L2TP_ATTR_OFFSET,               /* u16 (not used) */
-       L2TP_ATTR_DATA_SEQ,             /* u16 */
+       L2TP_ATTR_DATA_SEQ,             /* u16 (not used) */
        L2TP_ATTR_L2SPEC_TYPE,          /* u8, enum l2tp_l2spec_type */
        L2TP_ATTR_L2SPEC_LEN,           /* u8 (not used) */
        L2TP_ATTR_PROTO_VERSION,        /* u8 */
@@ -105,7 +105,7 @@ enum {
        L2TP_ATTR_SESSION_ID,           /* u32 */
        L2TP_ATTR_PEER_SESSION_ID,      /* u32 */
        L2TP_ATTR_UDP_CSUM,             /* u8 */
-       L2TP_ATTR_VLAN_ID,              /* u16 */
+       L2TP_ATTR_VLAN_ID,              /* u16 (not used) */
        L2TP_ATTR_COOKIE,               /* 0, 4 or 8 bytes */
        L2TP_ATTR_PEER_COOKIE,          /* 0, 4 or 8 bytes */
        L2TP_ATTR_DEBUG,                /* u32, enum l2tp_debug_flags */
@@ -119,8 +119,8 @@ enum {
        L2TP_ATTR_IP_DADDR,             /* u32 */
        L2TP_ATTR_UDP_SPORT,            /* u16 */
        L2TP_ATTR_UDP_DPORT,            /* u16 */
-       L2TP_ATTR_MTU,                  /* u16 */
-       L2TP_ATTR_MRU,                  /* u16 */
+       L2TP_ATTR_MTU,                  /* u16 (not used) */
+       L2TP_ATTR_MRU,                  /* u16 (not used) */
        L2TP_ATTR_STATS,                /* nested */
        L2TP_ATTR_IP6_SADDR,            /* struct in6_addr */
        L2TP_ATTR_IP6_DADDR,            /* struct in6_addr */
@@ -169,6 +169,7 @@ enum l2tp_encap_type {
        L2TP_ENCAPTYPE_IP,
 };
 
+/* For L2TP_ATTR_DATA_SEQ. Unused. */
 enum l2tp_seqmode {
        L2TP_SEQ_NONE = 0,
        L2TP_SEQ_IP = 1,
index b5c2fdcf23fdb66616fdd17e4060b85e45949728..a506216591d65b590c868c7f26e8b1355d9c4742 100644 (file)
 #define CTL1000_ENABLE_MASTER  0x1000
 
 /* 1000BASE-T Status register */
+#define LPA_1000MSFAIL         0x8000  /* Master/Slave resolution failure */
 #define LPA_1000LOCALRXOK      0x2000  /* Link partner local receiver status */
 #define LPA_1000REMRXOK                0x1000  /* Link partner remote receiver status */
 #define LPA_1000FULL           0x0800  /* Link partner 1000BASE-T full duplex */
index 10f9ff9426a220b700ea4c50219412e6389817d1..5d37a9ccce63c444aa876d31f38e416c72f603f7 100644 (file)
@@ -120,6 +120,7 @@ enum {
        IPMRA_TABLE_MROUTE_DO_ASSERT,
        IPMRA_TABLE_MROUTE_DO_PIM,
        IPMRA_TABLE_VIFS,
+       IPMRA_TABLE_MROUTE_DO_WRVIFWHOLE,
        __IPMRA_TABLE_MAX
 };
 #define IPMRA_TABLE_MAX (__IPMRA_TABLE_MAX - 1)
@@ -173,5 +174,6 @@ enum {
 #define IGMPMSG_NOCACHE                1               /* Kern cache fill request to mrouted */
 #define IGMPMSG_WRONGVIF       2               /* For PIM assert processing (unused) */
 #define IGMPMSG_WHOLEPKT       3               /* For PIM Register processing */
+#define IGMPMSG_WRVIFWHOLE     4               /* For PIM Register and assert processing */
 
 #endif /* _UAPI__LINUX_MROUTE_H */
index 4fe104b2411f0dcda877b8c83b1c0dcfc4e68c7f..97ff3c17ec4d2021a728c71141f1baa39f707eee 100644 (file)
@@ -141,4 +141,22 @@ struct scm_ts_pktinfo {
        __u32 reserved[2];
 };
 
+/*
+ * SO_TXTIME gets a struct sock_txtime with flags being an integer bit
+ * field comprised of these values.
+ */
+enum txtime_flags {
+       SOF_TXTIME_DEADLINE_MODE = (1 << 0),
+       SOF_TXTIME_REPORT_ERRORS = (1 << 1),
+
+       SOF_TXTIME_FLAGS_LAST = SOF_TXTIME_REPORT_ERRORS,
+       SOF_TXTIME_FLAGS_MASK = (SOF_TXTIME_FLAGS_LAST - 1) |
+                                SOF_TXTIME_FLAGS_LAST
+};
+
+struct sock_txtime {
+       clockid_t       clockid;        /* reference clockid */
+       __u32           flags;          /* as defined by enum txtime_flags */
+};
+
 #endif /* _NET_TIMESTAMPING_H */
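
A minimal sketch of the corresponding userspace setup: enable SO_TXTIME once with the flags above, then stamp each packet with an SCM_TXTIME cmsg (SCM_TXTIME aliases SO_TXTIME). The CLOCK_TAI choice and error handling are illustrative:

	#include <string.h>
	#include <sys/socket.h>
	#include <sys/uio.h>
	#include <time.h>

	static int enable_txtime(int fd)
	{
		struct sock_txtime cfg = {
			.clockid = CLOCK_TAI,
			.flags	 = SOF_TXTIME_REPORT_ERRORS,
		};

		return setsockopt(fd, SOL_SOCKET, SO_TXTIME, &cfg, sizeof(cfg));
	}

	static int send_at(int fd, const void *buf, size_t len, __u64 txtime)
	{
		char ctrl[CMSG_SPACE(sizeof(txtime))] = { 0 };
		struct iovec iov = { .iov_base = (void *)buf, .iov_len = len };
		struct msghdr msg = { .msg_iov = &iov, .msg_iovlen = 1,
				      .msg_control = ctrl,
				      .msg_controllen = sizeof(ctrl) };
		struct cmsghdr *cm = CMSG_FIRSTHDR(&msg);

		cm->cmsg_level = SOL_SOCKET;
		cm->cmsg_type = SCM_TXTIME;
		cm->cmsg_len = CMSG_LEN(sizeof(txtime));
		memcpy(CMSG_DATA(cm), &txtime, sizeof(txtime));
		return sendmsg(fd, &msg, 0);
	}
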
index c84fcdfca862e67887bfe89dc8bea523c9a1d4d2..fac4edd553798cd87a24278bd4683632cd562f7f 100644 (file)
@@ -18,6 +18,7 @@ enum {
        NETCONFA_PROXY_NEIGH,
        NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
        NETCONFA_INPUT,
+       NETCONFA_BC_FORWARDING,
        __NETCONFA_MAX
 };
 #define NETCONFA_MAX   (__NETCONFA_MAX - 1)
index 8f2f2f4031836420e88f600b15308fea28f054b6..3738116b2bbecba98a10d348c4f926000c25ac91 100644 (file)
 
 #define NF_OSF_TTL_TRUE                        0       /* True ip and fingerprint TTL comparison */
 
+/* Check if ip TTL is less than fingerprint one */
+#define NF_OSF_TTL_LESS                        1
+
 /* Do not compare ip and fingerprint TTL at all */
 #define NF_OSF_TTL_NOCHECK             2
 
+#define NF_OSF_FLAGMASK                (NF_OSF_GENRE | NF_OSF_TTL | \
+                                NF_OSF_LOG | NF_OSF_INVERT)
 /* Wildcard MSS (kind of).
  * It is used to implement a state machine for the different wildcard values
  * of the MSS and window sizes.
@@ -83,4 +88,10 @@ enum iana_options {
        OSFOPT_EMPTY = 255,
 };
 
+enum nf_osf_attr_type {
+       OSF_ATTR_UNSPEC,
+       OSF_ATTR_FINGER,
+       OSF_ATTR_MAX,
+};
+
 #endif /* _NF_OSF_H */
index 89438e68dc030fd0ecf579bf5158e1042435a6b7..f466860bcf758e9eda958d434ec2b5fe489ec60f 100644 (file)
@@ -921,10 +921,12 @@ enum nft_socket_attributes {
 /*
  * enum nft_socket_keys - nf_tables socket expression keys
  *
- * @NFT_SOCKET_TRANSPARENT: Value of the IP(V6)_TRANSPARENT socket option_
+ * @NFT_SOCKET_TRANSPARENT: Value of the IP(V6)_TRANSPARENT socket option
+ * @NFT_SOCKET_MARK: Value of the socket mark
  */
 enum nft_socket_keys {
        NFT_SOCKET_TRANSPARENT,
+       NFT_SOCKET_MARK,
        __NFT_SOCKET_MAX
 };
 #define NFT_SOCKET_MAX (__NFT_SOCKET_MAX - 1)
index 72956eceeb09689d3c76edec321dbc81d86b803c..b189007f4f28464c6ddd77db746eaaf0114067f4 100644 (file)
@@ -37,8 +37,7 @@
 
 #define XT_OSF_TTL_TRUE                NF_OSF_TTL_TRUE
 #define XT_OSF_TTL_NOCHECK     NF_OSF_TTL_NOCHECK
-
-#define XT_OSF_TTL_LESS        1       /* Check if ip TTL is less than fingerprint one */
+#define XT_OSF_TTL_LESS                NF_OSF_TTL_LESS
 
 #define xt_osf_wc              nf_osf_wc
 #define xt_osf_opt             nf_osf_opt
@@ -47,6 +46,7 @@
 #define xt_osf_finger          nf_osf_finger
 #define xt_osf_nlmsg           nf_osf_nlmsg
 
+#define xt_osf_attr_type       nf_osf_attr_type
 /*
  * Add/remove fingerprint from the kernel.
  */
@@ -56,10 +56,4 @@ enum xt_osf_msg_types {
        OSF_MSG_MAX,
 };
 
-enum xt_osf_attr_type {
-       OSF_ATTR_UNSPEC,
-       OSF_ATTR_FINGER,
-       OSF_ATTR_MAX,
-};
-
 #endif                         /* _XT_OSF_H */
index 27e4e441caacdbd590a1afad65c6f5c1b3ec3edd..7acc16f349427a772f507e6a25b66b5b95b210d7 100644 (file)
@@ -2237,6 +2237,9 @@ enum nl80211_commands {
  *      enforced.
  * @NL80211_ATTR_TXQ_QUANTUM: TXQ scheduler quantum (bytes). Number of bytes
  *      a flow is assigned on each round of the DRR scheduler.
+ * @NL80211_ATTR_HE_CAPABILITY: HE Capability information element (from
+ *     association request when used with NL80211_CMD_NEW_STATION). Can be set
+ *     only if %NL80211_STA_FLAG_WME is set.
  *
  * @NUM_NL80211_ATTR: total number of nl80211_attrs available
  * @NL80211_ATTR_MAX: highest attribute number currently defined
@@ -2677,6 +2680,8 @@ enum nl80211_attrs {
        NL80211_ATTR_TXQ_MEMORY_LIMIT,
        NL80211_ATTR_TXQ_QUANTUM,
 
+       NL80211_ATTR_HE_CAPABILITY,
+
        /* add attributes here, update the policy in nl80211.c */
 
        __NL80211_ATTR_AFTER_LAST,
@@ -2726,7 +2731,8 @@ enum nl80211_attrs {
 #define NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY    24
 #define NL80211_HT_CAPABILITY_LEN              26
 #define NL80211_VHT_CAPABILITY_LEN             12
-
+#define NL80211_HE_MIN_CAPABILITY_LEN           16
+#define NL80211_HE_MAX_CAPABILITY_LEN           51
 #define NL80211_MAX_NR_CIPHER_SUITES           5
 #define NL80211_MAX_NR_AKM_SUITES              2
 
@@ -2853,6 +2859,38 @@ struct nl80211_sta_flag_update {
        __u32 set;
 } __attribute__((packed));
 
+/**
+ * enum nl80211_he_gi - HE guard interval
+ * @NL80211_RATE_INFO_HE_GI_0_8: 0.8 usec
+ * @NL80211_RATE_INFO_HE_GI_1_6: 1.6 usec
+ * @NL80211_RATE_INFO_HE_GI_3_2: 3.2 usec
+ */
+enum nl80211_he_gi {
+       NL80211_RATE_INFO_HE_GI_0_8,
+       NL80211_RATE_INFO_HE_GI_1_6,
+       NL80211_RATE_INFO_HE_GI_3_2,
+};
+
+/**
+ * enum nl80211_he_ru_alloc - HE RU allocation values
+ * @NL80211_RATE_INFO_HE_RU_ALLOC_26: 26-tone RU allocation
+ * @NL80211_RATE_INFO_HE_RU_ALLOC_52: 52-tone RU allocation
+ * @NL80211_RATE_INFO_HE_RU_ALLOC_106: 106-tone RU allocation
+ * @NL80211_RATE_INFO_HE_RU_ALLOC_242: 242-tone RU allocation
+ * @NL80211_RATE_INFO_HE_RU_ALLOC_484: 484-tone RU allocation
+ * @NL80211_RATE_INFO_HE_RU_ALLOC_996: 996-tone RU allocation
+ * @NL80211_RATE_INFO_HE_RU_ALLOC_2x996: 2x996-tone RU allocation
+ */
+enum nl80211_he_ru_alloc {
+       NL80211_RATE_INFO_HE_RU_ALLOC_26,
+       NL80211_RATE_INFO_HE_RU_ALLOC_52,
+       NL80211_RATE_INFO_HE_RU_ALLOC_106,
+       NL80211_RATE_INFO_HE_RU_ALLOC_242,
+       NL80211_RATE_INFO_HE_RU_ALLOC_484,
+       NL80211_RATE_INFO_HE_RU_ALLOC_996,
+       NL80211_RATE_INFO_HE_RU_ALLOC_2x996,
+};
+
 /**
  * enum nl80211_rate_info - bitrate information
  *
@@ -2885,6 +2923,13 @@ struct nl80211_sta_flag_update {
  * @NL80211_RATE_INFO_5_MHZ_WIDTH: 5 MHz width - note that this is
  *     a legacy rate and will be reported as the actual bitrate, i.e.
  *     a quarter of the base (20 MHz) rate
+ * @NL80211_RATE_INFO_HE_MCS: HE MCS index (u8, 0-11)
+ * @NL80211_RATE_INFO_HE_NSS: HE NSS value (u8, 1-8)
+ * @NL80211_RATE_INFO_HE_GI: HE guard interval identifier
+ *     (u8, see &enum nl80211_he_gi)
+ * @NL80211_RATE_INFO_HE_DCM: HE DCM value (u8, 0/1)
+ * @NL80211_RATE_INFO_RU_ALLOC: HE RU allocation, if not present then
+ *     non-OFDMA was used (u8, see &enum nl80211_he_ru_alloc)
  * @__NL80211_RATE_INFO_AFTER_LAST: internal use
  */
 enum nl80211_rate_info {
@@ -2901,6 +2946,11 @@ enum nl80211_rate_info {
        NL80211_RATE_INFO_160_MHZ_WIDTH,
        NL80211_RATE_INFO_10_MHZ_WIDTH,
        NL80211_RATE_INFO_5_MHZ_WIDTH,
+       NL80211_RATE_INFO_HE_MCS,
+       NL80211_RATE_INFO_HE_NSS,
+       NL80211_RATE_INFO_HE_GI,
+       NL80211_RATE_INFO_HE_DCM,
+       NL80211_RATE_INFO_HE_RU_ALLOC,
 
        /* keep last */
        __NL80211_RATE_INFO_AFTER_LAST,
@@ -3166,6 +3216,38 @@ enum nl80211_mpath_info {
        NL80211_MPATH_INFO_MAX = __NL80211_MPATH_INFO_AFTER_LAST - 1
 };
 
+/**
+ * enum nl80211_band_iftype_attr - Interface type data attributes
+ *
+ * @__NL80211_BAND_IFTYPE_ATTR_INVALID: attribute number 0 is reserved
+ * @NL80211_BAND_IFTYPE_ATTR_IFTYPES: nested attribute containing a flag attribute
+ *     for each interface type that supports the band data
+ * @NL80211_BAND_IFTYPE_ATTR_HE_CAP_MAC: HE MAC capabilities as in HE
+ *     capabilities IE
+ * @NL80211_BAND_IFTYPE_ATTR_HE_CAP_PHY: HE PHY capabilities as in HE
+ *     capabilities IE
+ * @NL80211_BAND_IFTYPE_ATTR_HE_CAP_MCS_SET: HE supported NSS/MCS as in HE
+ *     capabilities IE
+ * @NL80211_BAND_IFTYPE_ATTR_HE_CAP_PPE: HE PPE thresholds information as
+ *     defined in HE capabilities IE
+ * @NL80211_BAND_IFTYPE_ATTR_MAX: highest band HE capability attribute currently
+ *     defined
+ * @__NL80211_BAND_IFTYPE_ATTR_AFTER_LAST: internal use
+ */
+enum nl80211_band_iftype_attr {
+       __NL80211_BAND_IFTYPE_ATTR_INVALID,
+
+       NL80211_BAND_IFTYPE_ATTR_IFTYPES,
+       NL80211_BAND_IFTYPE_ATTR_HE_CAP_MAC,
+       NL80211_BAND_IFTYPE_ATTR_HE_CAP_PHY,
+       NL80211_BAND_IFTYPE_ATTR_HE_CAP_MCS_SET,
+       NL80211_BAND_IFTYPE_ATTR_HE_CAP_PPE,
+
+       /* keep last */
+       __NL80211_BAND_IFTYPE_ATTR_AFTER_LAST,
+       NL80211_BAND_IFTYPE_ATTR_MAX = __NL80211_BAND_IFTYPE_ATTR_AFTER_LAST - 1
+};
+
 /**
  * enum nl80211_band_attr - band attributes
  * @__NL80211_BAND_ATTR_INVALID: attribute number 0 is reserved
@@ -3181,6 +3263,8 @@ enum nl80211_mpath_info {
  * @NL80211_BAND_ATTR_VHT_MCS_SET: 32-byte attribute containing the MCS set as
  *     defined in 802.11ac
  * @NL80211_BAND_ATTR_VHT_CAPA: VHT capabilities, as in the HT information IE
+ * @NL80211_BAND_ATTR_IFTYPE_DATA: nested array attribute, with each entry using
+ *     attributes from &enum nl80211_band_iftype_attr
  * @NL80211_BAND_ATTR_MAX: highest band attribute currently defined
  * @__NL80211_BAND_ATTR_AFTER_LAST: internal use
  */
@@ -3196,6 +3280,7 @@ enum nl80211_band_attr {
 
        NL80211_BAND_ATTR_VHT_MCS_SET,
        NL80211_BAND_ATTR_VHT_CAPA,
+       NL80211_BAND_ATTR_IFTYPE_DATA,
 
        /* keep last */
        __NL80211_BAND_ATTR_AFTER_LAST,
@@ -5133,6 +5218,11 @@ enum nl80211_feature_flags {
  *     support to nl80211.
  * @NL80211_EXT_FEATURE_TXQS: Driver supports FQ-CoDel-enabled intermediate
  *      TXQs.
+ * @NL80211_EXT_FEATURE_SCAN_RANDOM_SN: Driver/device supports randomizing the
+ *     SN in probe request frames if requested by %NL80211_SCAN_FLAG_RANDOM_SN.
+ * @NL80211_EXT_FEATURE_SCAN_MIN_PREQ_CONTENT: Driver/device can omit all data
+ *     except for supported rates from the probe request content if requested
+ *     by the %NL80211_SCAN_FLAG_MIN_PREQ_CONTENT flag.
  *
  * @NUM_NL80211_EXT_FEATURES: number of extended features.
  * @MAX_NL80211_EXT_FEATURES: highest extended feature index.
@@ -5167,6 +5257,8 @@ enum nl80211_ext_feature_index {
        NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211,
        NL80211_EXT_FEATURE_DATA_ACK_SIGNAL_SUPPORT,
        NL80211_EXT_FEATURE_TXQS,
+       NL80211_EXT_FEATURE_SCAN_RANDOM_SN,
+       NL80211_EXT_FEATURE_SCAN_MIN_PREQ_CONTENT,
 
        /* add new features before the definition below */
        NUM_NL80211_EXT_FEATURES,
@@ -5272,6 +5364,12 @@ enum nl80211_timeout_reason {
  *     possible scan results. This flag hints the driver to use the best
  *     possible scan configuration to improve the accuracy in scanning.
  *     Latency and power use may get impacted with this flag.
+ * @NL80211_SCAN_FLAG_RANDOM_SN: randomize the sequence number in probe
+ *     request frames from this scan to avoid correlation/tracking being
+ *     possible.
+ * @NL80211_SCAN_FLAG_MIN_PREQ_CONTENT: minimize probe request content to
+ *     only have supported rates and no additional capabilities (unless
+ *     added by userspace explicitly.)
  */
 enum nl80211_scan_flags {
        NL80211_SCAN_FLAG_LOW_PRIORITY                          = 1<<0,
@@ -5285,6 +5383,8 @@ enum nl80211_scan_flags {
        NL80211_SCAN_FLAG_LOW_SPAN                              = 1<<8,
        NL80211_SCAN_FLAG_LOW_POWER                             = 1<<9,
        NL80211_SCAN_FLAG_HIGH_ACCURACY                         = 1<<10,
+       NL80211_SCAN_FLAG_RANDOM_SN                             = 1<<11,
+       NL80211_SCAN_FLAG_MIN_PREQ_CONTENT                      = 1<<12,
 };
 
 /**
index 863aabaa5cc926f82667637eab6974a3dfb0199f..dbe0cbe4f1b72b9e0c4751791614e98b0825989f 100644 (file)
@@ -840,6 +840,8 @@ struct ovs_action_push_eth {
  * @OVS_ACTION_ATTR_POP_NSH: pop the outermost NSH header off the packet.
  * @OVS_ACTION_ATTR_METER: Run packet through a meter, which may drop the
  * packet, or modify the packet (e.g., change the DSCP field).
+ * @OVS_ACTION_ATTR_CLONE: make a copy of the packet and execute a list of
+ * actions without affecting the original packet and key.
  *
  * Only a single header can be set with a single %OVS_ACTION_ATTR_SET.  Not all
  * fields within a header are modifiable, e.g. the IPv4 protocol and fragment
@@ -873,6 +875,7 @@ enum ovs_action_attr {
        OVS_ACTION_ATTR_PUSH_NSH,     /* Nested OVS_NSH_KEY_ATTR_*. */
        OVS_ACTION_ATTR_POP_NSH,      /* No argument. */
        OVS_ACTION_ATTR_METER,        /* u32 meter ID. */
+       OVS_ACTION_ATTR_CLONE,        /* Nested OVS_CLONE_ATTR_*.  */
 
        __OVS_ACTION_ATTR_MAX,        /* Nothing past this will be accepted
                                       * from userspace. */
index 84e4c1d0f874afec5891fcf95def286c121f71ed..48e5b5d49a347d046aa5df54645f5bca78577799 100644 (file)
@@ -45,6 +45,7 @@ enum {
                                   * the skb and act like everything
                                   * is alright.
                                   */
+#define TC_ACT_VALUE_MAX       TC_ACT_TRAP
 
 /* There is a special kind of actions called "extended actions",
  * which need a value parameter. These have a local opcode located in
@@ -55,11 +56,12 @@ enum {
 #define __TC_ACT_EXT_SHIFT 28
 #define __TC_ACT_EXT(local) ((local) << __TC_ACT_EXT_SHIFT)
 #define TC_ACT_EXT_VAL_MASK ((1 << __TC_ACT_EXT_SHIFT) - 1)
-#define TC_ACT_EXT_CMP(combined, opcode) \
-       (((combined) & (~TC_ACT_EXT_VAL_MASK)) == opcode)
+#define TC_ACT_EXT_OPCODE(combined) ((combined) & (~TC_ACT_EXT_VAL_MASK))
+#define TC_ACT_EXT_CMP(combined, opcode) (TC_ACT_EXT_OPCODE(combined) == opcode)
 
 #define TC_ACT_JUMP __TC_ACT_EXT(1)
 #define TC_ACT_GOTO_CHAIN __TC_ACT_EXT(2)
+#define TC_ACT_EXT_OPCODE_MAX  TC_ACT_GOTO_CHAIN
 
 /* Action type identifiers*/
 enum {
@@ -469,6 +471,15 @@ enum {
        TCA_FLOWER_KEY_IP_TTL,          /* u8 */
        TCA_FLOWER_KEY_IP_TTL_MASK,     /* u8 */
 
+       TCA_FLOWER_KEY_CVLAN_ID,        /* be16 */
+       TCA_FLOWER_KEY_CVLAN_PRIO,      /* u8   */
+       TCA_FLOWER_KEY_CVLAN_ETH_TYPE,  /* be16 */
+
+       TCA_FLOWER_KEY_ENC_IP_TOS,      /* u8 */
+       TCA_FLOWER_KEY_ENC_IP_TOS_MASK, /* u8 */
+       TCA_FLOWER_KEY_ENC_IP_TTL,      /* u8 */
+       TCA_FLOWER_KEY_ENC_IP_TTL_MASK, /* u8 */
+
        __TCA_FLOWER_MAX,
 };
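
The reworked TC_ACT_EXT_CMP() above now builds on TC_ACT_EXT_OPCODE(), which strips the value bits so callers can both bound-check opcodes (against TC_ACT_EXT_OPCODE_MAX) and compare them. A small sketch of recovering the chain index from a goto-chain verdict; the helper name is illustrative:

	static bool tc_action_is_goto_chain(int verdict, u32 *chain_index)
	{
		if (!TC_ACT_EXT_CMP(verdict, TC_ACT_GOTO_CHAIN))
			return false;
		*chain_index = verdict & TC_ACT_EXT_VAL_MASK;
		return true;
	}
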
 
index 37b5096ae97be4e6115b0941b82918e11250ee6b..8975fd1a1421f58952abed4b9d54f839711268df 100644 (file)
@@ -124,6 +124,21 @@ struct tc_fifo_qopt {
        __u32   limit;  /* Queue length: bytes for bfifo, packets for pfifo */
 };
 
+/* SKBPRIO section */
+
+/*
+ * Priorities go from zero to (SKBPRIO_MAX_PRIORITY - 1).
+ * SKBPRIO_MAX_PRIORITY should be at least 64 in order for skbprio to be able
+ * to map one to one the DS field of IPV4 and IPV6 headers.
+ * Memory allocation grows linearly with SKBPRIO_MAX_PRIORITY.
+ */
+
+#define SKBPRIO_MAX_PRIORITY 64
+
+struct tc_skbprio_qopt {
+       __u32   limit;          /* Queue length in packets. */
+};
+
 /* PRIO section */
 
 #define TCQ_PRIO_BANDS 16
@@ -539,6 +554,7 @@ enum {
        TCA_NETEM_LATENCY64,
        TCA_NETEM_JITTER64,
        TCA_NETEM_SLOT,
+       TCA_NETEM_SLOT_DIST,
        __TCA_NETEM_MAX,
 };
 
@@ -581,6 +597,8 @@ struct tc_netem_slot {
        __s64   max_delay;
        __s32   max_packets;
        __s32   max_bytes;
+       __s64   dist_delay; /* nsec */
+       __s64   dist_jitter; /* nsec */
 };
 
 enum {
@@ -934,4 +952,136 @@ enum {
 
 #define TCA_CBS_MAX (__TCA_CBS_MAX - 1)
 
+
+/* ETF */
+struct tc_etf_qopt {
+       __s32 delta;
+       __s32 clockid;
+       __u32 flags;
+#define TC_ETF_DEADLINE_MODE_ON        BIT(0)
+#define TC_ETF_OFFLOAD_ON      BIT(1)
+};
+
+enum {
+       TCA_ETF_UNSPEC,
+       TCA_ETF_PARMS,
+       __TCA_ETF_MAX,
+};
+
+#define TCA_ETF_MAX (__TCA_ETF_MAX - 1)
+
+
+/* CAKE */
+enum {
+       TCA_CAKE_UNSPEC,
+       TCA_CAKE_PAD,
+       TCA_CAKE_BASE_RATE64,
+       TCA_CAKE_DIFFSERV_MODE,
+       TCA_CAKE_ATM,
+       TCA_CAKE_FLOW_MODE,
+       TCA_CAKE_OVERHEAD,
+       TCA_CAKE_RTT,
+       TCA_CAKE_TARGET,
+       TCA_CAKE_AUTORATE,
+       TCA_CAKE_MEMORY,
+       TCA_CAKE_NAT,
+       TCA_CAKE_RAW,
+       TCA_CAKE_WASH,
+       TCA_CAKE_MPU,
+       TCA_CAKE_INGRESS,
+       TCA_CAKE_ACK_FILTER,
+       TCA_CAKE_SPLIT_GSO,
+       __TCA_CAKE_MAX
+};
+#define TCA_CAKE_MAX   (__TCA_CAKE_MAX - 1)
+
+enum {
+       __TCA_CAKE_STATS_INVALID,
+       TCA_CAKE_STATS_PAD,
+       TCA_CAKE_STATS_CAPACITY_ESTIMATE64,
+       TCA_CAKE_STATS_MEMORY_LIMIT,
+       TCA_CAKE_STATS_MEMORY_USED,
+       TCA_CAKE_STATS_AVG_NETOFF,
+       TCA_CAKE_STATS_MIN_NETLEN,
+       TCA_CAKE_STATS_MAX_NETLEN,
+       TCA_CAKE_STATS_MIN_ADJLEN,
+       TCA_CAKE_STATS_MAX_ADJLEN,
+       TCA_CAKE_STATS_TIN_STATS,
+       TCA_CAKE_STATS_DEFICIT,
+       TCA_CAKE_STATS_COBALT_COUNT,
+       TCA_CAKE_STATS_DROPPING,
+       TCA_CAKE_STATS_DROP_NEXT_US,
+       TCA_CAKE_STATS_P_DROP,
+       TCA_CAKE_STATS_BLUE_TIMER_US,
+       __TCA_CAKE_STATS_MAX
+};
+#define TCA_CAKE_STATS_MAX (__TCA_CAKE_STATS_MAX - 1)
+
+enum {
+       __TCA_CAKE_TIN_STATS_INVALID,
+       TCA_CAKE_TIN_STATS_PAD,
+       TCA_CAKE_TIN_STATS_SENT_PACKETS,
+       TCA_CAKE_TIN_STATS_SENT_BYTES64,
+       TCA_CAKE_TIN_STATS_DROPPED_PACKETS,
+       TCA_CAKE_TIN_STATS_DROPPED_BYTES64,
+       TCA_CAKE_TIN_STATS_ACKS_DROPPED_PACKETS,
+       TCA_CAKE_TIN_STATS_ACKS_DROPPED_BYTES64,
+       TCA_CAKE_TIN_STATS_ECN_MARKED_PACKETS,
+       TCA_CAKE_TIN_STATS_ECN_MARKED_BYTES64,
+       TCA_CAKE_TIN_STATS_BACKLOG_PACKETS,
+       TCA_CAKE_TIN_STATS_BACKLOG_BYTES,
+       TCA_CAKE_TIN_STATS_THRESHOLD_RATE64,
+       TCA_CAKE_TIN_STATS_TARGET_US,
+       TCA_CAKE_TIN_STATS_INTERVAL_US,
+       TCA_CAKE_TIN_STATS_WAY_INDIRECT_HITS,
+       TCA_CAKE_TIN_STATS_WAY_MISSES,
+       TCA_CAKE_TIN_STATS_WAY_COLLISIONS,
+       TCA_CAKE_TIN_STATS_PEAK_DELAY_US,
+       TCA_CAKE_TIN_STATS_AVG_DELAY_US,
+       TCA_CAKE_TIN_STATS_BASE_DELAY_US,
+       TCA_CAKE_TIN_STATS_SPARSE_FLOWS,
+       TCA_CAKE_TIN_STATS_BULK_FLOWS,
+       TCA_CAKE_TIN_STATS_UNRESPONSIVE_FLOWS,
+       TCA_CAKE_TIN_STATS_MAX_SKBLEN,
+       TCA_CAKE_TIN_STATS_FLOW_QUANTUM,
+       __TCA_CAKE_TIN_STATS_MAX
+};
+#define TCA_CAKE_TIN_STATS_MAX (__TCA_CAKE_TIN_STATS_MAX - 1)
+#define TC_CAKE_MAX_TINS (8)
+
+enum {
+       CAKE_FLOW_NONE = 0,
+       CAKE_FLOW_SRC_IP,
+       CAKE_FLOW_DST_IP,
+       CAKE_FLOW_HOSTS,    /* = CAKE_FLOW_SRC_IP | CAKE_FLOW_DST_IP */
+       CAKE_FLOW_FLOWS,
+       CAKE_FLOW_DUAL_SRC, /* = CAKE_FLOW_SRC_IP | CAKE_FLOW_FLOWS */
+       CAKE_FLOW_DUAL_DST, /* = CAKE_FLOW_DST_IP | CAKE_FLOW_FLOWS */
+       CAKE_FLOW_TRIPLE,   /* = CAKE_FLOW_HOSTS  | CAKE_FLOW_FLOWS */
+       CAKE_FLOW_MAX,
+};
+
+enum {
+       CAKE_DIFFSERV_DIFFSERV3 = 0,
+       CAKE_DIFFSERV_DIFFSERV4,
+       CAKE_DIFFSERV_DIFFSERV8,
+       CAKE_DIFFSERV_BESTEFFORT,
+       CAKE_DIFFSERV_PRECEDENCE,
+       CAKE_DIFFSERV_MAX
+};
+
+enum {
+       CAKE_ACK_NONE = 0,
+       CAKE_ACK_FILTER,
+       CAKE_ACK_AGGRESSIVE,
+       CAKE_ACK_MAX
+};
+
+enum {
+       CAKE_ATM_NONE = 0,
+       CAKE_ATM_ATM,
+       CAKE_ATM_PTM,
+       CAKE_ATM_MAX
+};
+
 #endif
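
For the new ETF section, a sketch of the parameter block a configuration tool would nest under TCA_ETF_PARMS; the 200 us fudge delta and deadline mode are illustrative values:

	static int etf_fill_parms(struct sk_buff *msg)
	{
		struct tc_etf_qopt qopt = {
			.clockid = CLOCK_TAI,
			.delta	 = 200000,	/* dequeue up to 200 us before txtime */
			.flags	 = TC_ETF_DEADLINE_MODE_ON,
		};

		return nla_put(msg, TCA_ETF_PARMS, sizeof(qopt), &qopt);
	}
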
index 20c6bd0b00079e9edd199cc1c138c28d3129fc46..dc520e1a4123f7a60d4996b5d0df7d237199cdba 100644 (file)
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */
 /*
- * Copyright (c) 2008 Oracle.  All rights reserved.
+ * Copyright (c) 2008, 2018 Oracle and/or its affiliates. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
 #define RDS_INFO_IB_CONNECTIONS                10008
 #define RDS_INFO_CONNECTION_STATS      10009
 #define RDS_INFO_IWARP_CONNECTIONS     10010
-#define RDS_INFO_LAST                  10010
+
+/* PF_RDS6 options */
+#define RDS6_INFO_CONNECTIONS          10011
+#define RDS6_INFO_SEND_MESSAGES                10012
+#define RDS6_INFO_RETRANS_MESSAGES     10013
+#define RDS6_INFO_RECV_MESSAGES                10014
+#define RDS6_INFO_SOCKETS              10015
+#define RDS6_INFO_TCP_SOCKETS          10016
+#define RDS6_INFO_IB_CONNECTIONS       10017
+
+#define RDS_INFO_LAST                  10017
 
 struct rds_info_counter {
        __u8    name[32];
@@ -140,6 +150,15 @@ struct rds_info_connection {
        __u8            flags;
 } __attribute__((packed));
 
+struct rds6_info_connection {
+       __u64           next_tx_seq;
+       __u64           next_rx_seq;
+       struct in6_addr laddr;
+       struct in6_addr faddr;
+       __u8            transport[TRANSNAMSIZ];         /* null term ascii */
+       __u8            flags;
+} __attribute__((packed));
+
 #define RDS_INFO_MESSAGE_FLAG_ACK               0x01
 #define RDS_INFO_MESSAGE_FLAG_FAST_ACK          0x02
 
@@ -153,6 +172,17 @@ struct rds_info_message {
        __u8            flags;
 } __attribute__((packed));
 
+struct rds6_info_message {
+       __u64   seq;
+       __u32   len;
+       struct in6_addr laddr;
+       struct in6_addr faddr;
+       __be16          lport;
+       __be16          fport;
+       __u8            flags;
+       __u8            tos;
+} __attribute__((packed));
+
 struct rds_info_socket {
        __u32           sndbuf;
        __be32          bound_addr;
@@ -163,6 +193,16 @@ struct rds_info_socket {
        __u64           inum;
 } __attribute__((packed));
 
+struct rds6_info_socket {
+       __u32           sndbuf;
+       struct in6_addr bound_addr;
+       struct in6_addr connected_addr;
+       __be16          bound_port;
+       __be16          connected_port;
+       __u32           rcvbuf;
+       __u64           inum;
+} __attribute__((packed));
+
 struct rds_info_tcp_socket {
        __be32          local_addr;
        __be16          local_port;
@@ -175,6 +215,18 @@ struct rds_info_tcp_socket {
        __u32           last_seen_una;
 } __attribute__((packed));
 
+struct rds6_info_tcp_socket {
+       struct in6_addr local_addr;
+       __be16          local_port;
+       struct in6_addr peer_addr;
+       __be16          peer_port;
+       __u64           hdr_rem;
+       __u64           data_rem;
+       __u32           last_sent_nxt;
+       __u32           last_expected_una;
+       __u32           last_seen_una;
+} __attribute__((packed));
+
 #define RDS_IB_GID_LEN 16
 struct rds_info_rdma_connection {
        __be32          src_addr;
@@ -189,6 +241,19 @@ struct rds_info_rdma_connection {
        __u32           rdma_mr_size;
 };
 
+struct rds6_info_rdma_connection {
+       struct in6_addr src_addr;
+       struct in6_addr dst_addr;
+       __u8            src_gid[RDS_IB_GID_LEN];
+       __u8            dst_gid[RDS_IB_GID_LEN];
+
+       __u32           max_send_wr;
+       __u32           max_recv_wr;
+       __u32           max_send_sge;
+       __u32           rdma_mr_max;
+       __u32           rdma_mr_size;
+};
+
 /* RDS message Receive Path Latency points */
 enum rds_message_rxpath_latency {
        RDS_MSG_RX_HDR_TO_DGRAM_START = 0,
index 7d8502313c99327776dd9fd69ce86a997ebf79cf..46399367627fcefb0abdf145e374dd82f4de731a 100644 (file)
@@ -150,6 +150,13 @@ enum {
        RTM_NEWCACHEREPORT = 96,
 #define RTM_NEWCACHEREPORT RTM_NEWCACHEREPORT
 
+       RTM_NEWCHAIN = 100,
+#define RTM_NEWCHAIN RTM_NEWCHAIN
+       RTM_DELCHAIN,
+#define RTM_DELCHAIN RTM_DELCHAIN
+       RTM_GETCHAIN,
+#define RTM_GETCHAIN RTM_GETCHAIN
+
        __RTM_MAX,
 #define RTM_MAX                (((__RTM_MAX + 3) & ~3) - 1)
 };
index b64d583bf053bef4826a4571589a7c3d76632659..b479db5c71d932082741567a73d479800ee1117e 100644 (file)
@@ -100,6 +100,7 @@ typedef __s32 sctp_assoc_t;
 #define SCTP_RECVNXTINFO       33
 #define SCTP_DEFAULT_SNDINFO   34
 #define SCTP_AUTH_DEACTIVATE_KEY       35
+#define SCTP_REUSE_PORT                36
 
 /* Internal Socket Options. Some of the sctp library functions are
  * implemented using these socket options.
@@ -762,6 +763,8 @@ enum  sctp_spp_flags {
        SPP_SACKDELAY_DISABLE = 1<<6,   /*Disable SACK*/
        SPP_SACKDELAY = SPP_SACKDELAY_ENABLE | SPP_SACKDELAY_DISABLE,
        SPP_HB_TIME_IS_ZERO = 1<<7,     /* Set HB delay to 0 */
+       SPP_IPV6_FLOWLABEL = 1<<8,
+       SPP_DSCP = 1<<9,
 };
 
 struct sctp_paddrparams {
@@ -772,6 +775,8 @@ struct sctp_paddrparams {
        __u32                   spp_pathmtu;
        __u32                   spp_sackdelay;
        __u32                   spp_flags;
+       __u32                   spp_ipv6_flowlabel;
+       __u8                    spp_dscp;
 } __attribute__((packed, aligned(4)));
 
 /*
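
A userspace sketch of the new SPP_DSCP flag, assuming an established association; the EF codepoint is just an example value, and the kernel only honors the upper six ToS bits of spp_dscp:

	static int set_peer_dscp(int fd, sctp_assoc_t assoc_id)
	{
		struct sctp_paddrparams pp = { 0 };

		pp.spp_assoc_id = assoc_id;
		pp.spp_flags	= SPP_DSCP;
		pp.spp_dscp	= 46 << 2;	/* EF, in the ToS DSCP bits */
		return setsockopt(fd, IPPROTO_SCTP, SCTP_PEER_ADDR_PARAMS,
				  &pp, sizeof(pp));
	}
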
index 0ae5d4685ba369d1a83df4f0feabb065f5dbdd74..ac9e8c96d9bd01ad09bd70fab9654ef70cc40a81 100644 (file)
@@ -20,7 +20,7 @@ struct smc_diag_req {
 struct smc_diag_msg {
        __u8    diag_family;
        __u8    diag_state;
-       __u8    diag_fallback;
+       __u8    diag_mode;
        __u8    diag_shutdown;
        struct inet_diag_sockid id;
 
@@ -28,6 +28,13 @@ struct smc_diag_msg {
        __u64   diag_inode;
 };
 
+/* Mode of a connection */
+enum {
+       SMC_DIAG_MODE_SMCR,
+       SMC_DIAG_MODE_FALLBACK_TCP,
+       SMC_DIAG_MODE_SMCD,
+};
+
 /* Extensions */
 
 enum {
@@ -35,6 +42,8 @@ enum {
        SMC_DIAG_CONNINFO,
        SMC_DIAG_LGRINFO,
        SMC_DIAG_SHUTDOWN,
+       SMC_DIAG_DMBINFO,
+       SMC_DIAG_FALLBACK,
        __SMC_DIAG_MAX,
 };
 
@@ -83,4 +92,18 @@ struct smc_diag_lgrinfo {
        struct smc_diag_linkinfo        lnk[1];
        __u8                            role;
 };
+
+struct smc_diag_fallback {
+       __u32 reason;
+       __u32 peer_diagnosis;
+};
+
+struct smcd_diag_dmbinfo {             /* SMC-D Socket internals */
+       __u32 linkid;                   /* Link identifier */
+       __u64 peer_gid;                 /* Peer GID */
+       __u64 my_gid;                   /* My GID */
+       __u64 token;                    /* Token of DMB */
+       __u64 peer_token;               /* Token of remote DMBE */
+};
+
 #endif /* _UAPI_SMC_DIAG_H_ */
index 750d89120335eb489f698191edb6c5110969fa8c..e5ebc83827abbcaaf82e1f46011540fc273c65f2 100644 (file)
@@ -279,6 +279,8 @@ enum
        LINUX_MIB_TCPDELIVERED,                 /* TCPDelivered */
        LINUX_MIB_TCPDELIVEREDCE,               /* TCPDeliveredCE */
        LINUX_MIB_TCPACKCOMPRESSED,             /* TCPAckCompressed */
+       LINUX_MIB_TCPZEROWINDOWDROP,            /* TCPZeroWindowDrop */
+       LINUX_MIB_TCPRCVQDROP,                  /* TCPRcvQDrop */
        __LINUX_MIB_MAX
 };
 
index 162d1094c41c09ca832c0139474f99b62e6a4aaf..24ec792dacc1828f814625a6b6e0109566e4843b 100644 (file)
@@ -17,13 +17,15 @@ enum {
        TCA_PEDIT_KEY_EX,
        __TCA_PEDIT_MAX
 };
+
 #define TCA_PEDIT_MAX (__TCA_PEDIT_MAX - 1)
-                                                                                
+
 enum {
        TCA_PEDIT_KEY_EX_HTYPE = 1,
        TCA_PEDIT_KEY_EX_CMD = 2,
        __TCA_PEDIT_KEY_EX_MAX
 };
+
 #define TCA_PEDIT_KEY_EX_MAX (__TCA_PEDIT_KEY_EX_MAX - 1)
 
  /* TCA_PEDIT_KEY_EX_HDR_TYPE_NETWROK is a special case for legacy users. It
@@ -38,6 +40,7 @@ enum pedit_header_type {
        TCA_PEDIT_KEY_EX_HDR_TYPE_UDP = 5,
        __PEDIT_HDR_TYPE_MAX,
 };
+
 #define TCA_PEDIT_HDR_TYPE_MAX (__PEDIT_HDR_TYPE_MAX - 1)
 
 enum pedit_cmd {
@@ -45,6 +48,7 @@ enum pedit_cmd {
        TCA_PEDIT_KEY_EX_CMD_ADD = 1,
        __PEDIT_CMD_MAX,
 };
+
 #define TCA_PEDIT_CMD_MAX (__PEDIT_CMD_MAX - 1)
 
 struct tc_pedit_key {
@@ -55,13 +59,14 @@ struct tc_pedit_key {
        __u32           offmask;
        __u32           shift;
 };
-                                                                                
+
 struct tc_pedit_sel {
        tc_gen;
        unsigned char           nkeys;
        unsigned char           flags;
        struct tc_pedit_key     keys[0];
 };
+
 #define tc_pedit tc_pedit_sel
 
 #endif
index fbcfe27a4e6c4173553fe675655f86c2e4f51045..6de6071ebed605f3165ad49eca2ac645b4cff5a9 100644 (file)
@@ -30,6 +30,7 @@
 #define SKBEDIT_F_MARK                 0x4
 #define SKBEDIT_F_PTYPE                        0x8
 #define SKBEDIT_F_MASK                 0x10
+#define SKBEDIT_F_INHERITDSFIELD       0x20
 
 struct tc_skbedit {
        tc_gen;
@@ -45,6 +46,7 @@ enum {
        TCA_SKBEDIT_PAD,
        TCA_SKBEDIT_PTYPE,
        TCA_SKBEDIT_MASK,
+       TCA_SKBEDIT_FLAGS,
        __TCA_SKBEDIT_MAX
 };
 #define TCA_SKBEDIT_MAX (__TCA_SKBEDIT_MAX - 1)
index 72bbefe5d1d12234fcf34f00fd32acb0a3555fcb..be384d63e1b56b297707cb5159c50ea4e3dff0dc 100644 (file)
@@ -36,9 +36,37 @@ enum {
        TCA_TUNNEL_KEY_PAD,
        TCA_TUNNEL_KEY_ENC_DST_PORT,    /* be16 */
        TCA_TUNNEL_KEY_NO_CSUM,         /* u8 */
+       TCA_TUNNEL_KEY_ENC_OPTS,        /* Nested TCA_TUNNEL_KEY_ENC_OPTS_
+                                        * attributes
+                                        */
+       TCA_TUNNEL_KEY_ENC_TOS,         /* u8 */
+       TCA_TUNNEL_KEY_ENC_TTL,         /* u8 */
        __TCA_TUNNEL_KEY_MAX,
 };
 
 #define TCA_TUNNEL_KEY_MAX (__TCA_TUNNEL_KEY_MAX - 1)
 
+enum {
+       TCA_TUNNEL_KEY_ENC_OPTS_UNSPEC,
+       TCA_TUNNEL_KEY_ENC_OPTS_GENEVE,         /* Nested
+                                                * TCA_TUNNEL_KEY_ENC_OPTS_
+                                                * attributes
+                                                */
+       __TCA_TUNNEL_KEY_ENC_OPTS_MAX,
+};
+
+#define TCA_TUNNEL_KEY_ENC_OPTS_MAX (__TCA_TUNNEL_KEY_ENC_OPTS_MAX - 1)
+
+enum {
+       TCA_TUNNEL_KEY_ENC_OPT_GENEVE_UNSPEC,
+       TCA_TUNNEL_KEY_ENC_OPT_GENEVE_CLASS,            /* be16 */
+       TCA_TUNNEL_KEY_ENC_OPT_GENEVE_TYPE,             /* u8 */
+       TCA_TUNNEL_KEY_ENC_OPT_GENEVE_DATA,             /* 4 to 128 bytes */
+
+       __TCA_TUNNEL_KEY_ENC_OPT_GENEVE_MAX,
+};
+
+#define TCA_TUNNEL_KEY_ENC_OPT_GENEVE_MAX \
+       (__TCA_TUNNEL_KEY_ENC_OPT_GENEVE_MAX - 1)
+
 #endif
index e3f6ed8a7064f9276ca2b57ed5ecff3364786e9d..e02d31986ff911b0547bd954abcc7339f4668ca6 100644 (file)
@@ -235,6 +235,11 @@ struct tcp_info {
 
        __u32   tcpi_delivered;
        __u32   tcpi_delivered_ce;
+
+       __u64   tcpi_bytes_sent;     /* RFC4898 tcpEStatsPerfHCDataOctetsOut */
+       __u64   tcpi_bytes_retrans;  /* RFC4898 tcpEStatsPerfOctetsRetrans */
+       __u32   tcpi_dsack_dups;     /* RFC4898 tcpEStatsStackDSACKDups */
+       __u32   tcpi_reord_seen;     /* reordering events seen */
 };
 
 /* netlink attributes types for SCM_TIMESTAMPING_OPT_STATS */
@@ -257,7 +262,10 @@ enum {
        TCP_NLA_SND_SSTHRESH,   /* Slow start size threshold */
        TCP_NLA_DELIVERED,      /* Data pkts delivered incl. out-of-order */
        TCP_NLA_DELIVERED_CE,   /* Like above but only ones w/ CE marks */
-
+       TCP_NLA_BYTES_SENT,     /* Data bytes sent including retransmission */
+       TCP_NLA_BYTES_RETRANS,  /* Data bytes retransmitted */
+       TCP_NLA_DSACK_DUPS,     /* DSACK blocks received */
+       TCP_NLA_REORD_SEEN,     /* reordering events seen */
 };
 
 /* for TCP_MD5SIG socket option */
index 85c11982c89b38a3995db683d08b7f89ac2e3889..0ebe02ef1a86b1eeceeac99c8354bd38516a26e2 100644 (file)
@@ -121,6 +121,7 @@ enum {
        TIPC_NLA_SOCK_TIPC_STATE,       /* u32 */
        TIPC_NLA_SOCK_COOKIE,           /* u64 */
        TIPC_NLA_SOCK_PAD,              /* flag */
+       TIPC_NLA_SOCK_GROUP,            /* nest */
 
        __TIPC_NLA_SOCK_MAX,
        TIPC_NLA_SOCK_MAX = __TIPC_NLA_SOCK_MAX - 1
@@ -233,6 +234,19 @@ enum {
        TIPC_NLA_MON_PEER_MAX = __TIPC_NLA_MON_PEER_MAX - 1
 };
 
+/* Nest, socket group info */
+enum {
+       TIPC_NLA_SOCK_GROUP_ID,                 /* u32 */
+       TIPC_NLA_SOCK_GROUP_OPEN,               /* flag */
+       TIPC_NLA_SOCK_GROUP_NODE_SCOPE,         /* flag */
+       TIPC_NLA_SOCK_GROUP_CLUSTER_SCOPE,      /* flag */
+       TIPC_NLA_SOCK_GROUP_INSTANCE,           /* u32 */
+       TIPC_NLA_SOCK_GROUP_BC_SEND_NEXT,       /* u32 */
+
+       __TIPC_NLA_SOCK_GROUP_MAX,
+       TIPC_NLA_SOCK_GROUP_MAX = __TIPC_NLA_SOCK_GROUP_MAX - 1
+};
+
 /* Nest, connection info */
 enum {
        TIPC_NLA_CON_UNSPEC,
index e3af2859188bb1e52d6f0c49be7a9edb1a91dda7..5f3b9fec7b5f4491ad9f38beea7447a305ff4fb0 100644 (file)
@@ -305,9 +305,12 @@ enum xfrm_attr_type_t {
        XFRMA_ADDRESS_FILTER,   /* struct xfrm_address_filter */
        XFRMA_PAD,
        XFRMA_OFFLOAD_DEV,      /* struct xfrm_state_offload */
-       XFRMA_OUTPUT_MARK,      /* __u32 */
+       XFRMA_SET_MARK,         /* __u32 */
+       XFRMA_SET_MARK_MASK,    /* __u32 */
+       XFRMA_IF_ID,            /* __u32 */
        __XFRMA_MAX
 
+#define XFRMA_OUTPUT_MARK XFRMA_SET_MARK       /* Compatibility */
 #define XFRMA_MAX (__XFRMA_MAX - 1)
 };
 
index 3b654530259809d0241baa695136a59ad169fe4c..203281198079c660c18f9cabf8b9b61c26caf7d4 100644 (file)
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -38,6 +38,7 @@
 #include <linux/rwsem.h>
 #include <linux/nsproxy.h>
 #include <linux/ipc_namespace.h>
+#include <linux/rhashtable.h>
 
 #include <asm/current.h>
 #include <linux/uaccess.h>
index 76e95e4f3aa284f6ded3962b3055233ea533add8..00ef2f743a628e0c11d33ed64da2295f9c2441ca 100644 (file)
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -86,6 +86,7 @@
 #include <linux/ipc_namespace.h>
 #include <linux/sched/wake_q.h>
 #include <linux/nospec.h>
+#include <linux/rhashtable.h>
 
 #include <linux/uaccess.h>
 #include "util.h"
index fefa00d310fb5080334392087a764730e23cc34d..fa1d322daa27fed43cc2cf5b994aeb19895121c6 100644 (file)
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -43,6 +43,7 @@
 #include <linux/nsproxy.h>
 #include <linux/mount.h>
 #include <linux/ipc_namespace.h>
+#include <linux/rhashtable.h>
 
 #include <linux/uaccess.h>
 
index 4e81182fa0ac48cad2ed3f5afdd8141e161a920b..fdffff41f65b546f66a5ee9b69297b39c445b8a0 100644 (file)
@@ -63,6 +63,7 @@
 #include <linux/rwsem.h>
 #include <linux/memory.h>
 #include <linux/ipc_namespace.h>
+#include <linux/rhashtable.h>
 
 #include <asm/unistd.h>
 
index 3d83ee7df381b1def956b5e645376451d797440e..badabb0b435cb72747a4065b6d8c62c7c3419a2d 100644 (file)
@@ -95,7 +95,7 @@ static int compute_effective_progs(struct cgroup *cgrp,
                                   enum bpf_attach_type type,
                                   struct bpf_prog_array __rcu **array)
 {
-       struct bpf_prog_array __rcu *progs;
+       struct bpf_prog_array *progs;
        struct bpf_prog_list *pl;
        struct cgroup *p = cgrp;
        int cnt = 0;
@@ -120,13 +120,12 @@ static int compute_effective_progs(struct cgroup *cgrp,
                                            &p->bpf.progs[type], node) {
                                if (!pl->prog)
                                        continue;
-                               rcu_dereference_protected(progs, 1)->
-                                       progs[cnt++] = pl->prog;
+                               progs->progs[cnt++] = pl->prog;
                        }
                p = cgroup_parent(p);
        } while (p);
 
-       *array = progs;
+       rcu_assign_pointer(*array, progs);
        return 0;
 }
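
The hunk above switches to the canonical RCU publish pattern: build through a plain pointer, then publish once with rcu_assign_pointer() so readers only ever see a fully initialized object. In generic form (struct foo and the slot are placeholders):

	struct foo { int val; };

	static int publish_foo(struct foo __rcu **slot)
	{
		struct foo *new = kzalloc(sizeof(*new), GFP_KERNEL);

		if (!new)
			return -ENOMEM;
		new->val = 42;			/* fill in while still private */
		rcu_assign_pointer(*slot, new);	/* one-shot release-publish */
		return 0;
	}
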
 
index 1e5625d46414cc68efe372b2c6a8dab266a24dd6..253aa8e79c7b4e22bfb5f626c35e5a11a7f035e0 100644 (file)
@@ -1538,7 +1538,7 @@ static struct {
        .null_prog = NULL,
 };
 
-struct bpf_prog_array __rcu *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags)
+struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags)
 {
        if (prog_cnt)
                return kzalloc(sizeof(struct bpf_prog_array) +
index ac747d5cf7c68b196c3e37a1c58a9e23bc5fd786..177a524363942e0c66079cf51b0f760c98bf8031 100644 (file)
 #include <linux/bug.h>
 #include <linux/kdev_t.h>
 #include <linux/list.h>
+#include <linux/lockdep.h>
 #include <linux/netdevice.h>
 #include <linux/printk.h>
 #include <linux/proc_ns.h>
+#include <linux/rhashtable.h>
 #include <linux/rtnetlink.h>
 #include <linux/rwsem.h>
 
-/* Protects bpf_prog_offload_devs, bpf_map_offload_devs and offload members
+/* Protects offdevs, members of bpf_offload_netdev and offload members
  * of all progs.
  * RTNL lock cannot be taken when holding this lock.
  */
 static DECLARE_RWSEM(bpf_devs_lock);
-static LIST_HEAD(bpf_prog_offload_devs);
-static LIST_HEAD(bpf_map_offload_devs);
+
+struct bpf_offload_dev {
+       struct list_head netdevs;
+};
+
+struct bpf_offload_netdev {
+       struct rhash_head l;
+       struct net_device *netdev;
+       struct bpf_offload_dev *offdev;
+       struct list_head progs;
+       struct list_head maps;
+       struct list_head offdev_netdevs;
+};
+
+static const struct rhashtable_params offdevs_params = {
+       .nelem_hint             = 4,
+       .key_len                = sizeof(struct net_device *),
+       .key_offset             = offsetof(struct bpf_offload_netdev, netdev),
+       .head_offset            = offsetof(struct bpf_offload_netdev, l),
+       .automatic_shrinking    = true,
+};
+
+static struct rhashtable offdevs;
+static bool offdevs_inited;
 
 static int bpf_dev_offload_check(struct net_device *netdev)
 {
@@ -41,8 +65,19 @@ static int bpf_dev_offload_check(struct net_device *netdev)
        return 0;
 }
 
+static struct bpf_offload_netdev *
+bpf_offload_find_netdev(struct net_device *netdev)
+{
+       lockdep_assert_held(&bpf_devs_lock);
+
+       if (!offdevs_inited)
+               return NULL;
+       return rhashtable_lookup_fast(&offdevs, &netdev, offdevs_params);
+}
+
 int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr)
 {
+       struct bpf_offload_netdev *ondev;
        struct bpf_prog_offload *offload;
        int err;
 
@@ -66,12 +101,13 @@ int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr)
                goto err_maybe_put;
 
        down_write(&bpf_devs_lock);
-       if (offload->netdev->reg_state != NETREG_REGISTERED) {
+       ondev = bpf_offload_find_netdev(offload->netdev);
+       if (!ondev) {
                err = -EINVAL;
                goto err_unlock;
        }
        prog->aux->offload = offload;
-       list_add_tail(&offload->offloads, &bpf_prog_offload_devs);
+       list_add_tail(&offload->offloads, &ondev->progs);
        dev_put(offload->netdev);
        up_write(&bpf_devs_lock);
 
@@ -294,6 +330,7 @@ static int bpf_map_offload_ndo(struct bpf_offloaded_map *offmap,
 struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr)
 {
        struct net *net = current->nsproxy->net_ns;
+       struct bpf_offload_netdev *ondev;
        struct bpf_offloaded_map *offmap;
        int err;
 
@@ -316,11 +353,17 @@ struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr)
        if (err)
                goto err_unlock;
 
+       ondev = bpf_offload_find_netdev(offmap->netdev);
+       if (!ondev) {
+               err = -EINVAL;
+               goto err_unlock;
+       }
+
        err = bpf_map_offload_ndo(offmap, BPF_OFFLOAD_MAP_ALLOC);
        if (err)
                goto err_unlock;
 
-       list_add_tail(&offmap->offloads, &bpf_map_offload_devs);
+       list_add_tail(&offmap->offloads, &ondev->maps);
        up_write(&bpf_devs_lock);
        rtnl_unlock();
 
@@ -468,77 +511,159 @@ int bpf_map_offload_info_fill(struct bpf_map_info *info, struct bpf_map *map)
        return 0;
 }
 
-bool bpf_offload_dev_match(struct bpf_prog *prog, struct bpf_map *map)
+static bool __bpf_offload_dev_match(struct bpf_prog *prog,
+                                   struct net_device *netdev)
 {
-       struct bpf_offloaded_map *offmap;
+       struct bpf_offload_netdev *ondev1, *ondev2;
        struct bpf_prog_offload *offload;
-       bool ret;
 
        if (!bpf_prog_is_dev_bound(prog->aux))
                return false;
-       if (!bpf_map_is_dev_bound(map))
-               return bpf_map_offload_neutral(map);
 
-       down_read(&bpf_devs_lock);
        offload = prog->aux->offload;
-       offmap = map_to_offmap(map);
+       if (!offload)
+               return false;
+       if (offload->netdev == netdev)
+               return true;
 
-       ret = offload && offload->netdev == offmap->netdev;
+       ondev1 = bpf_offload_find_netdev(offload->netdev);
+       ondev2 = bpf_offload_find_netdev(netdev);
+
+       return ondev1 && ondev2 && ondev1->offdev == ondev2->offdev;
+}
+
+bool bpf_offload_dev_match(struct bpf_prog *prog, struct net_device *netdev)
+{
+       bool ret;
+
+       down_read(&bpf_devs_lock);
+       ret = __bpf_offload_dev_match(prog, netdev);
        up_read(&bpf_devs_lock);
 
        return ret;
 }
+EXPORT_SYMBOL_GPL(bpf_offload_dev_match);
 
-static void bpf_offload_orphan_all_progs(struct net_device *netdev)
+bool bpf_offload_prog_map_match(struct bpf_prog *prog, struct bpf_map *map)
 {
-       struct bpf_prog_offload *offload, *tmp;
+       struct bpf_offloaded_map *offmap;
+       bool ret;
 
-       list_for_each_entry_safe(offload, tmp, &bpf_prog_offload_devs, offloads)
-               if (offload->netdev == netdev)
-                       __bpf_prog_offload_destroy(offload->prog);
+       if (!bpf_map_is_dev_bound(map))
+               return bpf_map_offload_neutral(map);
+       offmap = map_to_offmap(map);
+
+       down_read(&bpf_devs_lock);
+       ret = __bpf_offload_dev_match(prog, offmap->netdev);
+       up_read(&bpf_devs_lock);
+
+       return ret;
 }
 
-static void bpf_offload_orphan_all_maps(struct net_device *netdev)
+int bpf_offload_dev_netdev_register(struct bpf_offload_dev *offdev,
+                                   struct net_device *netdev)
 {
-       struct bpf_offloaded_map *offmap, *tmp;
+       struct bpf_offload_netdev *ondev;
+       int err;
 
-       list_for_each_entry_safe(offmap, tmp, &bpf_map_offload_devs, offloads)
-               if (offmap->netdev == netdev)
-                       __bpf_map_offload_destroy(offmap);
+       ondev = kzalloc(sizeof(*ondev), GFP_KERNEL);
+       if (!ondev)
+               return -ENOMEM;
+
+       ondev->netdev = netdev;
+       ondev->offdev = offdev;
+       INIT_LIST_HEAD(&ondev->progs);
+       INIT_LIST_HEAD(&ondev->maps);
+
+       down_write(&bpf_devs_lock);
+       err = rhashtable_insert_fast(&offdevs, &ondev->l, offdevs_params);
+       if (err) {
+               netdev_warn(netdev, "failed to register for BPF offload\n");
+               goto err_unlock_free;
+       }
+
+       list_add(&ondev->offdev_netdevs, &offdev->netdevs);
+       up_write(&bpf_devs_lock);
+       return 0;
+
+err_unlock_free:
+       up_write(&bpf_devs_lock);
+       kfree(ondev);
+       return err;
 }
+EXPORT_SYMBOL_GPL(bpf_offload_dev_netdev_register);
 
-static int bpf_offload_notification(struct notifier_block *notifier,
-                                   ulong event, void *ptr)
+void bpf_offload_dev_netdev_unregister(struct bpf_offload_dev *offdev,
+                                      struct net_device *netdev)
 {
-       struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+       struct bpf_offload_netdev *ondev, *altdev;
+       struct bpf_offloaded_map *offmap, *mtmp;
+       struct bpf_prog_offload *offload, *ptmp;
 
        ASSERT_RTNL();
 
-       switch (event) {
-       case NETDEV_UNREGISTER:
-               /* ignore namespace changes */
-               if (netdev->reg_state != NETREG_UNREGISTERING)
-                       break;
-
-               down_write(&bpf_devs_lock);
-               bpf_offload_orphan_all_progs(netdev);
-               bpf_offload_orphan_all_maps(netdev);
-               up_write(&bpf_devs_lock);
-               break;
-       default:
-               break;
+       down_write(&bpf_devs_lock);
+       ondev = rhashtable_lookup_fast(&offdevs, &netdev, offdevs_params);
+       if (WARN_ON(!ondev))
+               goto unlock;
+
+       WARN_ON(rhashtable_remove_fast(&offdevs, &ondev->l, offdevs_params));
+       list_del(&ondev->offdev_netdevs);
+
+       /* Try to move the objects to another netdev of the device */
+       altdev = list_first_entry_or_null(&offdev->netdevs,
+                                         struct bpf_offload_netdev,
+                                         offdev_netdevs);
+       if (altdev) {
+               list_for_each_entry(offload, &ondev->progs, offloads)
+                       offload->netdev = altdev->netdev;
+               list_splice_init(&ondev->progs, &altdev->progs);
+
+               list_for_each_entry(offmap, &ondev->maps, offloads)
+                       offmap->netdev = altdev->netdev;
+               list_splice_init(&ondev->maps, &altdev->maps);
+       } else {
+               list_for_each_entry_safe(offload, ptmp, &ondev->progs, offloads)
+                       __bpf_prog_offload_destroy(offload->prog);
+               list_for_each_entry_safe(offmap, mtmp, &ondev->maps, offloads)
+                       __bpf_map_offload_destroy(offmap);
        }
-       return NOTIFY_OK;
-}
 
-static struct notifier_block bpf_offload_notifier = {
-       .notifier_call = bpf_offload_notification,
-};
+       WARN_ON(!list_empty(&ondev->progs));
+       WARN_ON(!list_empty(&ondev->maps));
+       kfree(ondev);
+unlock:
+       up_write(&bpf_devs_lock);
+}
+EXPORT_SYMBOL_GPL(bpf_offload_dev_netdev_unregister);
 
-static int __init bpf_offload_init(void)
+struct bpf_offload_dev *bpf_offload_dev_create(void)
 {
-       register_netdevice_notifier(&bpf_offload_notifier);
-       return 0;
+       struct bpf_offload_dev *offdev;
+       int err;
+
+       down_write(&bpf_devs_lock);
+       if (!offdevs_inited) {
+               err = rhashtable_init(&offdevs, &offdevs_params);
+               if (err)
+                       return ERR_PTR(err);
+               offdevs_inited = true;
+       }
+       up_write(&bpf_devs_lock);
+
+       offdev = kzalloc(sizeof(*offdev), GFP_KERNEL);
+       if (!offdev)
+               return ERR_PTR(-ENOMEM);
+
+       INIT_LIST_HEAD(&offdev->netdevs);
+
+       return offdev;
 }
+EXPORT_SYMBOL_GPL(bpf_offload_dev_create);
 
-subsys_initcall(bpf_offload_init);
+void bpf_offload_dev_destroy(struct bpf_offload_dev *offdev)
+{
+       WARN_ON(!list_empty(&offdev->netdevs));
+       kfree(offdev);
+}
+EXPORT_SYMBOL_GPL(bpf_offload_dev_destroy);
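
Taken together, the exported functions give drivers this lifecycle; one offload device can span several netdevs, letting programs and maps migrate between them on unregister. A sketch with error unwinding trimmed, noting that the netdev register/unregister calls run under RTNL as the code above asserts:

	static struct bpf_offload_dev *bdev;

	static int drv_bpf_init(struct net_device *netdev)
	{
		bdev = bpf_offload_dev_create();
		if (IS_ERR(bdev))
			return PTR_ERR(bdev);
		return bpf_offload_dev_netdev_register(bdev, netdev);
	}

	static void drv_bpf_fini(struct net_device *netdev)
	{
		bpf_offload_dev_netdev_unregister(bdev, netdev);
		bpf_offload_dev_destroy(bdev);
	}
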
index 98fb7938beea9dd18a255ad77ebe797b01660dea..0b38be5a955c2549ec111de5b99ecc398d68b38d 100644 (file)
@@ -725,11 +725,8 @@ static int bpf_tcp_sendmsg_do_redirect(struct sock *sk, int send,
 {
        bool ingress = !!(md->flags & BPF_F_INGRESS);
        struct smap_psock *psock;
-       struct scatterlist *sg;
        int err = 0;
 
-       sg = md->sg_data;
-
        rcu_read_lock();
        psock = smap_psock_sk(sk);
        if (unlikely(!psock))
index 63aaac52a26553fb29529790cf0350f6dafa504b..25e47c1958745caf9e547f18572c2d4ef51c2cfe 100644 (file)
@@ -5054,7 +5054,7 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
        }
 
        if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) &&
-           !bpf_offload_dev_match(prog, map)) {
+           !bpf_offload_prog_map_match(prog, map)) {
                verbose(env, "offload device mismatch between prog and map\n");
                return -EINVAL;
        }
index 077370bf89643db6967b9c7571271eacd6ddcc08..35cf3d71f8aaf4b7efcdc241953abe2108b58f6a 100644 (file)
@@ -3557,7 +3557,9 @@ static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp,
        key = &cft->lockdep_key;
 #endif
        kn = __kernfs_create_file(cgrp->kn, cgroup_file_name(cgrp, cft, name),
-                                 cgroup_file_mode(cft), 0, cft->kf_ops, cft,
+                                 cgroup_file_mode(cft),
+                                 GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
+                                 0, cft->kf_ops, cft,
                                  NULL, key);
        if (IS_ERR(kn))
                return PTR_ERR(kn);
index 8838d1158d192bfafe61fc234480ad9c9a05971f..d3d82eccdfa5da6705c5ab6a8a81136447105d5f 100644 (file)
@@ -1802,6 +1802,13 @@ config TEST_BITMAP
 
          If unsure, say N.
 
+config TEST_BITFIELD
+       tristate "Test bitfield functions at runtime"
+       help
+         Enable this option to test the bitfield functions at boot.
+
+         If unsure, say N.
+
 config TEST_UUID
        tristate "Test functions located in the uuid module at runtime"
 
index 90dc5520b7849dc69dc4c3df3ea419c45e9451cc..60d0d5f9094610fe5aefd31971b3f63079d94012 100644 (file)
@@ -65,6 +65,7 @@ obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_keys.o
 obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_key_base.o
 obj-$(CONFIG_TEST_PRINTF) += test_printf.o
 obj-$(CONFIG_TEST_BITMAP) += test_bitmap.o
+obj-$(CONFIG_TEST_BITFIELD) += test_bitfield.o
 obj-$(CONFIG_TEST_UUID) += test_uuid.o
 obj-$(CONFIG_TEST_PARMAN) += test_parman.o
 obj-$(CONFIG_TEST_KMOD) += test_kmod.o
index 18989b5b3b56b8b0b59836492606ec99fd3debbe..389829d3a1d1c8ff1c2b0bd529adcb8a2cc6c46a 100644 (file)
@@ -35,6 +35,25 @@ const void *kobject_namespace(struct kobject *kobj)
        return kobj->ktype->namespace(kobj);
 }
 
+/**
+ * kobject_get_ownership - get sysfs ownership data for @kobj
+ * @kobj: kobject in question
+ * @uid: kernel user ID for sysfs objects
+ * @gid: kernel group ID for sysfs objects
+ *
+ * Returns initial uid/gid pair that should be used when creating sysfs
+ * representation of given kobject. Normally used to adjust ownership of
+ * objects in a container.
+ */
+void kobject_get_ownership(struct kobject *kobj, kuid_t *uid, kgid_t *gid)
+{
+       *uid = GLOBAL_ROOT_UID;
+       *gid = GLOBAL_ROOT_GID;
+
+       if (kobj->ktype->get_ownership)
+               kobj->ktype->get_ownership(kobj, uid, gid);
+}
+
 /*
  * populate_dir - populate directory with attributes.
  * @kobj: object we're working on.
@@ -868,9 +887,16 @@ static void kset_release(struct kobject *kobj)
        kfree(kset);
 }
 
+void kset_get_ownership(struct kobject *kobj, kuid_t *uid, kgid_t *gid)
+{
+       if (kobj->parent)
+               kobject_get_ownership(kobj->parent, uid, gid);
+}
+
 static struct kobj_type kset_ktype = {
        .sysfs_ops      = &kobj_sysfs_ops,
-       .release = kset_release,
+       .release        = kset_release,
+       .get_ownership  = kset_get_ownership,
 };
 
 /**
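
A sketch of a kobj_type opting in to the new callback so its sysfs entries are created with non-root ownership; struct foo and its stored ids are hypothetical:

	struct foo {
		struct kobject	kobj;
		kuid_t		owner_uid;
		kgid_t		owner_gid;
	};

	static void foo_get_ownership(struct kobject *kobj,
				      kuid_t *uid, kgid_t *gid)
	{
		struct foo *f = container_of(kobj, struct foo, kobj);

		*uid = f->owner_uid;	/* e.g. mapped from a user namespace */
		*gid = f->owner_gid;
	}

	static struct kobj_type foo_ktype = {
		.sysfs_ops	= &kobj_sysfs_ops,
		.get_ownership	= foo_get_ownership,
	};
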
index dfa55c873c1318643fdbcbe916b9c18a54edc4c9..e335bcafa9e4c3012de2f0f8606c3e542008f7b3 100644 (file)
@@ -253,8 +253,8 @@ int nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head,
                        if (policy) {
                                err = validate_nla(nla, maxtype, policy);
                                if (err < 0) {
-                                       if (extack)
-                                               extack->bad_attr = nla;
+                                       NL_SET_ERR_MSG_ATTR(extack, nla,
+                                                           "Attribute failed policy validation");
                                        goto errout;
                                }
                        }
index fcb4ce682c6fae37d71aa6138007b51cd0d8a2b3..bf043258fa0082b4cc1100a982d2ed38e21b1c0e 100644 (file)
@@ -1,4 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
+#include <linux/bug.h>
 #include <linux/kernel.h>
 #include <asm/div64.h>
 #include <linux/reciprocal_div.h>
@@ -26,3 +27,43 @@ struct reciprocal_value reciprocal_value(u32 d)
        return R;
 }
 EXPORT_SYMBOL(reciprocal_value);
+
+struct reciprocal_value_adv reciprocal_value_adv(u32 d, u8 prec)
+{
+       struct reciprocal_value_adv R;
+       u32 l, post_shift;
+       u64 mhigh, mlow;
+
+       /* ceil(log2(d)) */
+       l = fls(d - 1);
+       /* NOTE: mlow/mhigh could overflow u64 when l == 32. This case needs to
+        * be handled before calling "reciprocal_value_adv", please see the
+        * comment at include/linux/reciprocal_div.h.
+        */
+       WARN(l == 32,
+            "ceil(log2(0x%08x)) == 32, %s doesn't support such divisor",
+            d, __func__);
+       post_shift = l;
+       mlow = 1ULL << (32 + l);
+       do_div(mlow, d);
+       mhigh = (1ULL << (32 + l)) + (1ULL << (32 + l - prec));
+       do_div(mhigh, d);
+
+       for (; post_shift > 0; post_shift--) {
+               u64 lo = mlow >> 1, hi = mhigh >> 1;
+
+               if (lo >= hi)
+                       break;
+
+               mlow = lo;
+               mhigh = hi;
+       }
+
+       R.m = (u32)mhigh;
+       R.sh = post_shift;
+       R.exp = l;
+       R.is_wide_m = mhigh > U32_MAX;
+
+       return R;
+}
+EXPORT_SYMBOL(reciprocal_value_adv);
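
reciprocal_value_adv() computes the multiplier/shift pair from Granlund & Montgomery's "Division by Invariant Integers Using Multiplication" (Figure 4.2) so that a later n / d costs one 32x32->64 multiply plus shifts. The hunk only produces the constants; here is a self-contained userspace sketch of how a caller would consume them, assuming the paper's two evaluation forms (narrow m: q = ((u64)n * m) >> 32 >> sh; wide m: fold the implicit 2^32 bit of m back in without overflowing). Valid for divisors in [2, 2^31], matching the WARN above.

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

struct rv_adv { uint32_t m; uint8_t sh, exp; bool is_wide_m; };

/* same computation as reciprocal_value_adv() above, in plain C99 */
static struct rv_adv rv_adv(uint32_t d, uint8_t prec)
{
	struct rv_adv R;
	uint32_t l = 32 - __builtin_clz(d - 1);	/* ceil(log2(d)), d >= 2 */
	uint64_t mlow = (1ULL << (32 + l)) / d;
	uint64_t mhigh = ((1ULL << (32 + l)) + (1ULL << (32 + l - prec))) / d;
	uint32_t post_shift = l;

	for (; post_shift > 0; post_shift--) {
		uint64_t lo = mlow >> 1, hi = mhigh >> 1;

		if (lo >= hi)
			break;
		mlow = lo;
		mhigh = hi;
	}
	R.m = (uint32_t)mhigh;
	R.sh = post_shift;
	R.exp = l;
	R.is_wide_m = mhigh > 0xffffffffULL;
	return R;
}

/* n / d using only one 32x32->64 multiply and shifts */
static uint32_t rv_divide(uint32_t n, struct rv_adv R)
{
	uint32_t t = (uint32_t)(((uint64_t)n * R.m) >> 32);

	if (!R.is_wide_m)
		return t >> R.sh;
	/* m really has 33 bits: R.m is its low word and the implicit 2^32
	 * contributes n itself; (t + n) may overflow, so halve first. */
	return (t + ((n - t) >> 1)) >> (R.sh - 1);
}

int main(void)
{
	for (uint32_t d = 2; d < 5000; d++) {
		struct rv_adv R = rv_adv(d, 32);

		for (uint32_t n = 0; n < 200000; n += 13)
			if (rv_divide(n, R) != n / d)
				printf("mismatch: %u / %u\n", n, d);
		if (rv_divide(UINT32_MAX, R) != UINT32_MAX / d)
			printf("mismatch: UINT32_MAX / %u\n", d);
	}
	printf("spot checks done\n");
	return 0;
}

For d = 7 this yields m = 0x124924925 (33 bits, so is_wide_m) and sh = 3: the familiar divide-by-seven multiply-and-fix-up sequence.
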
index e5c8586cf7174cfe0526dc8fb3314676601c5e57..ae4223e0f5bcb68610511b2cb7ba2e12b7f7d086 100644 (file)
@@ -28,6 +28,7 @@
 #include <linux/rhashtable.h>
 #include <linux/err.h>
 #include <linux/export.h>
+#include <linux/rhashtable.h>
 
 #define HASH_DEFAULT_SIZE      64UL
 #define HASH_MIN_SIZE          4U
@@ -115,8 +116,7 @@ static void bucket_table_free_rcu(struct rcu_head *head)
 
 static union nested_table *nested_table_alloc(struct rhashtable *ht,
                                              union nested_table __rcu **prev,
-                                             unsigned int shifted,
-                                             unsigned int nhash)
+                                             bool leaf)
 {
        union nested_table *ntbl;
        int i;
@@ -127,10 +127,9 @@ static union nested_table *nested_table_alloc(struct rhashtable *ht,
 
        ntbl = kzalloc(PAGE_SIZE, GFP_ATOMIC);
 
-       if (ntbl && shifted) {
-               for (i = 0; i < PAGE_SIZE / sizeof(ntbl[0].bucket); i++)
-                       INIT_RHT_NULLS_HEAD(ntbl[i].bucket, ht,
-                                           (i << shifted) | nhash);
+       if (ntbl && leaf) {
+               for (i = 0; i < PAGE_SIZE / sizeof(ntbl[0]); i++)
+                       INIT_RHT_NULLS_HEAD(ntbl[i].bucket);
        }
 
        rcu_assign_pointer(*prev, ntbl);
@@ -156,7 +155,7 @@ static struct bucket_table *nested_bucket_table_alloc(struct rhashtable *ht,
                return NULL;
 
        if (!nested_table_alloc(ht, (union nested_table __rcu **)tbl->buckets,
-                               0, 0)) {
+                               false)) {
                kfree(tbl);
                return NULL;
        }
@@ -206,7 +205,7 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
        tbl->hash_rnd = get_random_u32();
 
        for (i = 0; i < nbuckets; i++)
-               INIT_RHT_NULLS_HEAD(tbl->buckets[i], ht, i);
+               INIT_RHT_NULLS_HEAD(tbl->buckets[i]);
 
        return tbl;
 }
@@ -227,8 +226,7 @@ static struct bucket_table *rhashtable_last_table(struct rhashtable *ht,
 static int rhashtable_rehash_one(struct rhashtable *ht, unsigned int old_hash)
 {
        struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
-       struct bucket_table *new_tbl = rhashtable_last_table(ht,
-               rht_dereference_rcu(old_tbl->future_tbl, ht));
+       struct bucket_table *new_tbl = rhashtable_last_table(ht, old_tbl);
        struct rhash_head __rcu **pprev = rht_bucket_var(old_tbl, old_hash);
        int err = -EAGAIN;
        struct rhash_head *head, *next, *entry;
@@ -298,21 +296,14 @@ static int rhashtable_rehash_attach(struct rhashtable *ht,
                                    struct bucket_table *old_tbl,
                                    struct bucket_table *new_tbl)
 {
-       /* Protect future_tbl using the first bucket lock. */
-       spin_lock_bh(old_tbl->locks);
-
-       /* Did somebody beat us to it? */
-       if (rcu_access_pointer(old_tbl->future_tbl)) {
-               spin_unlock_bh(old_tbl->locks);
-               return -EEXIST;
-       }
-
        /* Make insertions go into the new, empty table right away. Deletions
         * and lookups will be attempted in both tables until we synchronize.
+        * As cmpxchg() provides strong barriers, we do not need
+        * rcu_assign_pointer().
         */
-       rcu_assign_pointer(old_tbl->future_tbl, new_tbl);
 
-       spin_unlock_bh(old_tbl->locks);
+       if (cmpxchg(&old_tbl->future_tbl, NULL, new_tbl) != NULL)
+               return -EEXIST;
 
        return 0;
 }
@@ -475,7 +466,7 @@ static int rhashtable_insert_rehash(struct rhashtable *ht,
 
 fail:
        /* Do not fail the insert if someone else did a rehash. */
-       if (likely(rcu_dereference_raw(tbl->future_tbl)))
+       if (likely(rcu_access_pointer(tbl->future_tbl)))
                return 0;
 
        /* Schedule async rehash to retry allocation in process context. */
@@ -548,7 +539,7 @@ static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht,
        if (PTR_ERR(data) != -EAGAIN && PTR_ERR(data) != -ENOENT)
                return ERR_CAST(data);
 
-       new_tbl = rcu_dereference(tbl->future_tbl);
+       new_tbl = rht_dereference_rcu(tbl->future_tbl, ht);
        if (new_tbl)
                return new_tbl;
 
@@ -607,7 +598,7 @@ static void *rhashtable_try_insert(struct rhashtable *ht, const void *key,
                        break;
 
                spin_unlock_bh(lock);
-               tbl = rcu_dereference(tbl->future_tbl);
+               tbl = rht_dereference_rcu(tbl->future_tbl, ht);
        }
 
        data = rhashtable_lookup_one(ht, tbl, hash, key, obj);
@@ -1002,7 +993,6 @@ static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed)
  *     .key_offset = offsetof(struct test_obj, key),
  *     .key_len = sizeof(int),
  *     .hashfn = jhash,
- *     .nulls_base = (1U << RHT_BASE_SHIFT),
  * };
  *
  * Configuration Example 2: Variable length keys
@@ -1034,9 +1024,6 @@ int rhashtable_init(struct rhashtable *ht,
            (params->obj_hashfn && !params->obj_cmpfn))
                return -EINVAL;
 
-       if (params->nulls_base && params->nulls_base < (1U << RHT_BASE_SHIFT))
-               return -EINVAL;
-
        memset(ht, 0, sizeof(*ht));
        mutex_init(&ht->mutex);
        spin_lock_init(&ht->lock);
@@ -1100,10 +1087,6 @@ int rhltable_init(struct rhltable *hlt, const struct rhashtable_params *params)
 {
        int err;
 
-       /* No rhlist NULLs marking for now. */
-       if (params->nulls_base)
-               return -EINVAL;
-
        err = rhashtable_init(&hlt->ht, params);
        hlt->ht.rhlist = true;
        return err;
@@ -1227,25 +1210,18 @@ struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht,
        unsigned int index = hash & ((1 << tbl->nest) - 1);
        unsigned int size = tbl->size >> tbl->nest;
        union nested_table *ntbl;
-       unsigned int shifted;
-       unsigned int nhash;
 
        ntbl = (union nested_table *)rcu_dereference_raw(tbl->buckets[0]);
        hash >>= tbl->nest;
-       nhash = index;
-       shifted = tbl->nest;
        ntbl = nested_table_alloc(ht, &ntbl[index].table,
-                                 size <= (1 << shift) ? shifted : 0, nhash);
+                                 size <= (1 << shift));
 
        while (ntbl && size > (1 << shift)) {
                index = hash & ((1 << shift) - 1);
                size >>= shift;
                hash >>= shift;
-               nhash |= index << shifted;
-               shifted += shift;
                ntbl = nested_table_alloc(ht, &ntbl[index].table,
-                                         size <= (1 << shift) ? shifted : 0,
-                                         nhash);
+                                         size <= (1 << shift));
        }
 
        if (!ntbl)
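
The rhashtable_rehash_attach() hunk above replaces the bucket-lock-protected publication of future_tbl with a single cmpxchg(): the first rehasher to install a table wins, any later one sees a non-NULL pointer and backs off with -EEXIST, and the cmpxchg's ordering stands in for the rcu_assign_pointer() barrier. A minimal userspace sketch of the same claim-by-compare-exchange idiom using C11 atomics (types and names are illustrative):

#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>

struct table { int size; };

struct ht {
	_Atomic(struct table *) future;	/* NULL until a rehash is attached */
};

/* 0 on success, -1 if another thread already attached a table */
static int attach_future(struct ht *ht, struct table *new_tbl)
{
	struct table *expected = NULL;

	/* atomically: if future == NULL, set it to new_tbl */
	if (!atomic_compare_exchange_strong(&ht->future, &expected, new_tbl))
		return -1;	/* somebody beat us to it */
	return 0;
}

int main(void)
{
	struct ht ht = { .future = NULL };
	struct table a = { 64 }, b = { 128 };

	printf("first attach:  %d\n", attach_future(&ht, &a));	/* 0 */
	printf("second attach: %d\n", attach_future(&ht, &b));	/* -1 */
	return 0;
}

Collapsing "lock, test, store, unlock" into one atomic also removes the race window the deleted comment ("Did somebody beat us to it?") had to guard against.
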
diff --git a/lib/test_bitfield.c b/lib/test_bitfield.c
new file mode 100644 (file)
index 0000000..5b8f410
--- /dev/null
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Test cases for bitfield helpers.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/bitfield.h>
+
+#define CHECK_ENC_GET_U(tp, v, field, res) do {                                \
+               {                                                       \
+                       u##tp _res;                                     \
+                                                                       \
+                       _res = u##tp##_encode_bits(v, field);           \
+                       if (_res != res) {                              \
+                               pr_warn("u" #tp "_encode_bits(" #v ", " #field ") is 0x%llx != " #res "\n",\
+                                       (u64)_res);                     \
+                               return -EINVAL;                         \
+                       }                                               \
+                       if (u##tp##_get_bits(_res, field) != v)         \
+                               return -EINVAL;                         \
+               }                                                       \
+       } while (0)
+
+#define CHECK_ENC_GET_LE(tp, v, field, res) do {                       \
+               {                                                       \
+                       __le##tp _res;                                  \
+                                                                       \
+                       _res = le##tp##_encode_bits(v, field);          \
+                       if (_res != cpu_to_le##tp(res)) {               \
+                               pr_warn("le" #tp "_encode_bits(" #v ", " #field ") is 0x%llx != 0x%llx\n",\
+                                       (u64)le##tp##_to_cpu(_res),     \
+                                       (u64)(res));                    \
+                               return -EINVAL;                         \
+                       }                                               \
+                       if (le##tp##_get_bits(_res, field) != v)        \
+                               return -EINVAL;                         \
+               }                                                       \
+       } while (0)
+
+#define CHECK_ENC_GET_BE(tp, v, field, res) do {                       \
+               {                                                       \
+                       __be##tp _res;                                  \
+                                                                       \
+                       _res = be##tp##_encode_bits(v, field);          \
+                       if (_res != cpu_to_be##tp(res)) {               \
+                               pr_warn("be" #tp "_encode_bits(" #v ", " #field ") is 0x%llx != 0x%llx\n",\
+                                       (u64)be##tp##_to_cpu(_res),     \
+                                       (u64)(res));                    \
+                               return -EINVAL;                         \
+                       }                                               \
+                       if (be##tp##_get_bits(_res, field) != v)        \
+                               return -EINVAL;                         \
+               }                                                       \
+       } while (0)
+
+#define CHECK_ENC_GET(tp, v, field, res) do {                          \
+               CHECK_ENC_GET_U(tp, v, field, res);                     \
+               CHECK_ENC_GET_LE(tp, v, field, res);                    \
+               CHECK_ENC_GET_BE(tp, v, field, res);                    \
+       } while (0)
+
+static int test_constants(void)
+{
+       /*
+        * NOTE
+        * This whole function compiles (or at least should, if everything
+        * is going according to plan) to nothing after optimisation.
+        */
+
+       CHECK_ENC_GET(16,  1, 0x000f, 0x0001);
+       CHECK_ENC_GET(16,  3, 0x00f0, 0x0030);
+       CHECK_ENC_GET(16,  5, 0x0f00, 0x0500);
+       CHECK_ENC_GET(16,  7, 0xf000, 0x7000);
+       CHECK_ENC_GET(16, 14, 0x000f, 0x000e);
+       CHECK_ENC_GET(16, 15, 0x00f0, 0x00f0);
+
+       CHECK_ENC_GET_U(8,  1, 0x0f, 0x01);
+       CHECK_ENC_GET_U(8,  3, 0xf0, 0x30);
+       CHECK_ENC_GET_U(8, 14, 0x0f, 0x0e);
+       CHECK_ENC_GET_U(8, 15, 0xf0, 0xf0);
+
+       CHECK_ENC_GET(32,  1, 0x00000f00, 0x00000100);
+       CHECK_ENC_GET(32,  3, 0x0000f000, 0x00003000);
+       CHECK_ENC_GET(32,  5, 0x000f0000, 0x00050000);
+       CHECK_ENC_GET(32,  7, 0x00f00000, 0x00700000);
+       CHECK_ENC_GET(32, 14, 0x0f000000, 0x0e000000);
+       CHECK_ENC_GET(32, 15, 0xf0000000, 0xf0000000);
+
+       CHECK_ENC_GET(64,  1, 0x00000f0000000000ull, 0x0000010000000000ull);
+       CHECK_ENC_GET(64,  3, 0x0000f00000000000ull, 0x0000300000000000ull);
+       CHECK_ENC_GET(64,  5, 0x000f000000000000ull, 0x0005000000000000ull);
+       CHECK_ENC_GET(64,  7, 0x00f0000000000000ull, 0x0070000000000000ull);
+       CHECK_ENC_GET(64, 14, 0x0f00000000000000ull, 0x0e00000000000000ull);
+       CHECK_ENC_GET(64, 15, 0xf000000000000000ull, 0xf000000000000000ull);
+
+       return 0;
+}
+
+#define CHECK(tp, mask) do {                                           \
+               u64 v;                                                  \
+                                                                       \
+               for (v = 0; v < 1 << hweight32(mask); v++)              \
+                       if (tp##_encode_bits(v, mask) != v << __ffs64(mask)) \
+                               return -EINVAL;                         \
+       } while (0)
+
+static int test_variables(void)
+{
+       CHECK(u8, 0x0f);
+       CHECK(u8, 0xf0);
+       CHECK(u8, 0x38);
+
+       CHECK(u16, 0x0038);
+       CHECK(u16, 0x0380);
+       CHECK(u16, 0x3800);
+       CHECK(u16, 0x8000);
+
+       CHECK(u32, 0x80000000);
+       CHECK(u32, 0x7f000000);
+       CHECK(u32, 0x07e00000);
+       CHECK(u32, 0x00018000);
+
+       CHECK(u64, 0x8000000000000000ull);
+       CHECK(u64, 0x7f00000000000000ull);
+       CHECK(u64, 0x0001800000000000ull);
+       CHECK(u64, 0x0000000080000000ull);
+       CHECK(u64, 0x000000007f000000ull);
+       CHECK(u64, 0x0000000018000000ull);
+       CHECK(u64, 0x00000001f8000000ull);
+
+       return 0;
+}
+
+static int __init test_bitfields(void)
+{
+       int ret = test_constants();
+
+       if (ret) {
+               pr_warn("constant tests failed!\n");
+               return ret;
+       }
+
+       ret = test_variables();
+       if (ret) {
+               pr_warn("variable tests failed!\n");
+               return ret;
+       }
+
+#ifdef TEST_BITFIELD_COMPILE
+       /* these should fail compilation */
+       CHECK_ENC_GET(16, 16, 0x0f00, 0x1000);
+       u32_encode_bits(7, 0x06000000);
+
+       /* this should at least give a warning */
+       u16_encode_bits(0, 0x60000);
+#endif
+
+       pr_info("tests passed\n");
+
+       return 0;
+}
+module_init(test_bitfields)
+
+MODULE_AUTHOR("Johannes Berg <johannes@sipsolutions.net>");
+MODULE_LICENSE("GPL");
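
The new test exercises the uN/leN/beN _encode_bits()/_get_bits() helpers: shift a value into the position described by a constant mask and extract it again, with the endian variants converting on top. A standalone sketch of the underlying mask arithmetic, assuming the usual FIELD_PREP/FIELD_GET definition (shift count = index of the mask's lowest set bit); unlike the kernel helpers, which reject out-of-range values at compile time, this version silently truncates:

#include <stdint.h>
#include <stdio.h>

/* index of the mask's lowest set bit; mask must be non-zero */
static unsigned int mask_shift(uint64_t mask)
{
	return (unsigned int)__builtin_ctzll(mask);
}

static uint64_t encode_bits(uint64_t v, uint64_t mask)
{
	return (v << mask_shift(mask)) & mask;
}

static uint64_t get_bits(uint64_t reg, uint64_t mask)
{
	return (reg & mask) >> mask_shift(mask);
}

int main(void)
{
	/* mirrors CHECK_ENC_GET(16, 5, 0x0f00, 0x0500) from the test above */
	uint64_t enc = encode_bits(5, 0x0f00);

	printf("enc=0x%04llx dec=%llu\n",
	       (unsigned long long)enc,				/* 0x0500 */
	       (unsigned long long)get_bits(enc, 0x0f00));	/* 5 */
	return 0;
}
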
index fb69681091134cc879748b40f1f371978056be25..82ac39ce53105f2dc39d517467333b255ae218cb 100644 (file)
@@ -83,7 +83,7 @@ static u32 my_hashfn(const void *data, u32 len, u32 seed)
 {
        const struct test_obj_rhl *obj = data;
 
-       return (obj->value.id % 10) << RHT_HASH_RESERVED_SPACE;
+       return (obj->value.id % 10);
 }
 
 static int my_cmpfn(struct rhashtable_compare_arg *arg, const void *obj)
@@ -99,7 +99,6 @@ static struct rhashtable_params test_rht_params = {
        .key_offset = offsetof(struct test_obj, value),
        .key_len = sizeof(struct test_obj_val),
        .hashfn = jhash,
-       .nulls_base = (3U << RHT_BASE_SHIFT),
 };
 
 static struct rhashtable_params test_rht_params_dup = {
@@ -296,8 +295,6 @@ static int __init test_rhltable(unsigned int entries)
        if (!obj_in_table)
                goto out_free;
 
-       /* nulls_base not supported in rhlist interface */
-       test_rht_params.nulls_base = 0;
        err = rhltable_init(&rhlt, &test_rht_params);
        if (WARN_ON(err))
                goto out_free;
@@ -501,6 +498,8 @@ static unsigned int __init print_ht(struct rhltable *rhlt)
        unsigned int i, cnt = 0;
 
        ht = &rhlt->ht;
+       /* Take the mutex to avoid RCU warning */
+       mutex_lock(&ht->mutex);
        tbl = rht_dereference(ht->tbl, ht);
        for (i = 0; i < tbl->size; i++) {
                struct rhash_head *pos, *next;
@@ -534,6 +533,7 @@ static unsigned int __init print_ht(struct rhltable *rhlt)
                }
        }
        printk(KERN_ERR "\n---- ht: ----%s\n-------------\n", buff);
+       mutex_unlock(&ht->mutex);
 
        return cnt;
 }
index 9b703454b93ec2f4d0710ddec86d94bcfd223c14..e05d4d7aab354d6c86148152fbd3bdc17bba8d2f 100644 (file)
@@ -9,4 +9,3 @@ obj-$(CONFIG_VLAN_8021Q)                += 8021q.o
 8021q-$(CONFIG_VLAN_8021Q_GVRP)                += vlan_gvrp.o
 8021q-$(CONFIG_VLAN_8021Q_MVRP)                += vlan_mvrp.o
 8021q-$(CONFIG_PROC_FS)                        += vlanproc.o
-
index 8ccee3d01822f78184357141ced7a07c3109dc2c..5e99504539559e39f45e873171e53a565195b7bc 100644 (file)
@@ -647,13 +647,14 @@ out:
        return err;
 }
 
-static struct sk_buff **vlan_gro_receive(struct sk_buff **head,
-                                        struct sk_buff *skb)
+static struct sk_buff *vlan_gro_receive(struct list_head *head,
+                                       struct sk_buff *skb)
 {
-       struct sk_buff *p, **pp = NULL;
-       struct vlan_hdr *vhdr;
-       unsigned int hlen, off_vlan;
        const struct packet_offload *ptype;
+       unsigned int hlen, off_vlan;
+       struct sk_buff *pp = NULL;
+       struct vlan_hdr *vhdr;
+       struct sk_buff *p;
        __be16 type;
        int flush = 1;
 
@@ -675,7 +676,7 @@ static struct sk_buff **vlan_gro_receive(struct sk_buff **head,
 
        flush = 0;
 
-       for (p = *head; p; p = p->next) {
+       list_for_each_entry(p, head, list) {
                struct vlan_hdr *vhdr2;
 
                if (!NAPI_GRO_CB(p)->same_flow)
index f738a6f27665515a67c88e0b1725a120560a9ce5..228dfa382eeca8afd12d32475978259738dceb53 100644 (file)
@@ -12,7 +12,7 @@ menuconfig NET
          The reason is that some programs need kernel networking support even
          when running on a stand-alone machine that isn't connected to any
          other computer.
-         
+
          If you are upgrading from an older kernel, you
          should consider updating your networking tools too because changes
          in the kernel and the tools often go hand in hand. The tools are
index a7a68e5096288df11af1037297189962dc2fa548..9f8cb0d2e71ef226130a6c8ace4929ae0c8886d2 100644 (file)
@@ -653,7 +653,7 @@ __poll_t vcc_poll(struct file *file, struct socket *sock, poll_table *wait)
        struct atm_vcc *vcc;
        __poll_t mask;
 
-       sock_poll_wait(file, sk_sleep(sk), wait);
+       sock_poll_wait(file, wait);
        mask = 0;
 
        vcc = ATM_SD(sock);
index b93cc0f182929e6a52e66bf5eb933bbbd360c19f..46d6cd9a36ae7ae42798010bd65c4fd91b61869d 100644 (file)
@@ -307,9 +307,3 @@ void mpc_proc_clean(void)
 }
 
 #endif /* CONFIG_PROC_FS */
-
-
-
-
-
-
index ac2542b7be889955725c3329ad2493e9d6434a74..a14cfa736b63cd88cc5974221df4b1a5283023f7 100644 (file)
@@ -304,4 +304,3 @@ void ax25_digi_invert(const ax25_digi *in, ax25_digi *out)
                }
        }
 }
-
index 891596e7427835b37c752018b1627ca34e7538f9..488fc2d7085a5959109539a5433a7ac235ff59ea 100644 (file)
@@ -299,4 +299,3 @@ int ax25_ds_frame_in(ax25_cb *ax25, struct sk_buff *skb, int type)
 
        return queued;
 }
-
index 28827e81ba2b42eee62f6411cc742b6752d6306a..bc0329f4301377ee714c888d295869b5b293502c 100644 (file)
@@ -205,4 +205,3 @@ void ax25_dama_off(ax25_cb *ax25)
        ax25->condition &= ~AX25_COND_DAMA_MODE;
        ax25_dev_dama_off(ax25->ax25_dev);
 }
-
index 183b1c583d561f7a925de423eb5ea9fef7e025b3..70417e9b932ddcc2d7e5eb8ff1652a0d6ae6d457 100644 (file)
@@ -249,4 +249,3 @@ const struct header_ops ax25_header_ops = {
 
 EXPORT_SYMBOL(ax25_header_ops);
 EXPORT_SYMBOL(ax25_ip_xmit);
-
index b11a5f466fcce742dda24e1edc6f40a038ffcf4e..3e5afc8dc93e0860e04331269a8d51c725d7c8bf 100644 (file)
@@ -394,4 +394,3 @@ int ax25_check_iframes_acked(ax25_cb *ax25, unsigned short nr)
        }
        return 0;
 }
-
index de8034d8062358276253e9e7915d05f3a3bee734..361116f77cb96d758a05bd1b559a90ee1e3d4b4e 100644 (file)
@@ -24,7 +24,6 @@ config BATMAN_ADV
        depends on NET
        select CRC16
        select LIBCRC32C
-        default n
        help
           B.A.T.M.A.N. (better approach to mobile ad-hoc networking) is
           a routing protocol for multi-hop ad-hoc mesh networks. The
@@ -33,7 +32,7 @@ config BATMAN_ADV
           tools.
 
 config BATMAN_ADV_BATMAN_V
-       bool "B.A.T.M.A.N. V protocol (experimental)"
+       bool "B.A.T.M.A.N. V protocol"
        depends on BATMAN_ADV && !(CFG80211=m && BATMAN_ADV=y)
        default y
        help
@@ -60,7 +59,7 @@ config BATMAN_ADV_BLA
 config BATMAN_ADV_DAT
        bool "Distributed ARP Table"
        depends on BATMAN_ADV && INET
-       default n
+       default y
        help
          This option enables DAT (Distributed ARP Table), a DHT based
          mechanism that increases ARP reliability on sparse wireless
@@ -70,7 +69,6 @@ config BATMAN_ADV_DAT
 config BATMAN_ADV_NC
        bool "Network Coding"
        depends on BATMAN_ADV
-       default n
        help
          This option enables network coding, a mechanism that aims to
          increase the overall network throughput by fusing multiple
@@ -84,7 +82,6 @@ config BATMAN_ADV_NC
 config BATMAN_ADV_MCAST
        bool "Multicast optimisation"
        depends on BATMAN_ADV && INET && !(BRIDGE=m && BATMAN_ADV=y)
-       default n
        help
          This option enables the multicast optimisation which aims to
          reduce the air overhead while improving the reliability of
@@ -94,7 +91,6 @@ config BATMAN_ADV_DEBUGFS
        bool "batman-adv debugfs entries"
        depends on BATMAN_ADV
        depends on DEBUG_FS
-       default n
        help
          Enable this to export routing related debug tables via debugfs.
          The information for each soft-interface and used hard-interface can be
index 317cafd302cfdd62c5ed6e7b6171a3e2ee4b6214..3dc6a7a43eb71dbdd40701602bc2f44a6854e06e 100644 (file)
  * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
-#ifndef _BATMAN_ADV_BATADV_IV_OGM_H_
-#define _BATMAN_ADV_BATADV_IV_OGM_H_
+#ifndef _NET_BATMAN_ADV_BAT_IV_OGM_H_
+#define _NET_BATMAN_ADV_BAT_IV_OGM_H_
 
 #include "main.h"
 
 int batadv_iv_init(void);
 
-#endif /* _BATMAN_ADV_BATADV_IV_OGM_H_ */
+#endif /* _NET_BATMAN_ADV_BAT_IV_OGM_H_ */
index ed36c5e79fde8f72db1a123176ed46262f5c0b71..e5be14c908c663baa5ccfd8d4426ff85e1e61627 100644 (file)
@@ -16,8 +16,8 @@
  * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
-#ifndef _BATMAN_ADV_BATADV_V_OGM_H_
-#define _BATMAN_ADV_BATADV_V_OGM_H_
+#ifndef _NET_BATMAN_ADV_BAT_V_OGM_H_
+#define _NET_BATMAN_ADV_BAT_V_OGM_H_
 
 #include "main.h"
 
@@ -34,4 +34,4 @@ void batadv_v_ogm_primary_iface_set(struct batadv_hard_iface *primary_iface);
 int batadv_v_ogm_packet_recv(struct sk_buff *skb,
                             struct batadv_hard_iface *if_incoming);
 
-#endif /* _BATMAN_ADV_BATADV_V_OGM_H_ */
+#endif /* _NET_BATMAN_ADV_BAT_V_OGM_H_ */
index a2de5a44bd41bf5c3d521d29b72e0b225a3ace05..ff9659af6b9177cdb23523d79a4bc70665b1cc4d 100644 (file)
@@ -1449,7 +1449,7 @@ static void batadv_bla_periodic_work(struct work_struct *work)
                 * detection frames. Set the locally administered bit to avoid
                 * collisions with users' MAC addresses.
                 */
-               random_ether_addr(bat_priv->bla.loopdetect_addr);
+               eth_random_addr(bat_priv->bla.loopdetect_addr);
                bat_priv->bla.loopdetect_addr[0] = 0xba;
                bat_priv->bla.loopdetect_addr[1] = 0xbe;
                bat_priv->bla.loopdetect_lasttime = jiffies;
index 87479c60670ebfbe2ad3df17130f1289d657df7b..3cb82378300bdcc43409d8a61dbf6a77c7928a1d 100644 (file)
@@ -118,7 +118,7 @@ static int batadv_bla_backbone_table_open(struct inode *inode,
 
 #ifdef CONFIG_BATMAN_ADV_DAT
 /**
- * batadv_dat_cache_open() - Prepare file handler for reads from dat_chache
+ * batadv_dat_cache_open() - Prepare file handler for reads from dat_cache
  * @inode: inode which was opened
  * @file: file handle to be initialized
  *
index 716e5b43acfae598c99b8fc1a491276aad0a2d6f..1d295da3e342b6552abfd1fbced1347db5e9a2fe 100644 (file)
@@ -1339,7 +1339,11 @@ static bool batadv_purge_orig_node(struct batadv_priv *bat_priv,
        return false;
 }
 
-static void _batadv_purge_orig(struct batadv_priv *bat_priv)
+/**
+ * batadv_purge_orig_ref() - Purge all outdated originators
+ * @bat_priv: the bat priv with all the soft interface information
+ */
+void batadv_purge_orig_ref(struct batadv_priv *bat_priv)
 {
        struct batadv_hashtable *hash = bat_priv->orig_hash;
        struct hlist_node *node_tmp;
@@ -1385,21 +1389,12 @@ static void batadv_purge_orig(struct work_struct *work)
 
        delayed_work = to_delayed_work(work);
        bat_priv = container_of(delayed_work, struct batadv_priv, orig_work);
-       _batadv_purge_orig(bat_priv);
+       batadv_purge_orig_ref(bat_priv);
        queue_delayed_work(batadv_event_workqueue,
                           &bat_priv->orig_work,
                           msecs_to_jiffies(BATADV_ORIG_WORK_PERIOD));
 }
 
-/**
- * batadv_purge_orig_ref() - Purge all outdated originators
- * @bat_priv: the bat priv with all the soft interface information
- */
-void batadv_purge_orig_ref(struct batadv_priv *bat_priv)
-{
-       _batadv_purge_orig(bat_priv);
-}
-
 #ifdef CONFIG_BATMAN_ADV_DEBUGFS
 
 /**
index 360357f83f203c9293667f6e86969667f5b64bbd..343d304851a5c9757f7993043692c20ac03de76e 100644 (file)
@@ -43,12 +43,13 @@ struct seq_file;
 #ifdef CONFIG_BATMAN_ADV_DAT
 
 /**
- * batadv_dat_addr_t - it is the type used for all DHT addresses. If it is
- *  changed, BATADV_DAT_ADDR_MAX is changed as well.
+ * typedef batadv_dat_addr_t - type used for all DHT addresses
+ *
+ * If it is changed, BATADV_DAT_ADDR_MAX is changed as well.
  *
  * *Please be careful: batadv_dat_addr_t must be UNSIGNED*
  */
-#define batadv_dat_addr_t u16
+typedef u16 batadv_dat_addr_t;
 
 #endif /* CONFIG_BATMAN_ADV_DAT */
 
index 76deb661588322d9cf8ac6bdd73ba63f5d1416fc..e558b46596c49a2780d98db511f4304cf5b2902e 100644 (file)
@@ -13,4 +13,3 @@ config BPFILTER_UMH
        help
          This builds bpfilter kernel module with embedded user mode helper
 endif
-
index 9019f326fe81e7e29a2ec685a134040afcf8edb6..5372e2042adfe20d3cd039c29057535b2413be61 100644 (file)
@@ -142,7 +142,20 @@ static int deliver_clone(const struct net_bridge_port *prev,
 void br_forward(const struct net_bridge_port *to,
                struct sk_buff *skb, bool local_rcv, bool local_orig)
 {
-       if (to && should_deliver(to, skb)) {
+       if (unlikely(!to))
+               goto out;
+
+       /* redirect to backup link if the destination port is down */
+       if (rcu_access_pointer(to->backup_port) && !netif_carrier_ok(to->dev)) {
+               struct net_bridge_port *backup_port;
+
+               backup_port = rcu_dereference(to->backup_port);
+               if (unlikely(!backup_port))
+                       goto out;
+               to = backup_port;
+       }
+
+       if (should_deliver(to, skb)) {
                if (local_rcv)
                        deliver_clone(to, skb, local_orig);
                else
@@ -150,6 +163,7 @@ void br_forward(const struct net_bridge_port *to,
                return;
        }
 
+out:
        if (!local_rcv)
                kfree_skb(skb);
 }
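
br_forward() now consults an RCU-protected backup_port when the chosen egress port has lost carrier. The pointer is tested twice on purpose: the cheap rcu_access_pointer() check keeps the common carrier-up path free of a dereference, and the second NULL check after rcu_dereference() catches the backup being cleared in between. A userspace sketch of that pick-primary-else-backup shape, with C11 atomic loads standing in for the RCU accessors (all names illustrative):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct port {
	const char *name;
	bool carrier_ok;
	_Atomic(struct port *) backup;	/* may be cleared concurrently */
};

/* primary if its carrier is up, else its backup (if still set) */
static struct port *pick_egress(struct port *to)
{
	if (atomic_load(&to->backup) && !to->carrier_ok) {
		struct port *backup = atomic_load(&to->backup);

		if (!backup)		/* cleared between the two loads */
			return NULL;
		to = backup;
	}
	return to;
}

int main(void)
{
	struct port b = { .name = "swp2", .carrier_ok = true };
	struct port a = { .name = "swp1", .carrier_ok = true };

	atomic_store(&a.backup, &b);
	printf("egress: %s\n", pick_egress(&a)->name);	/* swp1: carrier up */
	a.carrier_ok = false;
	printf("egress: %s\n", pick_egress(&a)->name);	/* swp2: failed over */
	return 0;
}
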
index 05e42d86882d69de07cdb79ec9d00e0f214e69cd..0363f1bdc401db109d81b54195f6d93ddb24416b 100644 (file)
@@ -26,6 +26,7 @@
 #include <net/sock.h>
 #include <linux/if_vlan.h>
 #include <net/switchdev.h>
+#include <net/net_namespace.h>
 
 #include "br_private.h"
 
@@ -169,6 +170,58 @@ void br_manage_promisc(struct net_bridge *br)
        }
 }
 
+int nbp_backup_change(struct net_bridge_port *p,
+                     struct net_device *backup_dev)
+{
+       struct net_bridge_port *old_backup = rtnl_dereference(p->backup_port);
+       struct net_bridge_port *backup_p = NULL;
+
+       ASSERT_RTNL();
+
+       if (backup_dev) {
+               if (!br_port_exists(backup_dev))
+                       return -ENOENT;
+
+               backup_p = br_port_get_rtnl(backup_dev);
+               if (backup_p->br != p->br)
+                       return -EINVAL;
+       }
+
+       if (p == backup_p)
+               return -EINVAL;
+
+       if (old_backup == backup_p)
+               return 0;
+
+       /* if the backup link is already set, clear it */
+       if (old_backup)
+               old_backup->backup_redirected_cnt--;
+
+       if (backup_p)
+               backup_p->backup_redirected_cnt++;
+       rcu_assign_pointer(p->backup_port, backup_p);
+
+       return 0;
+}
+
+static void nbp_backup_clear(struct net_bridge_port *p)
+{
+       nbp_backup_change(p, NULL);
+       if (p->backup_redirected_cnt) {
+               struct net_bridge_port *cur_p;
+
+               list_for_each_entry(cur_p, &p->br->port_list, list) {
+                       struct net_bridge_port *backup_p;
+
+                       backup_p = rtnl_dereference(cur_p->backup_port);
+                       if (backup_p == p)
+                               nbp_backup_change(cur_p, NULL);
+               }
+       }
+
+       WARN_ON(rcu_access_pointer(p->backup_port) || p->backup_redirected_cnt);
+}
+
 static void nbp_update_port_count(struct net_bridge *br)
 {
        struct net_bridge_port *p;
@@ -204,11 +257,19 @@ static void release_nbp(struct kobject *kobj)
        kfree(p);
 }
 
+static void brport_get_ownership(struct kobject *kobj, kuid_t *uid, kgid_t *gid)
+{
+       struct net_bridge_port *p = kobj_to_brport(kobj);
+
+       net_ns_get_ownership(dev_net(p->dev), uid, gid);
+}
+
 static struct kobj_type brport_ktype = {
 #ifdef CONFIG_SYSFS
        .sysfs_ops = &brport_sysfs_ops,
 #endif
        .release = release_nbp,
+       .get_ownership = brport_get_ownership,
 };
 
 static void destroy_nbp(struct net_bridge_port *p)
@@ -286,6 +347,7 @@ static void del_nbp(struct net_bridge_port *p)
        nbp_vlan_flush(p);
        br_fdb_delete_by_port(br, p, 0, 1);
        switchdev_deferred_process();
+       nbp_backup_clear(p);
 
        nbp_update_port_count(br);
 
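nbp_backup_change() pairs the pointer switch with reference-count bookkeeping: the old target's backup_redirected_cnt drops and the new target's rises. A compact sketch of that swap-with-refcount idiom (locking omitted; plain C stand-ins for the bridge structures):

#include <stddef.h>
#include <stdio.h>

struct port {
	struct port *backup;		/* who we fail over to */
	unsigned int redirected_cnt;	/* how many ports fail over to us */
};

/* re-point p->backup, keeping both targets' counts in sync,
 * mirroring nbp_backup_change() above */
static void backup_change(struct port *p, struct port *new_backup)
{
	if (p->backup == new_backup)
		return;
	if (p->backup)
		p->backup->redirected_cnt--;
	if (new_backup)
		new_backup->redirected_cnt++;
	p->backup = new_backup;
}

int main(void)
{
	struct port a = { NULL, 0 }, b = { NULL, 0 }, c = { NULL, 0 };

	backup_change(&a, &c);
	backup_change(&b, &c);
	backup_change(&a, NULL);	/* a drops its reference again */
	printf("c backs up %u port(s)\n", c.redirected_cnt);	/* 1 */
	return 0;
}

The counter is what lets nbp_backup_clear() skip the O(ports) list walk entirely when nothing fails over to the port being deleted.
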
index 9f5eb05b0373750900cd298a122a16b78c374d31..ec2b58a09f76381b75179f38e438e190027a5102 100644 (file)
@@ -169,13 +169,15 @@ static inline size_t br_nlmsg_size(struct net_device *dev, u32 filter_mask)
                + nla_total_size(1) /* IFLA_OPERSTATE */
                + nla_total_size(br_port_info_size()) /* IFLA_PROTINFO */
                + nla_total_size(br_get_link_af_size_filtered(dev,
-                                filter_mask)); /* IFLA_AF_SPEC */
+                                filter_mask)) /* IFLA_AF_SPEC */
+               + nla_total_size(4); /* IFLA_BRPORT_BACKUP_PORT */
 }
 
 static int br_port_fill_attrs(struct sk_buff *skb,
                              const struct net_bridge_port *p)
 {
        u8 mode = !!(p->flags & BR_HAIRPIN_MODE);
+       struct net_bridge_port *backup_p;
        u64 timerval;
 
        if (nla_put_u8(skb, IFLA_BRPORT_STATE, p->state) ||
@@ -237,6 +239,14 @@ static int br_port_fill_attrs(struct sk_buff *skb,
                return -EMSGSIZE;
 #endif
 
+       /* we might be called with only br->lock held */
+       rcu_read_lock();
+       backup_p = rcu_dereference(p->backup_port);
+       if (backup_p)
+               nla_put_u32(skb, IFLA_BRPORT_BACKUP_PORT,
+                           backup_p->dev->ifindex);
+       rcu_read_unlock();
+
        return 0;
 }
 
@@ -663,6 +673,7 @@ static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = {
        [IFLA_BRPORT_GROUP_FWD_MASK] = { .type = NLA_U16 },
        [IFLA_BRPORT_NEIGH_SUPPRESS] = { .type = NLA_U8 },
        [IFLA_BRPORT_ISOLATED]  = { .type = NLA_U8 },
+       [IFLA_BRPORT_BACKUP_PORT] = { .type = NLA_U32 },
 };
 
 /* Change the state of the port and notify spanning tree */
@@ -817,6 +828,23 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[])
        if (err)
                return err;
 
+       if (tb[IFLA_BRPORT_BACKUP_PORT]) {
+               struct net_device *backup_dev = NULL;
+               u32 backup_ifindex;
+
+               backup_ifindex = nla_get_u32(tb[IFLA_BRPORT_BACKUP_PORT]);
+               if (backup_ifindex) {
+                       backup_dev = __dev_get_by_index(dev_net(p->dev),
+                                                       backup_ifindex);
+                       if (!backup_dev)
+                               return -ENOENT;
+               }
+
+               err = nbp_backup_change(p, backup_dev);
+               if (err)
+                       return err;
+       }
+
        br_port_flags_change(p, old_flags ^ p->flags);
        return 0;
 }
index 5216a524b53701d3930a550a26e01fbafc920b6b..11ed2029985fd7a96938a5b7b838864e3a06cfbc 100644 (file)
@@ -237,6 +237,7 @@ struct net_bridge_port {
 #ifdef CONFIG_BRIDGE_VLAN_FILTERING
        struct net_bridge_vlan_group    __rcu *vlgrp;
 #endif
+       struct net_bridge_port          __rcu *backup_port;
 
        /* STP */
        u8                              priority;
@@ -281,8 +282,11 @@ struct net_bridge_port {
        int                             offload_fwd_mark;
 #endif
        u16                             group_fwd_mask;
+       u16                             backup_redirected_cnt;
 };
 
+#define kobj_to_brport(obj)    container_of(obj, struct net_bridge_port, kobj)
+
 #define br_auto_port(p) ((p)->flags & BR_AUTO_MASK)
 #define br_promisc_port(p) ((p)->flags & BR_PROMISC)
 
@@ -595,6 +599,7 @@ netdev_features_t br_features_recompute(struct net_bridge *br,
                                        netdev_features_t features);
 void br_port_flags_change(struct net_bridge_port *port, unsigned long mask);
 void br_manage_promisc(struct net_bridge *br);
+int nbp_backup_change(struct net_bridge_port *p, struct net_device *backup_dev);
 
 /* br_input.c */
 int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb);
index f99c5bf5c906270e00c1e96177970ac15fdcbc71..7c87a2fe52480cd85cd240e0a5845095c17cdcb5 100644 (file)
@@ -25,6 +25,15 @@ struct brport_attribute {
        struct attribute        attr;
        ssize_t (*show)(struct net_bridge_port *, char *);
        int (*store)(struct net_bridge_port *, unsigned long);
+       int (*store_raw)(struct net_bridge_port *, char *);
+};
+
+#define BRPORT_ATTR_RAW(_name, _mode, _show, _store)                   \
+const struct brport_attribute brport_attr_##_name = {                  \
+       .attr           = {.name = __stringify(_name),                  \
+                          .mode = _mode },                             \
+       .show           = _show,                                        \
+       .store_raw      = _store,                                       \
 };
 
 #define BRPORT_ATTR(_name, _mode, _show, _store)               \
@@ -182,6 +191,38 @@ static int store_group_fwd_mask(struct net_bridge_port *p,
 static BRPORT_ATTR(group_fwd_mask, 0644, show_group_fwd_mask,
                   store_group_fwd_mask);
 
+static ssize_t show_backup_port(struct net_bridge_port *p, char *buf)
+{
+       struct net_bridge_port *backup_p;
+       int ret = 0;
+
+       rcu_read_lock();
+       backup_p = rcu_dereference(p->backup_port);
+       if (backup_p)
+               ret = sprintf(buf, "%s\n", backup_p->dev->name);
+       rcu_read_unlock();
+
+       return ret;
+}
+
+static int store_backup_port(struct net_bridge_port *p, char *buf)
+{
+       struct net_device *backup_dev = NULL;
+       char *nl = strchr(buf, '\n');
+
+       if (nl)
+               *nl = '\0';
+
+       if (strlen(buf) > 0) {
+               backup_dev = __dev_get_by_name(dev_net(p->dev), buf);
+               if (!backup_dev)
+                       return -ENOENT;
+       }
+
+       return nbp_backup_change(p, backup_dev);
+}
+static BRPORT_ATTR_RAW(backup_port, 0644, show_backup_port, store_backup_port);
+
 BRPORT_ATTR_FLAG(hairpin_mode, BR_HAIRPIN_MODE);
 BRPORT_ATTR_FLAG(bpdu_guard, BR_BPDU_GUARD);
 BRPORT_ATTR_FLAG(root_block, BR_ROOT_BLOCK);
@@ -245,17 +286,17 @@ static const struct brport_attribute *brport_attrs[] = {
        &brport_attr_group_fwd_mask,
        &brport_attr_neigh_suppress,
        &brport_attr_isolated,
+       &brport_attr_backup_port,
        NULL
 };
 
 #define to_brport_attr(_at) container_of(_at, struct brport_attribute, attr)
-#define to_brport(obj) container_of(obj, struct net_bridge_port, kobj)
 
 static ssize_t brport_show(struct kobject *kobj,
                           struct attribute *attr, char *buf)
 {
        struct brport_attribute *brport_attr = to_brport_attr(attr);
-       struct net_bridge_port *p = to_brport(kobj);
+       struct net_bridge_port *p = kobj_to_brport(kobj);
 
        if (!brport_attr->show)
                return -EINVAL;
@@ -268,29 +309,48 @@ static ssize_t brport_store(struct kobject *kobj,
                            const char *buf, size_t count)
 {
        struct brport_attribute *brport_attr = to_brport_attr(attr);
-       struct net_bridge_port *p = to_brport(kobj);
+       struct net_bridge_port *p = kobj_to_brport(kobj);
        ssize_t ret = -EINVAL;
-       char *endp;
        unsigned long val;
+       char *endp;
 
        if (!ns_capable(dev_net(p->dev)->user_ns, CAP_NET_ADMIN))
                return -EPERM;
 
-       val = simple_strtoul(buf, &endp, 0);
-       if (endp != buf) {
-               if (!rtnl_trylock())
-                       return restart_syscall();
-               if (p->dev && p->br && brport_attr->store) {
-                       spin_lock_bh(&p->br->lock);
-                       ret = brport_attr->store(p, val);
-                       spin_unlock_bh(&p->br->lock);
-                       if (!ret) {
-                               br_ifinfo_notify(RTM_NEWLINK, NULL, p);
-                               ret = count;
-                       }
+       if (!rtnl_trylock())
+               return restart_syscall();
+
+       if (!p->dev || !p->br)
+               goto out_unlock;
+
+       if (brport_attr->store_raw) {
+               char *buf_copy;
+
+               buf_copy = kstrndup(buf, count, GFP_KERNEL);
+               if (!buf_copy) {
+                       ret = -ENOMEM;
+                       goto out_unlock;
                }
-               rtnl_unlock();
+               spin_lock_bh(&p->br->lock);
+               ret = brport_attr->store_raw(p, buf_copy);
+               spin_unlock_bh(&p->br->lock);
+               kfree(buf_copy);
+       } else if (brport_attr->store) {
+               val = simple_strtoul(buf, &endp, 0);
+               if (endp == buf)
+                       goto out_unlock;
+               spin_lock_bh(&p->br->lock);
+               ret = brport_attr->store(p, val);
+               spin_unlock_bh(&p->br->lock);
        }
+
+       if (!ret) {
+               br_ifinfo_notify(RTM_NEWLINK, NULL, p);
+               ret = count;
+       }
+out_unlock:
+       rtnl_unlock();
+
        return ret;
 }
 
index 6de981270566966eca1c0e0eaf6ed7400fa3e9d3..08cbed7d940e37ee34e58c364326f3b75bab50f7 100644 (file)
@@ -89,8 +89,7 @@ static void nft_reject_br_send_v4_tcp_reset(struct net *net,
        niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP,
                                   net->ipv4.sysctl_ip_default_ttl);
        nf_reject_ip_tcphdr_put(nskb, oldskb, oth);
-       niph->ttl       = net->ipv4.sysctl_ip_default_ttl;
-       niph->tot_len   = htons(nskb->len);
+       niph->tot_len = htons(nskb->len);
        ip_send_check(niph);
 
        nft_reject_br_push_etherhdr(oldskb, nskb);
index a6fb1b3bcad9b2f3c1c24b2a3496ad21b07c69d9..d18965f3291f3ec7166dedc36fd5cdf7bc4097d4 100644 (file)
@@ -941,7 +941,7 @@ static __poll_t caif_poll(struct file *file,
        __poll_t mask;
        struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
 
-       sock_poll_wait(file, sk_sleep(sk), wait);
+       sock_poll_wait(file, wait);
        mask = 0;
 
        /* exceptional events? */
index 9938952c5c78f1e72ef13f44517ef054a60205b2..9aac0d63d53eb6527d4b8a55e022f6a5e7970c4e 100644 (file)
@@ -837,7 +837,7 @@ __poll_t datagram_poll(struct file *file, struct socket *sock,
        struct sock *sk = sock->sk;
        __poll_t mask;
 
-       sock_poll_wait(file, sk_sleep(sk), wait);
+       sock_poll_wait(file, wait);
        mask = 0;
 
        /* exceptional events? */
index 559a91271f82d09ae73e7026707e9df20033b361..36e994519488e5e16e2f632878a815650478552c 100644 (file)
 
 #include "net-sysfs.h"
 
-/* Instead of increasing this, you should create a hash table. */
 #define MAX_GRO_SKBS 8
 
 /* This should be increased if a protocol with a bigger head is added. */
@@ -2068,11 +2067,13 @@ int netdev_txq_to_tc(struct net_device *dev, unsigned int txq)
                struct netdev_tc_txq *tc = &dev->tc_to_txq[0];
                int i;
 
+               /* walk through the TCs and see if it falls into any of them */
                for (i = 0; i < TC_MAX_QUEUE; i++, tc++) {
                        if ((txq - tc->offset) < tc->count)
                                return i;
                }
 
+               /* didn't find it, just return -1 to indicate no match */
                return -1;
        }
 
@@ -2081,6 +2082,10 @@ int netdev_txq_to_tc(struct net_device *dev, unsigned int txq)
 EXPORT_SYMBOL(netdev_txq_to_tc);
 
 #ifdef CONFIG_XPS
+struct static_key xps_needed __read_mostly;
+EXPORT_SYMBOL(xps_needed);
+struct static_key xps_rxqs_needed __read_mostly;
+EXPORT_SYMBOL(xps_rxqs_needed);
 static DEFINE_MUTEX(xps_map_mutex);
 #define xmap_dereference(P)            \
        rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
@@ -2092,7 +2097,7 @@ static bool remove_xps_queue(struct xps_dev_maps *dev_maps,
        int pos;
 
        if (dev_maps)
-               map = xmap_dereference(dev_maps->cpu_map[tci]);
+               map = xmap_dereference(dev_maps->attr_map[tci]);
        if (!map)
                return false;
 
@@ -2105,7 +2110,7 @@ static bool remove_xps_queue(struct xps_dev_maps *dev_maps,
                        break;
                }
 
-               RCU_INIT_POINTER(dev_maps->cpu_map[tci], NULL);
+               RCU_INIT_POINTER(dev_maps->attr_map[tci], NULL);
                kfree_rcu(map, rcu);
                return false;
        }
@@ -2135,33 +2140,68 @@ static bool remove_xps_queue_cpu(struct net_device *dev,
        return active;
 }
 
+static void clean_xps_maps(struct net_device *dev, const unsigned long *mask,
+                          struct xps_dev_maps *dev_maps, unsigned int nr_ids,
+                          u16 offset, u16 count, bool is_rxqs_map)
+{
+       bool active = false;
+       int i, j;
+
+       for (j = -1; j = netif_attrmask_next(j, mask, nr_ids),
+            j < nr_ids;)
+               active |= remove_xps_queue_cpu(dev, dev_maps, j, offset,
+                                              count);
+       if (!active) {
+               if (is_rxqs_map) {
+                       RCU_INIT_POINTER(dev->xps_rxqs_map, NULL);
+               } else {
+                       RCU_INIT_POINTER(dev->xps_cpus_map, NULL);
+
+                       for (i = offset + (count - 1); count--; i--)
+                               netdev_queue_numa_node_write(
+                                       netdev_get_tx_queue(dev, i),
+                                                       NUMA_NO_NODE);
+               }
+               kfree_rcu(dev_maps, rcu);
+       }
+}
+
 static void netif_reset_xps_queues(struct net_device *dev, u16 offset,
                                   u16 count)
 {
+       const unsigned long *possible_mask = NULL;
        struct xps_dev_maps *dev_maps;
-       int cpu, i;
-       bool active = false;
+       unsigned int nr_ids;
+
+       if (!static_key_false(&xps_needed))
+               return;
 
        mutex_lock(&xps_map_mutex);
-       dev_maps = xmap_dereference(dev->xps_maps);
 
+       if (static_key_false(&xps_rxqs_needed)) {
+               dev_maps = xmap_dereference(dev->xps_rxqs_map);
+               if (dev_maps) {
+                       nr_ids = dev->num_rx_queues;
+                       clean_xps_maps(dev, possible_mask, dev_maps, nr_ids,
+                                      offset, count, true);
+               }
+       }
+
+       dev_maps = xmap_dereference(dev->xps_cpus_map);
        if (!dev_maps)
                goto out_no_maps;
 
-       for_each_possible_cpu(cpu)
-               active |= remove_xps_queue_cpu(dev, dev_maps, cpu,
-                                              offset, count);
-
-       if (!active) {
-               RCU_INIT_POINTER(dev->xps_maps, NULL);
-               kfree_rcu(dev_maps, rcu);
-       }
-
-       for (i = offset + (count - 1); count--; i--)
-               netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i),
-                                            NUMA_NO_NODE);
+       if (num_possible_cpus() > 1)
+               possible_mask = cpumask_bits(cpu_possible_mask);
+       nr_ids = nr_cpu_ids;
+       clean_xps_maps(dev, possible_mask, dev_maps, nr_ids, offset, count,
+                      false);
 
 out_no_maps:
+       if (static_key_enabled(&xps_rxqs_needed))
+               static_key_slow_dec(&xps_rxqs_needed);
+
+       static_key_slow_dec(&xps_needed);
        mutex_unlock(&xps_map_mutex);
 }
 
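Throughout the rewritten XPS code the old for_each_possible_cpu() loops become "for (j = -1; j = netif_attrmask_next(j, mask, nr_ids), j < nr_ids;)": the comma operator advances j to the next set id and then tests it, and a NULL mask means "every id below nr_ids", so one loop shape covers both CPU ids and rx-queue ids. A userspace model of that iterator (the helper is written from scratch here, not the kernel's implementation):

#include <stdio.h>

/* next set bit after "prev"; NULL mask means all ids below nr_ids are set */
static int attrmask_next(int prev, const unsigned long *mask,
			 unsigned int nr_ids)
{
	unsigned int i;

	for (i = prev + 1; i < nr_ids; i++)
		if (!mask ||
		    (mask[i / (8 * sizeof(long))] >>
		     (i % (8 * sizeof(long)))) & 1)
			return i;
	return nr_ids;	/* exhausted */
}

int main(void)
{
	unsigned long mask = 0x29;	/* bits 0, 3, 5 set */
	int j;

	/* same shape as the kernel loop: the comma operator advances j,
	 * then the comparison decides whether to run the body */
	for (j = -1; j = attrmask_next(j, &mask, 6), j < 6;)
		printf("id %d\n", j);
	return 0;
}
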
@@ -2170,8 +2210,8 @@ static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index)
        netif_reset_xps_queues(dev, index, dev->num_tx_queues - index);
 }
 
-static struct xps_map *expand_xps_map(struct xps_map *map,
-                                     int cpu, u16 index)
+static struct xps_map *expand_xps_map(struct xps_map *map, int attr_index,
+                                     u16 index, bool is_rxqs_map)
 {
        struct xps_map *new_map;
        int alloc_len = XPS_MIN_MAP_ALLOC;
@@ -2183,7 +2223,7 @@ static struct xps_map *expand_xps_map(struct xps_map *map,
                return map;
        }
 
-       /* Need to add queue to this CPU's existing map */
+       /* Need to add tx-queue to this CPU's/rx-queue's existing map */
        if (map) {
                if (pos < map->alloc_len)
                        return map;
@@ -2191,9 +2231,14 @@ static struct xps_map *expand_xps_map(struct xps_map *map,
                alloc_len = map->alloc_len * 2;
        }
 
-       /* Need to allocate new map to store queue on this CPU's map */
-       new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL,
-                              cpu_to_node(cpu));
+       /* Need to allocate a new map to store the tx-queue on this
+        * CPU's/rx-queue's map
+        */
+       if (is_rxqs_map)
+               new_map = kzalloc(XPS_MAP_SIZE(alloc_len), GFP_KERNEL);
+       else
+               new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL,
+                                      cpu_to_node(attr_index));
        if (!new_map)
                return NULL;
 
@@ -2205,32 +2250,52 @@ static struct xps_map *expand_xps_map(struct xps_map *map,
        return new_map;
 }
 
-int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
-                       u16 index)
+int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
+                         u16 index, bool is_rxqs_map)
 {
+       const unsigned long *online_mask = NULL, *possible_mask = NULL;
        struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
-       int i, cpu, tci, numa_node_id = -2;
+       int i, j, tci, numa_node_id = -2;
        int maps_sz, num_tc = 1, tc = 0;
        struct xps_map *map, *new_map;
        bool active = false;
+       unsigned int nr_ids;
 
        if (dev->num_tc) {
+               /* Do not allow XPS on subordinate device directly */
                num_tc = dev->num_tc;
+               if (num_tc < 0)
+                       return -EINVAL;
+
+               /* If queue belongs to subordinate dev use its map */
+               dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;
+
                tc = netdev_txq_to_tc(dev, index);
                if (tc < 0)
                        return -EINVAL;
        }
 
-       maps_sz = XPS_DEV_MAPS_SIZE(num_tc);
-       if (maps_sz < L1_CACHE_BYTES)
-               maps_sz = L1_CACHE_BYTES;
-
        mutex_lock(&xps_map_mutex);
+       if (is_rxqs_map) {
+               maps_sz = XPS_RXQ_DEV_MAPS_SIZE(num_tc, dev->num_rx_queues);
+               dev_maps = xmap_dereference(dev->xps_rxqs_map);
+               nr_ids = dev->num_rx_queues;
+       } else {
+               maps_sz = XPS_CPU_DEV_MAPS_SIZE(num_tc);
+               if (num_possible_cpus() > 1) {
+                       online_mask = cpumask_bits(cpu_online_mask);
+                       possible_mask = cpumask_bits(cpu_possible_mask);
+               }
+               dev_maps = xmap_dereference(dev->xps_cpus_map);
+               nr_ids = nr_cpu_ids;
+       }
 
-       dev_maps = xmap_dereference(dev->xps_maps);
+       if (maps_sz < L1_CACHE_BYTES)
+               maps_sz = L1_CACHE_BYTES;
 
        /* allocate memory for queue storage */
-       for_each_cpu_and(cpu, cpu_online_mask, mask) {
+       for (j = -1; j = netif_attrmask_next_and(j, online_mask, mask, nr_ids),
+            j < nr_ids;) {
                if (!new_dev_maps)
                        new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
                if (!new_dev_maps) {
@@ -2238,73 +2303,85 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
                        return -ENOMEM;
                }
 
-               tci = cpu * num_tc + tc;
-               map = dev_maps ? xmap_dereference(dev_maps->cpu_map[tci]) :
+               tci = j * num_tc + tc;
+               map = dev_maps ? xmap_dereference(dev_maps->attr_map[tci]) :
                                 NULL;
 
-               map = expand_xps_map(map, cpu, index);
+               map = expand_xps_map(map, j, index, is_rxqs_map);
                if (!map)
                        goto error;
 
-               RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
+               RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
        }
 
        if (!new_dev_maps)
                goto out_no_new_maps;
 
-       for_each_possible_cpu(cpu) {
+       static_key_slow_inc(&xps_needed);
+       if (is_rxqs_map)
+               static_key_slow_inc(&xps_rxqs_needed);
+
+       for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
+            j < nr_ids;) {
                /* copy maps belonging to foreign traffic classes */
-               for (i = tc, tci = cpu * num_tc; dev_maps && i--; tci++) {
+               for (i = tc, tci = j * num_tc; dev_maps && i--; tci++) {
                        /* fill in the new device map from the old device map */
-                       map = xmap_dereference(dev_maps->cpu_map[tci]);
-                       RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
+                       map = xmap_dereference(dev_maps->attr_map[tci]);
+                       RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
                }
 
                /* We need to explicitly update tci as the previous loop
                 * could break out early if dev_maps is NULL.
                 */
-               tci = cpu * num_tc + tc;
+               tci = j * num_tc + tc;
 
-               if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) {
-                       /* add queue to CPU maps */
+               if (netif_attr_test_mask(j, mask, nr_ids) &&
+                   netif_attr_test_online(j, online_mask, nr_ids)) {
+                       /* add tx-queue to CPU/rx-queue maps */
                        int pos = 0;
 
-                       map = xmap_dereference(new_dev_maps->cpu_map[tci]);
+                       map = xmap_dereference(new_dev_maps->attr_map[tci]);
                        while ((pos < map->len) && (map->queues[pos] != index))
                                pos++;
 
                        if (pos == map->len)
                                map->queues[map->len++] = index;
 #ifdef CONFIG_NUMA
-                       if (numa_node_id == -2)
-                               numa_node_id = cpu_to_node(cpu);
-                       else if (numa_node_id != cpu_to_node(cpu))
-                               numa_node_id = -1;
+                       if (!is_rxqs_map) {
+                               if (numa_node_id == -2)
+                                       numa_node_id = cpu_to_node(j);
+                               else if (numa_node_id != cpu_to_node(j))
+                                       numa_node_id = -1;
+                       }
 #endif
                } else if (dev_maps) {
                        /* fill in the new device map from the old device map */
-                       map = xmap_dereference(dev_maps->cpu_map[tci]);
-                       RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
+                       map = xmap_dereference(dev_maps->attr_map[tci]);
+                       RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
                }
 
                /* copy maps belonging to foreign traffic classes */
                for (i = num_tc - tc, tci++; dev_maps && --i; tci++) {
                        /* fill in the new device map from the old device map */
-                       map = xmap_dereference(dev_maps->cpu_map[tci]);
-                       RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
+                       map = xmap_dereference(dev_maps->attr_map[tci]);
+                       RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
                }
        }
 
-       rcu_assign_pointer(dev->xps_maps, new_dev_maps);
+       if (is_rxqs_map)
+               rcu_assign_pointer(dev->xps_rxqs_map, new_dev_maps);
+       else
+               rcu_assign_pointer(dev->xps_cpus_map, new_dev_maps);
 
        /* Cleanup old maps */
        if (!dev_maps)
                goto out_no_old_maps;
 
-       for_each_possible_cpu(cpu) {
-               for (i = num_tc, tci = cpu * num_tc; i--; tci++) {
-                       new_map = xmap_dereference(new_dev_maps->cpu_map[tci]);
-                       map = xmap_dereference(dev_maps->cpu_map[tci]);
+       for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
+            j < nr_ids;) {
+               for (i = num_tc, tci = j * num_tc; i--; tci++) {
+                       new_map = xmap_dereference(new_dev_maps->attr_map[tci]);
+                       map = xmap_dereference(dev_maps->attr_map[tci]);
                        if (map && map != new_map)
                                kfree_rcu(map, rcu);
                }
@@ -2317,19 +2394,23 @@ out_no_old_maps:
        active = true;
 
 out_no_new_maps:
-       /* update Tx queue numa node */
-       netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
-                                    (numa_node_id >= 0) ? numa_node_id :
-                                    NUMA_NO_NODE);
+       if (!is_rxqs_map) {
+               /* update Tx queue numa node */
+               netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
+                                            (numa_node_id >= 0) ?
+                                            numa_node_id : NUMA_NO_NODE);
+       }
 
        if (!dev_maps)
                goto out_no_maps;
 
-       /* removes queue from unused CPUs */
-       for_each_possible_cpu(cpu) {
-               for (i = tc, tci = cpu * num_tc; i--; tci++)
+       /* removes tx-queue from unused CPUs/rx-queues */
+       for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
+            j < nr_ids;) {
+               for (i = tc, tci = j * num_tc; i--; tci++)
                        active |= remove_xps_queue(dev_maps, tci, index);
-               if (!cpumask_test_cpu(cpu, mask) || !cpu_online(cpu))
+               if (!netif_attr_test_mask(j, mask, nr_ids) ||
+                   !netif_attr_test_online(j, online_mask, nr_ids))
                        active |= remove_xps_queue(dev_maps, tci, index);
                for (i = num_tc - tc, tci++; --i; tci++)
                        active |= remove_xps_queue(dev_maps, tci, index);
@@ -2337,7 +2418,10 @@ out_no_new_maps:
 
        /* free map if not active */
        if (!active) {
-               RCU_INIT_POINTER(dev->xps_maps, NULL);
+               if (is_rxqs_map)
+                       RCU_INIT_POINTER(dev->xps_rxqs_map, NULL);
+               else
+                       RCU_INIT_POINTER(dev->xps_cpus_map, NULL);
                kfree_rcu(dev_maps, rcu);
        }
 
@@ -2347,11 +2431,12 @@ out_no_maps:
        return 0;
 error:
        /* remove any maps that we added */
-       for_each_possible_cpu(cpu) {
-               for (i = num_tc, tci = cpu * num_tc; i--; tci++) {
-                       new_map = xmap_dereference(new_dev_maps->cpu_map[tci]);
+       for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
+            j < nr_ids;) {
+               for (i = num_tc, tci = j * num_tc; i--; tci++) {
+                       new_map = xmap_dereference(new_dev_maps->attr_map[tci]);
                        map = dev_maps ?
-                             xmap_dereference(dev_maps->cpu_map[tci]) :
+                             xmap_dereference(dev_maps->attr_map[tci]) :
                              NULL;
                        if (new_map && new_map != map)
                                kfree(new_map);
@@ -2363,14 +2448,34 @@ error:
        kfree(new_dev_maps);
        return -ENOMEM;
 }
+
+int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
+                       u16 index)
+{
+       return __netif_set_xps_queue(dev, cpumask_bits(mask), index, false);
+}
 EXPORT_SYMBOL(netif_set_xps_queue);
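
For orientation, a minimal, illustrative sketch (not part of this diff) of how a driver might use the wrapper above; the one-TX-queue-per-CPU layout is an assumption:

        /* Hypothetical example: pin TX queue N to CPU N via the CPU-side
         * XPS map.  Assumes the device has one TX queue per online CPU;
         * requires CONFIG_XPS.
         */
        #include <linux/netdevice.h>
        #include <linux/cpumask.h>

        static void example_setup_xps(struct net_device *dev)
        {
                int cpu;

                for_each_online_cpu(cpu) {
                        if (cpu >= (int)dev->real_num_tx_queues)
                                break;
                        /* one-CPU mask: queue 'cpu' serves only that CPU */
                        netif_set_xps_queue(dev, cpumask_of(cpu), cpu);
                }
        }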
 
 #endif
+static void netdev_unbind_all_sb_channels(struct net_device *dev)
+{
+       struct netdev_queue *txq = &dev->_tx[dev->num_tx_queues];
+
+       /* Unbind any subordinate channels */
+       while (txq-- != &dev->_tx[0]) {
+               if (txq->sb_dev)
+                       netdev_unbind_sb_channel(dev, txq->sb_dev);
+       }
+}
+
 void netdev_reset_tc(struct net_device *dev)
 {
 #ifdef CONFIG_XPS
        netif_reset_xps_queues_gt(dev, 0);
 #endif
+       netdev_unbind_all_sb_channels(dev);
+
+       /* Reset TC configuration of device */
        dev->num_tc = 0;
        memset(dev->tc_to_txq, 0, sizeof(dev->tc_to_txq));
        memset(dev->prio_tc_map, 0, sizeof(dev->prio_tc_map));
@@ -2399,11 +2504,77 @@ int netdev_set_num_tc(struct net_device *dev, u8 num_tc)
 #ifdef CONFIG_XPS
        netif_reset_xps_queues_gt(dev, 0);
 #endif
+       netdev_unbind_all_sb_channels(dev);
+
        dev->num_tc = num_tc;
        return 0;
 }
 EXPORT_SYMBOL(netdev_set_num_tc);
 
+void netdev_unbind_sb_channel(struct net_device *dev,
+                             struct net_device *sb_dev)
+{
+       struct netdev_queue *txq = &dev->_tx[dev->num_tx_queues];
+
+#ifdef CONFIG_XPS
+       netif_reset_xps_queues_gt(sb_dev, 0);
+#endif
+       memset(sb_dev->tc_to_txq, 0, sizeof(sb_dev->tc_to_txq));
+       memset(sb_dev->prio_tc_map, 0, sizeof(sb_dev->prio_tc_map));
+
+       while (txq-- != &dev->_tx[0]) {
+               if (txq->sb_dev == sb_dev)
+                       txq->sb_dev = NULL;
+       }
+}
+EXPORT_SYMBOL(netdev_unbind_sb_channel);
+
+int netdev_bind_sb_channel_queue(struct net_device *dev,
+                                struct net_device *sb_dev,
+                                u8 tc, u16 count, u16 offset)
+{
+       /* Make certain the sb_dev and dev are already configured */
+       if (sb_dev->num_tc >= 0 || tc >= dev->num_tc)
+               return -EINVAL;
+
+       /* We cannot hand out queues we don't have */
+       if ((offset + count) > dev->real_num_tx_queues)
+               return -EINVAL;
+
+       /* Record the mapping */
+       sb_dev->tc_to_txq[tc].count = count;
+       sb_dev->tc_to_txq[tc].offset = offset;
+
+       /* Provide a way for a Tx queue to find the tc_to_txq map or
+        * XPS map for itself.
+        */
+       while (count--)
+               netdev_get_tx_queue(dev, count + offset)->sb_dev = sb_dev;
+
+       return 0;
+}
+EXPORT_SYMBOL(netdev_bind_sb_channel_queue);
+
+int netdev_set_sb_channel(struct net_device *dev, u16 channel)
+{
+       /* Do not use a multiqueue device to represent a subordinate channel */
+       if (netif_is_multiqueue(dev))
+               return -ENODEV;
+
+       /* We allow channels 1 - 32767 to be used for subordinate channels.
+        * Channel 0 is meant to be "native" mode and used only to represent
+        * the main root device. We allow writing 0 to reset the device back
+        * to normal mode after being used as a subordinate channel.
+        */
+       if (channel > S16_MAX)
+               return -EINVAL;
+
+       dev->num_tc = -channel;
+
+       return 0;
+}
+EXPORT_SYMBOL(netdev_set_sb_channel);
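
The three helpers above make up the subordinate-channel setup path. A hedged sketch of a lower device handing a TX queue range to a single-queue upper device (the device roles, channel number, and queue range are assumptions, not taken from this diff):

        /* Hypothetical example.  'lower' must already have traffic classes
         * configured (dev->num_tc > 0) and 'upper' must be single-queue.
         */
        static int example_bind_sb(struct net_device *lower,
                                   struct net_device *upper)
        {
                int err;

                /* mark 'upper' as subordinate channel 1 (num_tc goes negative) */
                err = netdev_set_sb_channel(upper, 1);
                if (err)
                        return err;

                /* hand queues [8, 12) of 'lower', traffic class 0, to 'upper' */
                err = netdev_bind_sb_channel_queue(lower, upper, 0, 4, 8);
                if (err)
                        netdev_set_sb_channel(upper, 0);        /* roll back */
                return err;
        }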
+
 /*
  * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
  * greater than real_num_tx_queues stale skbs on the qdisc must be flushed.
@@ -2615,24 +2786,26 @@ EXPORT_SYMBOL(netif_device_attach);
  * Returns a Tx hash based on the given packet descriptor and a Tx queue
  * count to be used as a distribution range.
  */
-static u16 skb_tx_hash(const struct net_device *dev, struct sk_buff *skb)
+static u16 skb_tx_hash(const struct net_device *dev,
+                      const struct net_device *sb_dev,
+                      struct sk_buff *skb)
 {
        u32 hash;
        u16 qoffset = 0;
        u16 qcount = dev->real_num_tx_queues;
 
+       if (dev->num_tc) {
+               u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
+
+               qoffset = sb_dev->tc_to_txq[tc].offset;
+               qcount = sb_dev->tc_to_txq[tc].count;
+       }
+
        if (skb_rx_queue_recorded(skb)) {
                hash = skb_get_rx_queue(skb);
                while (unlikely(hash >= qcount))
                        hash -= qcount;
-               return hash;
-       }
-
-       if (dev->num_tc) {
-               u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
-
-               qoffset = dev->tc_to_txq[tc].offset;
-               qcount = dev->tc_to_txq[tc].count;
+               return hash + qoffset;
        }
 
        return (u16) reciprocal_scale(skb_get_hash(skb), qcount) + qoffset;
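
The hunk above leans on reciprocal_scale() to map a 32-bit flow hash into [qoffset, qoffset + qcount) without a division. A small sketch of the arithmetic (values in the comment are made up):

        static u16 example_scale(u32 hash, u16 qoffset, u16 qcount)
        {
                /* reciprocal_scale(hash, qcount) == (hash * qcount) >> 32,
                 * uniform over [0, qcount); e.g. hash = 0x80000000 and
                 * qcount = 8 give 4, so queue qoffset + 4 is selected. */
                return (u16)(((u64)hash * qcount) >> 32) + qoffset;
        }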
@@ -3376,32 +3549,64 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
 }
 #endif /* CONFIG_NET_EGRESS */
 
-static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
+#ifdef CONFIG_XPS
+static int __get_xps_queue_idx(struct net_device *dev, struct sk_buff *skb,
+                              struct xps_dev_maps *dev_maps, unsigned int tci)
+{
+       struct xps_map *map;
+       int queue_index = -1;
+
+       if (dev->num_tc) {
+               tci *= dev->num_tc;
+               tci += netdev_get_prio_tc_map(dev, skb->priority);
+       }
+
+       map = rcu_dereference(dev_maps->attr_map[tci]);
+       if (map) {
+               if (map->len == 1)
+                       queue_index = map->queues[0];
+               else
+                       queue_index = map->queues[reciprocal_scale(
+                                               skb_get_hash(skb), map->len)];
+               if (unlikely(queue_index >= dev->real_num_tx_queues))
+                       queue_index = -1;
+       }
+       return queue_index;
+}
+#endif
+
+static int get_xps_queue(struct net_device *dev, struct net_device *sb_dev,
+                        struct sk_buff *skb)
 {
 #ifdef CONFIG_XPS
        struct xps_dev_maps *dev_maps;
-       struct xps_map *map;
+       struct sock *sk = skb->sk;
        int queue_index = -1;
 
+       if (!static_key_false(&xps_needed))
+               return -1;
+
        rcu_read_lock();
-       dev_maps = rcu_dereference(dev->xps_maps);
+       if (!static_key_false(&xps_rxqs_needed))
+               goto get_cpus_map;
+
+       dev_maps = rcu_dereference(sb_dev->xps_rxqs_map);
        if (dev_maps) {
-               unsigned int tci = skb->sender_cpu - 1;
+               int tci = sk_rx_queue_get(sk);
 
-               if (dev->num_tc) {
-                       tci *= dev->num_tc;
-                       tci += netdev_get_prio_tc_map(dev, skb->priority);
-               }
+               if (tci >= 0 && tci < dev->num_rx_queues)
+                       queue_index = __get_xps_queue_idx(dev, skb, dev_maps,
+                                                         tci);
+       }
 
-               map = rcu_dereference(dev_maps->cpu_map[tci]);
-               if (map) {
-                       if (map->len == 1)
-                               queue_index = map->queues[0];
-                       else
-                               queue_index = map->queues[reciprocal_scale(skb_get_hash(skb),
-                                                                          map->len)];
-                       if (unlikely(queue_index >= dev->real_num_tx_queues))
-                               queue_index = -1;
+get_cpus_map:
+       if (queue_index < 0) {
+               dev_maps = rcu_dereference(sb_dev->xps_cpus_map);
+               if (dev_maps) {
+                       unsigned int tci = skb->sender_cpu - 1;
+
+                       queue_index = __get_xps_queue_idx(dev, skb, dev_maps,
+                                                         tci);
                }
        }
        rcu_read_unlock();
@@ -3412,17 +3617,36 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
 #endif
 }
 
-static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
+u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb,
+                    struct net_device *sb_dev,
+                    select_queue_fallback_t fallback)
+{
+       return 0;
+}
+EXPORT_SYMBOL(dev_pick_tx_zero);
+
+u16 dev_pick_tx_cpu_id(struct net_device *dev, struct sk_buff *skb,
+                      struct net_device *sb_dev,
+                      select_queue_fallback_t fallback)
+{
+       return (u16)raw_smp_processor_id() % dev->real_num_tx_queues;
+}
+EXPORT_SYMBOL(dev_pick_tx_cpu_id);
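
Illustrative use of the new exported helper: a driver that wants the plain queue-per-CPU policy can point its ndo_select_queue at dev_pick_tx_cpu_id instead of open-coding it (the ops table itself is hypothetical):

        static const struct net_device_ops example_netdev_ops = {
                /* ...other callbacks elided... */
                .ndo_select_queue = dev_pick_tx_cpu_id,
        };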
+
+static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb,
+                           struct net_device *sb_dev)
 {
        struct sock *sk = skb->sk;
        int queue_index = sk_tx_queue_get(sk);
 
+       sb_dev = sb_dev ? : dev;
+
        if (queue_index < 0 || skb->ooo_okay ||
            queue_index >= dev->real_num_tx_queues) {
-               int new_index = get_xps_queue(dev, skb);
+               int new_index = get_xps_queue(dev, sb_dev, skb);
 
                if (new_index < 0)
-                       new_index = skb_tx_hash(dev, skb);
+                       new_index = skb_tx_hash(dev, sb_dev, skb);
 
                if (queue_index != new_index && sk &&
                    sk_fullsock(sk) &&
@@ -3437,7 +3661,7 @@ static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
 
 struct netdev_queue *netdev_pick_tx(struct net_device *dev,
                                    struct sk_buff *skb,
-                                   void *accel_priv)
+                                   struct net_device *sb_dev)
 {
        int queue_index = 0;
 
@@ -3452,10 +3676,10 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev,
                const struct net_device_ops *ops = dev->netdev_ops;
 
                if (ops->ndo_select_queue)
-                       queue_index = ops->ndo_select_queue(dev, skb, accel_priv,
+                       queue_index = ops->ndo_select_queue(dev, skb, sb_dev,
                                                            __netdev_pick_tx);
                else
-                       queue_index = __netdev_pick_tx(dev, skb);
+                       queue_index = __netdev_pick_tx(dev, skb, sb_dev);
 
                queue_index = netdev_cap_txqueue(dev, queue_index);
        }
@@ -3467,7 +3691,7 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev,
 /**
  *     __dev_queue_xmit - transmit a buffer
  *     @skb: buffer to transmit
- *     @accel_priv: private data used for L2 forwarding offload
+ *     @sb_dev: subordinate device used for L2 forwarding offload
  *
  *     Queue a buffer for transmission to a network device. The caller must
  *     have set the device and priority and built the buffer before calling
@@ -3490,7 +3714,7 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev,
  *      the BH enable code must have IRQs enabled so that it will not deadlock.
  *          --BLG
  */
-static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
+static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
 {
        struct net_device *dev = skb->dev;
        struct netdev_queue *txq;
@@ -3529,7 +3753,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
        else
                skb_dst_force(skb);
 
-       txq = netdev_pick_tx(dev, skb, accel_priv);
+       txq = netdev_pick_tx(dev, skb, sb_dev);
        q = rcu_dereference_bh(txq->qdisc);
 
        trace_net_dev_queue(skb);
@@ -3603,9 +3827,9 @@ int dev_queue_xmit(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(dev_queue_xmit);
 
-int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv)
+int dev_queue_xmit_accel(struct sk_buff *skb, struct net_device *sb_dev)
 {
-       return __dev_queue_xmit(skb, accel_priv);
+       return __dev_queue_xmit(skb, sb_dev);
 }
 EXPORT_SYMBOL(dev_queue_xmit_accel);
 
@@ -4028,7 +4252,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
        /* Reinjected packets coming from act_mirred or similar should
         * not get XDP generic processing.
         */
-       if (skb_cloned(skb))
+       if (skb_cloned(skb) || skb_is_tc_redirected(skb))
                return XDP_PASS;
 
        /* XDP packets must be linear and must have sufficient headroom
@@ -4378,6 +4602,10 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
                __skb_push(skb, skb->mac_len);
                skb_do_redirect(skb);
                return NULL;
+       case TC_ACT_REINSERT:
+               /* this does not scrub the packet, and updates stats on error */
+               skb_tc_reinsert(skb, &cl_res);
+               return NULL;
        default:
                break;
        }
@@ -4494,7 +4722,8 @@ static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev,
        return 0;
 }
 
-static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
+static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc,
+                                   struct packet_type **ppt_prev)
 {
        struct packet_type *ptype, *pt_prev;
        rx_handler_func_t *rx_handler;
@@ -4624,8 +4853,7 @@ skip_classify:
        if (pt_prev) {
                if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC)))
                        goto drop;
-               else
-                       ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
+               *ppt_prev = pt_prev;
        } else {
 drop:
                if (!deliver_exact)
@@ -4643,6 +4871,18 @@ out:
        return ret;
 }
 
+static int __netif_receive_skb_one_core(struct sk_buff *skb, bool pfmemalloc)
+{
+       struct net_device *orig_dev = skb->dev;
+       struct packet_type *pt_prev = NULL;
+       int ret;
+
+       ret = __netif_receive_skb_core(skb, pfmemalloc, &pt_prev);
+       if (pt_prev)
+               ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
+       return ret;
+}
+
 /**
  *     netif_receive_skb_core - special purpose version of netif_receive_skb
  *     @skb: buffer to process
@@ -4663,13 +4903,72 @@ int netif_receive_skb_core(struct sk_buff *skb)
        int ret;
 
        rcu_read_lock();
-       ret = __netif_receive_skb_core(skb, false);
+       ret = __netif_receive_skb_one_core(skb, false);
        rcu_read_unlock();
 
        return ret;
 }
 EXPORT_SYMBOL(netif_receive_skb_core);
 
+static inline void __netif_receive_skb_list_ptype(struct list_head *head,
+                                                 struct packet_type *pt_prev,
+                                                 struct net_device *orig_dev)
+{
+       struct sk_buff *skb, *next;
+
+       if (!pt_prev)
+               return;
+       if (list_empty(head))
+               return;
+       if (pt_prev->list_func != NULL)
+               pt_prev->list_func(head, pt_prev, orig_dev);
+       else
+               list_for_each_entry_safe(skb, next, head, list)
+                       pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
+}
+
+static void __netif_receive_skb_list_core(struct list_head *head, bool pfmemalloc)
+{
+       /* Fast-path assumptions:
+        * - There is no RX handler.
+        * - Only one packet_type matches.
+        * If either of these fails, we will end up doing some per-packet
+        * processing in-line, then handling the 'last ptype' for the whole
+        * sublist.  This can't cause out-of-order delivery to any single ptype,
+        * because the 'last ptype' must be constant across the sublist, and all
+        * other ptypes are handled per-packet.
+        */
+       /* Current (common) ptype of sublist */
+       struct packet_type *pt_curr = NULL;
+       /* Current (common) orig_dev of sublist */
+       struct net_device *od_curr = NULL;
+       struct list_head sublist;
+       struct sk_buff *skb, *next;
+
+       INIT_LIST_HEAD(&sublist);
+       list_for_each_entry_safe(skb, next, head, list) {
+               struct net_device *orig_dev = skb->dev;
+               struct packet_type *pt_prev = NULL;
+
+               list_del(&skb->list);
+               __netif_receive_skb_core(skb, pfmemalloc, &pt_prev);
+               if (!pt_prev)
+                       continue;
+               if (pt_curr != pt_prev || od_curr != orig_dev) {
+                       /* dispatch old sublist */
+                       __netif_receive_skb_list_ptype(&sublist, pt_curr, od_curr);
+                       /* start new sublist */
+                       INIT_LIST_HEAD(&sublist);
+                       pt_curr = pt_prev;
+                       od_curr = orig_dev;
+               }
+               list_add_tail(&skb->list, &sublist);
+       }
+
+       /* dispatch final sublist */
+       __netif_receive_skb_list_ptype(&sublist, pt_curr, od_curr);
+}
+
 static int __netif_receive_skb(struct sk_buff *skb)
 {
        int ret;
@@ -4687,14 +4986,44 @@ static int __netif_receive_skb(struct sk_buff *skb)
                 * context down to all allocation sites.
                 */
                noreclaim_flag = memalloc_noreclaim_save();
-               ret = __netif_receive_skb_core(skb, true);
+               ret = __netif_receive_skb_one_core(skb, true);
                memalloc_noreclaim_restore(noreclaim_flag);
        } else
-               ret = __netif_receive_skb_core(skb, false);
+               ret = __netif_receive_skb_one_core(skb, false);
 
        return ret;
 }
 
+static void __netif_receive_skb_list(struct list_head *head)
+{
+       unsigned long noreclaim_flag = 0;
+       struct sk_buff *skb, *next;
+       bool pfmemalloc = false; /* Is current sublist PF_MEMALLOC? */
+
+       list_for_each_entry_safe(skb, next, head, list) {
+               if ((sk_memalloc_socks() && skb_pfmemalloc(skb)) != pfmemalloc) {
+                       struct list_head sublist;
+
+                       /* Handle the previous sublist */
+                       list_cut_before(&sublist, head, &skb->list);
+                       if (!list_empty(&sublist))
+                               __netif_receive_skb_list_core(&sublist, pfmemalloc);
+                       pfmemalloc = !pfmemalloc;
+                       /* See comments in __netif_receive_skb */
+                       if (pfmemalloc)
+                               noreclaim_flag = memalloc_noreclaim_save();
+                       else
+                               memalloc_noreclaim_restore(noreclaim_flag);
+               }
+       }
+       /* Handle the remaining sublist */
+       if (!list_empty(head))
+               __netif_receive_skb_list_core(head, pfmemalloc);
+       /* Restore pflags */
+       if (pfmemalloc)
+               memalloc_noreclaim_restore(noreclaim_flag);
+}
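
The loop above splits the incoming list at each pfmemalloc transition with list_cut_before(). A standalone sketch of that primitive's semantics (the item type and pivot are illustrative):

        struct item {
                int v;
                struct list_head list;
        };

        static void example_split(struct list_head *head, struct item *pivot)
        {
                LIST_HEAD(sublist);

                /* every entry in front of 'pivot' moves onto 'sublist';
                 * 'pivot' and its successors remain on 'head' */
                list_cut_before(&sublist, head, &pivot->list);

                /* ...dispatch 'sublist' as one batch here... */
        }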
+
 static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
 {
        struct bpf_prog *old = rtnl_dereference(dev->xdp_prog);
@@ -4717,7 +5046,6 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
                break;
 
        case XDP_QUERY_PROG:
-               xdp->prog_attached = !!old;
                xdp->prog_id = old ? old->aux->id : 0;
                break;
 
@@ -4769,6 +5097,55 @@ static int netif_receive_skb_internal(struct sk_buff *skb)
        return ret;
 }
 
+static void netif_receive_skb_list_internal(struct list_head *head)
+{
+       struct bpf_prog *xdp_prog = NULL;
+       struct sk_buff *skb, *next;
+       struct list_head sublist;
+
+       INIT_LIST_HEAD(&sublist);
+       list_for_each_entry_safe(skb, next, head, list) {
+               net_timestamp_check(netdev_tstamp_prequeue, skb);
+               list_del(&skb->list);
+               if (!skb_defer_rx_timestamp(skb))
+                       list_add_tail(&skb->list, &sublist);
+       }
+       list_splice_init(&sublist, head);
+
+       if (static_branch_unlikely(&generic_xdp_needed_key)) {
+               preempt_disable();
+               rcu_read_lock();
+               list_for_each_entry_safe(skb, next, head, list) {
+                       xdp_prog = rcu_dereference(skb->dev->xdp_prog);
+                       list_del(&skb->list);
+                       if (do_xdp_generic(xdp_prog, skb) == XDP_PASS)
+                               list_add_tail(&skb->list, &sublist);
+               }
+               rcu_read_unlock();
+               preempt_enable();
+               /* Put packets that XDP passed back on the main list */
+               list_splice_init(&sublist, head);
+       }
+
+       rcu_read_lock();
+#ifdef CONFIG_RPS
+       if (static_key_false(&rps_needed)) {
+               list_for_each_entry_safe(skb, next, head, list) {
+                       struct rps_dev_flow voidflow, *rflow = &voidflow;
+                       int cpu = get_rps_cpu(skb->dev, skb, &rflow);
+
+                       if (cpu >= 0) {
+                               /* Will be handled, remove from list */
+                               list_del(&skb->list);
+                               enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
+                       }
+               }
+       }
+#endif
+       __netif_receive_skb_list(head);
+       rcu_read_unlock();
+}
+
 /**
  *     netif_receive_skb - process receive buffer from network
  *     @skb: buffer to process
@@ -4792,6 +5169,28 @@ int netif_receive_skb(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(netif_receive_skb);
 
+/**
+ *     netif_receive_skb_list - process many receive buffers from network
+ *     @head: list of skbs to process.
+ *
+ *     Since the return value of netif_receive_skb() is normally ignored, and
+ *     would not be meaningful for a list, this function returns void.
+ *
+ *     This function may only be called from softirq context and interrupts
+ *     should be enabled.
+ */
+void netif_receive_skb_list(struct list_head *head)
+{
+       struct sk_buff *skb;
+
+       if (list_empty(head))
+               return;
+       list_for_each_entry(skb, head, list)
+               trace_netif_receive_skb_list_entry(skb);
+       netif_receive_skb_list_internal(head);
+}
+EXPORT_SYMBOL(netif_receive_skb_list);
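
A hedged sketch of a NAPI poll loop feeding the new list entry point; example_rx_one() is a hypothetical stand-in for popping one skb off the driver's descriptor ring:

        static int example_poll(struct napi_struct *napi, int budget)
        {
                LIST_HEAD(rx_list);
                struct sk_buff *skb;
                int work = 0;

                while (work < budget && (skb = example_rx_one(napi)) != NULL) {
                        list_add_tail(&skb->list, &rx_list);
                        work++;
                }

                /* one call delivers the whole batch to the stack */
                netif_receive_skb_list(&rx_list);

                if (work < budget)
                        napi_complete_done(napi, work);
                return work;
        }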
+
 DEFINE_PER_CPU(struct work_struct, flush_works);
 
 /* Network device is going away, flush any packets still pending */
@@ -4875,42 +5274,50 @@ out:
        return netif_receive_skb_internal(skb);
 }
 
-/* napi->gro_list contains packets ordered by age.
- * youngest packets at the head of it.
- * Complete skbs in reverse order to reduce latencies.
- */
-void napi_gro_flush(struct napi_struct *napi, bool flush_old)
+static void __napi_gro_flush_chain(struct napi_struct *napi, u32 index,
+                                  bool flush_old)
 {
-       struct sk_buff *skb, *prev = NULL;
-
-       /* scan list and build reverse chain */
-       for (skb = napi->gro_list; skb != NULL; skb = skb->next) {
-               skb->prev = prev;
-               prev = skb;
-       }
-
-       for (skb = prev; skb; skb = prev) {
-               skb->next = NULL;
+       struct list_head *head = &napi->gro_hash[index].list;
+       struct sk_buff *skb, *p;
 
+       list_for_each_entry_safe_reverse(skb, p, head, list) {
                if (flush_old && NAPI_GRO_CB(skb)->age == jiffies)
                        return;
-
-               prev = skb->prev;
+               list_del(&skb->list);
+               skb->next = NULL;
                napi_gro_complete(skb);
-               napi->gro_count--;
+               napi->gro_hash[index].count--;
        }
 
-       napi->gro_list = NULL;
+       if (!napi->gro_hash[index].count)
+               __clear_bit(index, &napi->gro_bitmask);
+}
+
+/* napi->gro_hash[].list contains packets ordered by age, with the
+ * youngest packets at the head.
+ * Complete skbs in reverse order to reduce latencies.
+ */
+void napi_gro_flush(struct napi_struct *napi, bool flush_old)
+{
+       u32 i;
+
+       for (i = 0; i < GRO_HASH_BUCKETS; i++) {
+               if (test_bit(i, &napi->gro_bitmask))
+                       __napi_gro_flush_chain(napi, i, flush_old);
+       }
 }
 EXPORT_SYMBOL(napi_gro_flush);
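
Throughout these hunks, flows are spread across GRO_HASH_BUCKETS per-NAPI lists keyed by the low bits of the raw skb hash, and gro_bitmask caches which buckets are occupied. A minimal sketch of that mapping (the helper is illustrative):

        static bool example_bucket_busy(struct napi_struct *napi,
                                        struct sk_buff *skb)
        {
                u32 bucket = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1);

                /* napi_gro_flush() walks only buckets whose bit is set,
                 * never touching the empty list heads */
                return test_bit(bucket, &napi->gro_bitmask);
        }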
 
-static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
+static struct list_head *gro_list_prepare(struct napi_struct *napi,
+                                         struct sk_buff *skb)
 {
-       struct sk_buff *p;
        unsigned int maclen = skb->dev->hard_header_len;
        u32 hash = skb_get_hash_raw(skb);
+       struct list_head *head;
+       struct sk_buff *p;
 
-       for (p = napi->gro_list; p; p = p->next) {
+       head = &napi->gro_hash[hash & (GRO_HASH_BUCKETS - 1)].list;
+       list_for_each_entry(p, head, list) {
                unsigned long diffs;
 
                NAPI_GRO_CB(p)->flush = 0;
@@ -4933,6 +5340,8 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
                                       maclen);
                NAPI_GRO_CB(p)->same_flow = !diffs;
        }
+
+       return head;
 }
 
 static void skb_gro_reset_offset(struct sk_buff *skb)
@@ -4975,20 +5384,41 @@ static void gro_pull_from_frag0(struct sk_buff *skb, int grow)
        }
 }
 
+static void gro_flush_oldest(struct list_head *head)
+{
+       struct sk_buff *oldest;
+
+       oldest = list_last_entry(head, struct sk_buff, list);
+
+       /* We are called with head length >= MAX_GRO_SKBS, so the list
+        * cannot be empty and oldest cannot be NULL.
+        */
+       if (WARN_ON_ONCE(!oldest))
+               return;
+
+       /* Do not adjust napi->gro_hash[].count, caller is adding a new
+        * SKB to the chain.
+        */
+       list_del(&oldest->list);
+       napi_gro_complete(oldest);
+}
+
 static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
-       struct sk_buff **pp = NULL;
+       u32 hash = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1);
+       struct list_head *head = &offload_base;
        struct packet_offload *ptype;
        __be16 type = skb->protocol;
-       struct list_head *head = &offload_base;
-       int same_flow;
+       struct list_head *gro_head;
+       struct sk_buff *pp = NULL;
        enum gro_result ret;
+       int same_flow;
        int grow;
 
        if (netif_elide_gro(skb->dev))
                goto normal;
 
-       gro_list_prepare(napi, skb);
+       gro_head = gro_list_prepare(napi, skb);
 
        rcu_read_lock();
        list_for_each_entry_rcu(ptype, head, list) {
@@ -5022,7 +5452,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
                        NAPI_GRO_CB(skb)->csum_valid = 0;
                }
 
-               pp = ptype->callbacks.gro_receive(&napi->gro_list, skb);
+               pp = ptype->callbacks.gro_receive(gro_head, skb);
                break;
        }
        rcu_read_unlock();
@@ -5039,12 +5469,10 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
        ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
 
        if (pp) {
-               struct sk_buff *nskb = *pp;
-
-               *pp = nskb->next;
-               nskb->next = NULL;
-               napi_gro_complete(nskb);
-               napi->gro_count--;
+               list_del(&pp->list);
+               pp->next = NULL;
+               napi_gro_complete(pp);
+               napi->gro_hash[hash].count--;
        }
 
        if (same_flow)
@@ -5053,26 +5481,16 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
        if (NAPI_GRO_CB(skb)->flush)
                goto normal;
 
-       if (unlikely(napi->gro_count >= MAX_GRO_SKBS)) {
-               struct sk_buff *nskb = napi->gro_list;
-
-               /* locate the end of the list to select the 'oldest' flow */
-               while (nskb->next) {
-                       pp = &nskb->next;
-                       nskb = *pp;
-               }
-               *pp = NULL;
-               nskb->next = NULL;
-               napi_gro_complete(nskb);
+       if (unlikely(napi->gro_hash[hash].count >= MAX_GRO_SKBS)) {
+               gro_flush_oldest(gro_head);
        } else {
-               napi->gro_count++;
+               napi->gro_hash[hash].count++;
        }
        NAPI_GRO_CB(skb)->count = 1;
        NAPI_GRO_CB(skb)->age = jiffies;
        NAPI_GRO_CB(skb)->last = skb;
        skb_shinfo(skb)->gso_size = skb_gro_len(skb);
-       skb->next = napi->gro_list;
-       napi->gro_list = skb;
+       list_add(&skb->list, gro_head);
        ret = GRO_HELD;
 
 pull:
@@ -5080,6 +5498,13 @@ pull:
        if (grow > 0)
                gro_pull_from_frag0(skb, grow);
 ok:
+       if (napi->gro_hash[hash].count) {
+               if (!test_bit(hash, &napi->gro_bitmask))
+                       __set_bit(hash, &napi->gro_bitmask);
+       } else if (test_bit(hash, &napi->gro_bitmask)) {
+               __clear_bit(hash, &napi->gro_bitmask);
+       }
+
        return ret;
 
 normal:
@@ -5478,7 +5903,7 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
                                 NAPIF_STATE_IN_BUSY_POLL)))
                return false;
 
-       if (n->gro_list) {
+       if (n->gro_bitmask) {
                unsigned long timeout = 0;
 
                if (work_done)
@@ -5687,21 +6112,31 @@ static enum hrtimer_restart napi_watchdog(struct hrtimer *timer)
        /* Note : we use a relaxed variant of napi_schedule_prep() not setting
         * NAPI_STATE_MISSED, since we do not react to a device IRQ.
         */
-       if (napi->gro_list && !napi_disable_pending(napi) &&
+       if (napi->gro_bitmask && !napi_disable_pending(napi) &&
            !test_and_set_bit(NAPI_STATE_SCHED, &napi->state))
                __napi_schedule_irqoff(napi);
 
        return HRTIMER_NORESTART;
 }
 
+static void init_gro_hash(struct napi_struct *napi)
+{
+       int i;
+
+       for (i = 0; i < GRO_HASH_BUCKETS; i++) {
+               INIT_LIST_HEAD(&napi->gro_hash[i].list);
+               napi->gro_hash[i].count = 0;
+       }
+       napi->gro_bitmask = 0;
+}
+
 void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
                    int (*poll)(struct napi_struct *, int), int weight)
 {
        INIT_LIST_HEAD(&napi->poll_list);
        hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
        napi->timer.function = napi_watchdog;
-       napi->gro_count = 0;
-       napi->gro_list = NULL;
+       init_gro_hash(napi);
        napi->skb = NULL;
        napi->poll = poll;
        if (weight > NAPI_POLL_WEIGHT)
@@ -5734,6 +6169,19 @@ void napi_disable(struct napi_struct *n)
 }
 EXPORT_SYMBOL(napi_disable);
 
+static void flush_gro_hash(struct napi_struct *napi)
+{
+       int i;
+
+       for (i = 0; i < GRO_HASH_BUCKETS; i++) {
+               struct sk_buff *skb, *n;
+
+               list_for_each_entry_safe(skb, n, &napi->gro_hash[i].list, list)
+                       kfree_skb(skb);
+               napi->gro_hash[i].count = 0;
+       }
+}
+
 /* Must be called in process context */
 void netif_napi_del(struct napi_struct *napi)
 {
@@ -5743,9 +6191,8 @@ void netif_napi_del(struct napi_struct *napi)
        list_del_init(&napi->dev_list);
        napi_free_frags(napi);
 
-       kfree_skb_list(napi->gro_list);
-       napi->gro_list = NULL;
-       napi->gro_count = 0;
+       flush_gro_hash(napi);
+       napi->gro_bitmask = 0;
 }
 EXPORT_SYMBOL(netif_napi_del);
 
@@ -5787,7 +6234,7 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
                goto out_unlock;
        }
 
-       if (n->gro_list) {
+       if (n->gro_bitmask) {
                /* flush too old packets
                 * If HZ < 1000, flush all packets.
                 */
@@ -7080,13 +7527,15 @@ int __dev_set_mtu(struct net_device *dev, int new_mtu)
 EXPORT_SYMBOL(__dev_set_mtu);
 
 /**
- *     dev_set_mtu - Change maximum transfer unit
+ *     dev_set_mtu_ext - Change maximum transfer unit
  *     @dev: device
  *     @new_mtu: new transfer unit
+ *     @extack: netlink extended ack
  *
  *     Change the maximum transfer size of the network device.
  */
-int dev_set_mtu(struct net_device *dev, int new_mtu)
+int dev_set_mtu_ext(struct net_device *dev, int new_mtu,
+                   struct netlink_ext_ack *extack)
 {
        int err, orig_mtu;
 
@@ -7095,14 +7544,12 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
 
        /* MTU must be positive, and in range */
        if (new_mtu < 0 || new_mtu < dev->min_mtu) {
-               net_err_ratelimited("%s: Invalid MTU %d requested, hw min %d\n",
-                                   dev->name, new_mtu, dev->min_mtu);
+               NL_SET_ERR_MSG(extack, "mtu less than device minimum");
                return -EINVAL;
        }
 
        if (dev->max_mtu > 0 && new_mtu > dev->max_mtu) {
-               net_err_ratelimited("%s: Invalid MTU %d requested, hw max %d\n",
-                                   dev->name, new_mtu, dev->max_mtu);
+               NL_SET_ERR_MSG(extack, "mtu greater than device maximum");
                return -EINVAL;
        }
 
@@ -7130,6 +7577,17 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
        }
        return err;
 }
+
+int dev_set_mtu(struct net_device *dev, int new_mtu)
+{
+       struct netlink_ext_ack extack;
+       int err;
+
+       memset(&extack, 0, sizeof(extack));
+       err = dev_set_mtu_ext(dev, new_mtu, &extack);
+       if (err && extack._msg)
+               net_err_ratelimited("%s: %s\n", dev->name, extack._msg);
+       return err;
+}
 EXPORT_SYMBOL(dev_set_mtu);
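
A sketch of the intended call pattern, assuming a netlink-path caller that already holds an extack (the wrapper name is illustrative):

        static int example_change_mtu(struct net_device *dev, int new_mtu,
                                      struct netlink_ext_ack *extack)
        {
                /* on failure the extack carries a message such as
                 * "mtu less than device minimum" back to the requester
                 * instead of only hitting the kernel log */
                return dev_set_mtu_ext(dev, new_mtu, extack);
        }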
 
 /**
@@ -7279,23 +7737,21 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down)
 }
 EXPORT_SYMBOL(dev_change_proto_down);
 
-void __dev_xdp_query(struct net_device *dev, bpf_op_t bpf_op,
-                    struct netdev_bpf *xdp)
+u32 __dev_xdp_query(struct net_device *dev, bpf_op_t bpf_op,
+                   enum bpf_netdev_command cmd)
 {
-       memset(xdp, 0, sizeof(*xdp));
-       xdp->command = XDP_QUERY_PROG;
+       struct netdev_bpf xdp;
 
-       /* Query must always succeed. */
-       WARN_ON(bpf_op(dev, xdp) < 0);
-}
+       if (!bpf_op)
+               return 0;
 
-static u8 __dev_xdp_attached(struct net_device *dev, bpf_op_t bpf_op)
-{
-       struct netdev_bpf xdp;
+       memset(&xdp, 0, sizeof(xdp));
+       xdp.command = cmd;
 
-       __dev_xdp_query(dev, bpf_op, &xdp);
+       /* Query must always succeed. */
+       WARN_ON(bpf_op(dev, &xdp) < 0 && cmd == XDP_QUERY_PROG);
 
-       return xdp.prog_attached;
+       return xdp.prog_id;
 }
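
A hedged sketch combining the two query commands to test whether any program is attached; the helper name is illustrative:

        static bool example_xdp_attached(struct net_device *dev)
        {
                bpf_op_t bpf_op = dev->netdev_ops->ndo_bpf;

                /* __dev_xdp_query() returns the program id, or 0 when
                 * nothing is attached (or when bpf_op is NULL) */
                return __dev_xdp_query(dev, bpf_op, XDP_QUERY_PROG) ||
                       __dev_xdp_query(dev, bpf_op, XDP_QUERY_PROG_HW);
        }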
 
 static int dev_xdp_install(struct net_device *dev, bpf_op_t bpf_op,
@@ -7329,12 +7785,19 @@ static void dev_xdp_uninstall(struct net_device *dev)
        if (!ndo_bpf)
                return;
 
-       __dev_xdp_query(dev, ndo_bpf, &xdp);
-       if (xdp.prog_attached == XDP_ATTACHED_NONE)
-               return;
+       memset(&xdp, 0, sizeof(xdp));
+       xdp.command = XDP_QUERY_PROG;
+       WARN_ON(ndo_bpf(dev, &xdp));
+       if (xdp.prog_id)
+               WARN_ON(dev_xdp_install(dev, ndo_bpf, NULL, xdp.prog_flags,
+                                       NULL));
 
-       /* Program removal should always succeed */
-       WARN_ON(dev_xdp_install(dev, ndo_bpf, NULL, xdp.prog_flags, NULL));
+       /* Remove HW offload */
+       memset(&xdp, 0, sizeof(xdp));
+       xdp.command = XDP_QUERY_PROG_HW;
+       if (!ndo_bpf(dev, &xdp) && xdp.prog_id)
+               WARN_ON(dev_xdp_install(dev, ndo_bpf, NULL, xdp.prog_flags,
+                                       NULL));
 }
 
 /**
@@ -7350,12 +7813,15 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
                      int fd, u32 flags)
 {
        const struct net_device_ops *ops = dev->netdev_ops;
+       enum bpf_netdev_command query;
        struct bpf_prog *prog = NULL;
        bpf_op_t bpf_op, bpf_chk;
        int err;
 
        ASSERT_RTNL();
 
+       query = flags & XDP_FLAGS_HW_MODE ? XDP_QUERY_PROG_HW : XDP_QUERY_PROG;
+
        bpf_op = bpf_chk = ops->ndo_bpf;
        if (!bpf_op && (flags & (XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE)))
                return -EOPNOTSUPP;
@@ -7365,10 +7831,11 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
                bpf_chk = generic_xdp_install;
 
        if (fd >= 0) {
-               if (bpf_chk && __dev_xdp_attached(dev, bpf_chk))
+               if (__dev_xdp_query(dev, bpf_chk, XDP_QUERY_PROG) ||
+                   __dev_xdp_query(dev, bpf_chk, XDP_QUERY_PROG_HW))
                        return -EEXIST;
                if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) &&
-                   __dev_xdp_attached(dev, bpf_op))
+                   __dev_xdp_query(dev, bpf_op, query))
                        return -EBUSY;
 
                prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP,
@@ -8837,6 +9304,9 @@ static struct hlist_head * __net_init netdev_create_hash(void)
 /* Initialize per network namespace state */
 static int __net_init netdev_init(struct net *net)
 {
+       BUILD_BUG_ON(GRO_HASH_BUCKETS >
+                    8 * FIELD_SIZEOF(struct napi_struct, gro_bitmask));
+
        if (net != &init_net)
                INIT_LIST_HEAD(&net->dev_base_head);
 
@@ -9107,6 +9577,7 @@ static int __init net_dev_init(void)
                sd->cpu = i;
 #endif
 
+               init_gro_hash(&sd->backlog);
                sd->backlog.poll = process_backlog;
                sd->backlog.weight = weight_p;
        }
index 50537ff961a722e18731b7b9671deb739bfce847..90e8aa36881e1563fb6870384316a1d025979cdd 100644
@@ -284,12 +284,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
        case SIOCSIFTXQLEN:
                if (ifr->ifr_qlen < 0)
                        return -EINVAL;
-               if (dev->tx_queue_len ^ ifr->ifr_qlen) {
-                       err = dev_change_tx_queue_len(dev, ifr->ifr_qlen);
-                       if (err)
-                               return err;
-               }
-               return 0;
+               return dev_change_tx_queue_len(dev, ifr->ifr_qlen);
 
        case SIOCSIFNAME:
                ifr->ifr_newname[IFNAMSIZ-1] = '\0';
index 22099705cc4108aa3881e5372c4c070c0032afb0..65fc366a78a4c455a02e74bdc30c9249698eb488 100644
@@ -326,6 +326,57 @@ devlink_sb_tc_index_get_from_info(struct devlink_sb *devlink_sb,
                                                  pool_type, p_tc_index);
 }
 
+struct devlink_region {
+       struct devlink *devlink;
+       struct list_head list;
+       const char *name;
+       struct list_head snapshot_list;
+       u32 max_snapshots;
+       u32 cur_snapshots;
+       u64 size;
+};
+
+struct devlink_snapshot {
+       struct list_head list;
+       struct devlink_region *region;
+       devlink_snapshot_data_dest_t *data_destructor;
+       u64 data_len;
+       u8 *data;
+       u32 id;
+};
+
+static struct devlink_region *
+devlink_region_get_by_name(struct devlink *devlink, const char *region_name)
+{
+       struct devlink_region *region;
+
+       list_for_each_entry(region, &devlink->region_list, list)
+               if (!strcmp(region->name, region_name))
+                       return region;
+
+       return NULL;
+}
+
+static struct devlink_snapshot *
+devlink_region_snapshot_get_by_id(struct devlink_region *region, u32 id)
+{
+       struct devlink_snapshot *snapshot;
+
+       list_for_each_entry(snapshot, &region->snapshot_list, list)
+               if (snapshot->id == id)
+                       return snapshot;
+
+       return NULL;
+}
+
+static void devlink_region_snapshot_del(struct devlink_snapshot *snapshot)
+{
+       snapshot->region->cur_snapshots--;
+       list_del(&snapshot->list);
+       (*snapshot->data_destructor)(snapshot->data);
+       kfree(snapshot);
+}
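
A sketch of how the two lookups above combine; both helpers are static to this file, and the region name is made up:

        static u8 *example_snapshot_data(struct devlink *devlink, u32 id)
        {
                struct devlink_region *region;
                struct devlink_snapshot *snapshot;

                region = devlink_region_get_by_name(devlink, "fw-health");
                if (!region)
                        return NULL;

                snapshot = devlink_region_snapshot_get_by_id(region, id);
                return snapshot ? snapshot->data : NULL;
        }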
+
 #define DEVLINK_NL_FLAG_NEED_DEVLINK   BIT(0)
 #define DEVLINK_NL_FLAG_NEED_PORT      BIT(1)
 #define DEVLINK_NL_FLAG_NEED_SB                BIT(2)
@@ -2604,247 +2655,1204 @@ static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
        return devlink->ops->reload(devlink, info->extack);
 }
 
-static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
-       [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
-       [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
-       [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32 },
-       [DEVLINK_ATTR_PORT_TYPE] = { .type = NLA_U16 },
-       [DEVLINK_ATTR_PORT_SPLIT_COUNT] = { .type = NLA_U32 },
-       [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32 },
-       [DEVLINK_ATTR_SB_POOL_INDEX] = { .type = NLA_U16 },
-       [DEVLINK_ATTR_SB_POOL_TYPE] = { .type = NLA_U8 },
-       [DEVLINK_ATTR_SB_POOL_SIZE] = { .type = NLA_U32 },
-       [DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE] = { .type = NLA_U8 },
-       [DEVLINK_ATTR_SB_THRESHOLD] = { .type = NLA_U32 },
-       [DEVLINK_ATTR_SB_TC_INDEX] = { .type = NLA_U16 },
-       [DEVLINK_ATTR_ESWITCH_MODE] = { .type = NLA_U16 },
-       [DEVLINK_ATTR_ESWITCH_INLINE_MODE] = { .type = NLA_U8 },
-       [DEVLINK_ATTR_ESWITCH_ENCAP_MODE] = { .type = NLA_U8 },
-       [DEVLINK_ATTR_DPIPE_TABLE_NAME] = { .type = NLA_NUL_STRING },
-       [DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED] = { .type = NLA_U8 },
-       [DEVLINK_ATTR_RESOURCE_ID] = { .type = NLA_U64},
-       [DEVLINK_ATTR_RESOURCE_SIZE] = { .type = NLA_U64},
-};
-
-static const struct genl_ops devlink_nl_ops[] = {
-       {
-               .cmd = DEVLINK_CMD_GET,
-               .doit = devlink_nl_cmd_get_doit,
-               .dumpit = devlink_nl_cmd_get_dumpit,
-               .policy = devlink_nl_policy,
-               .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
-               /* can be retrieved by unprivileged users */
-       },
-       {
-               .cmd = DEVLINK_CMD_PORT_GET,
-               .doit = devlink_nl_cmd_port_get_doit,
-               .dumpit = devlink_nl_cmd_port_get_dumpit,
-               .policy = devlink_nl_policy,
-               .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
-               /* can be retrieved by unprivileged users */
-       },
-       {
-               .cmd = DEVLINK_CMD_PORT_SET,
-               .doit = devlink_nl_cmd_port_set_doit,
-               .policy = devlink_nl_policy,
-               .flags = GENL_ADMIN_PERM,
-               .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
-       },
-       {
-               .cmd = DEVLINK_CMD_PORT_SPLIT,
-               .doit = devlink_nl_cmd_port_split_doit,
-               .policy = devlink_nl_policy,
-               .flags = GENL_ADMIN_PERM,
-               .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
-                                 DEVLINK_NL_FLAG_NO_LOCK,
-       },
-       {
-               .cmd = DEVLINK_CMD_PORT_UNSPLIT,
-               .doit = devlink_nl_cmd_port_unsplit_doit,
-               .policy = devlink_nl_policy,
-               .flags = GENL_ADMIN_PERM,
-               .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
-                                 DEVLINK_NL_FLAG_NO_LOCK,
-       },
-       {
-               .cmd = DEVLINK_CMD_SB_GET,
-               .doit = devlink_nl_cmd_sb_get_doit,
-               .dumpit = devlink_nl_cmd_sb_get_dumpit,
-               .policy = devlink_nl_policy,
-               .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
-                                 DEVLINK_NL_FLAG_NEED_SB,
-               /* can be retrieved by unprivileged users */
-       },
-       {
-               .cmd = DEVLINK_CMD_SB_POOL_GET,
-               .doit = devlink_nl_cmd_sb_pool_get_doit,
-               .dumpit = devlink_nl_cmd_sb_pool_get_dumpit,
-               .policy = devlink_nl_policy,
-               .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
-                                 DEVLINK_NL_FLAG_NEED_SB,
-               /* can be retrieved by unprivileged users */
-       },
-       {
-               .cmd = DEVLINK_CMD_SB_POOL_SET,
-               .doit = devlink_nl_cmd_sb_pool_set_doit,
-               .policy = devlink_nl_policy,
-               .flags = GENL_ADMIN_PERM,
-               .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
-                                 DEVLINK_NL_FLAG_NEED_SB,
-       },
-       {
-               .cmd = DEVLINK_CMD_SB_PORT_POOL_GET,
-               .doit = devlink_nl_cmd_sb_port_pool_get_doit,
-               .dumpit = devlink_nl_cmd_sb_port_pool_get_dumpit,
-               .policy = devlink_nl_policy,
-               .internal_flags = DEVLINK_NL_FLAG_NEED_PORT |
-                                 DEVLINK_NL_FLAG_NEED_SB,
-               /* can be retrieved by unprivileged users */
-       },
-       {
-               .cmd = DEVLINK_CMD_SB_PORT_POOL_SET,
-               .doit = devlink_nl_cmd_sb_port_pool_set_doit,
-               .policy = devlink_nl_policy,
-               .flags = GENL_ADMIN_PERM,
-               .internal_flags = DEVLINK_NL_FLAG_NEED_PORT |
-                                 DEVLINK_NL_FLAG_NEED_SB,
-       },
+static const struct devlink_param devlink_param_generic[] = {
        {
-               .cmd = DEVLINK_CMD_SB_TC_POOL_BIND_GET,
-               .doit = devlink_nl_cmd_sb_tc_pool_bind_get_doit,
-               .dumpit = devlink_nl_cmd_sb_tc_pool_bind_get_dumpit,
-               .policy = devlink_nl_policy,
-               .internal_flags = DEVLINK_NL_FLAG_NEED_PORT |
-                                 DEVLINK_NL_FLAG_NEED_SB,
-               /* can be retrieved by unprivileged users */
-       },
-       {
-               .cmd = DEVLINK_CMD_SB_TC_POOL_BIND_SET,
-               .doit = devlink_nl_cmd_sb_tc_pool_bind_set_doit,
-               .policy = devlink_nl_policy,
-               .flags = GENL_ADMIN_PERM,
-               .internal_flags = DEVLINK_NL_FLAG_NEED_PORT |
-                                 DEVLINK_NL_FLAG_NEED_SB,
-       },
-       {
-               .cmd = DEVLINK_CMD_SB_OCC_SNAPSHOT,
-               .doit = devlink_nl_cmd_sb_occ_snapshot_doit,
-               .policy = devlink_nl_policy,
-               .flags = GENL_ADMIN_PERM,
-               .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
-                                 DEVLINK_NL_FLAG_NEED_SB,
-       },
-       {
-               .cmd = DEVLINK_CMD_SB_OCC_MAX_CLEAR,
-               .doit = devlink_nl_cmd_sb_occ_max_clear_doit,
-               .policy = devlink_nl_policy,
-               .flags = GENL_ADMIN_PERM,
-               .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
-                                 DEVLINK_NL_FLAG_NEED_SB,
-       },
-       {
-               .cmd = DEVLINK_CMD_ESWITCH_GET,
-               .doit = devlink_nl_cmd_eswitch_get_doit,
-               .policy = devlink_nl_policy,
-               .flags = GENL_ADMIN_PERM,
-               .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
-       },
-       {
-               .cmd = DEVLINK_CMD_ESWITCH_SET,
-               .doit = devlink_nl_cmd_eswitch_set_doit,
-               .policy = devlink_nl_policy,
-               .flags = GENL_ADMIN_PERM,
-               .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
-                                 DEVLINK_NL_FLAG_NO_LOCK,
-       },
-       {
-               .cmd = DEVLINK_CMD_DPIPE_TABLE_GET,
-               .doit = devlink_nl_cmd_dpipe_table_get,
-               .policy = devlink_nl_policy,
-               .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
-               /* can be retrieved by unprivileged users */
-       },
-       {
-               .cmd = DEVLINK_CMD_DPIPE_ENTRIES_GET,
-               .doit = devlink_nl_cmd_dpipe_entries_get,
-               .policy = devlink_nl_policy,
-               .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
-               /* can be retrieved by unprivileged users */
-       },
-       {
-               .cmd = DEVLINK_CMD_DPIPE_HEADERS_GET,
-               .doit = devlink_nl_cmd_dpipe_headers_get,
-               .policy = devlink_nl_policy,
-               .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
-               /* can be retrieved by unprivileged users */
-       },
-       {
-               .cmd = DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET,
-               .doit = devlink_nl_cmd_dpipe_table_counters_set,
-               .policy = devlink_nl_policy,
-               .flags = GENL_ADMIN_PERM,
-               .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+               .id = DEVLINK_PARAM_GENERIC_ID_INT_ERR_RESET,
+               .name = DEVLINK_PARAM_GENERIC_INT_ERR_RESET_NAME,
+               .type = DEVLINK_PARAM_GENERIC_INT_ERR_RESET_TYPE,
        },
        {
-               .cmd = DEVLINK_CMD_RESOURCE_SET,
-               .doit = devlink_nl_cmd_resource_set,
-               .policy = devlink_nl_policy,
-               .flags = GENL_ADMIN_PERM,
-               .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+               .id = DEVLINK_PARAM_GENERIC_ID_MAX_MACS,
+               .name = DEVLINK_PARAM_GENERIC_MAX_MACS_NAME,
+               .type = DEVLINK_PARAM_GENERIC_MAX_MACS_TYPE,
        },
        {
-               .cmd = DEVLINK_CMD_RESOURCE_DUMP,
-               .doit = devlink_nl_cmd_resource_dump,
-               .policy = devlink_nl_policy,
-               .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
-               /* can be retrieved by unprivileged users */
+               .id = DEVLINK_PARAM_GENERIC_ID_ENABLE_SRIOV,
+               .name = DEVLINK_PARAM_GENERIC_ENABLE_SRIOV_NAME,
+               .type = DEVLINK_PARAM_GENERIC_ENABLE_SRIOV_TYPE,
        },
        {
-               .cmd = DEVLINK_CMD_RELOAD,
-               .doit = devlink_nl_cmd_reload,
-               .policy = devlink_nl_policy,
-               .flags = GENL_ADMIN_PERM,
-               .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
-                                 DEVLINK_NL_FLAG_NO_LOCK,
+               .id = DEVLINK_PARAM_GENERIC_ID_REGION_SNAPSHOT,
+               .name = DEVLINK_PARAM_GENERIC_REGION_SNAPSHOT_NAME,
+               .type = DEVLINK_PARAM_GENERIC_REGION_SNAPSHOT_TYPE,
        },
 };
 
-static struct genl_family devlink_nl_family __ro_after_init = {
-       .name           = DEVLINK_GENL_NAME,
-       .version        = DEVLINK_GENL_VERSION,
-       .maxattr        = DEVLINK_ATTR_MAX,
-       .netnsok        = true,
-       .pre_doit       = devlink_nl_pre_doit,
-       .post_doit      = devlink_nl_post_doit,
-       .module         = THIS_MODULE,
-       .ops            = devlink_nl_ops,
-       .n_ops          = ARRAY_SIZE(devlink_nl_ops),
-       .mcgrps         = devlink_nl_mcgrps,
-       .n_mcgrps       = ARRAY_SIZE(devlink_nl_mcgrps),
-};
+static int devlink_param_generic_verify(const struct devlink_param *param)
+{
+       /* verify that it matches a generic parameter by id and name */
+       if (param->id > DEVLINK_PARAM_GENERIC_ID_MAX)
+               return -EINVAL;
+       if (strcmp(param->name, devlink_param_generic[param->id].name))
+               return -ENOENT;
 
-/**
- *     devlink_alloc - Allocate new devlink instance resources
- *
- *     @ops: ops
- *     @priv_size: size of user private data
- *
- *     Allocate new devlink instance resources, including devlink index
- *     and name.
- */
-struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
+       WARN_ON(param->type != devlink_param_generic[param->id].type);
+
+       return 0;
+}
+
+static int devlink_param_driver_verify(const struct devlink_param *param)
 {
-       struct devlink *devlink;
+       int i;
 
-       devlink = kzalloc(sizeof(*devlink) + priv_size, GFP_KERNEL);
-       if (!devlink)
-               return NULL;
-       devlink->ops = ops;
-       devlink_net_set(devlink, &init_net);
-       INIT_LIST_HEAD(&devlink->port_list);
-       INIT_LIST_HEAD(&devlink->sb_list);
+       if (param->id <= DEVLINK_PARAM_GENERIC_ID_MAX)
+               return -EINVAL;
+       /* verify that the name does not collide with a generic param */
+       for (i = 0; i <= DEVLINK_PARAM_GENERIC_ID_MAX; i++)
+               if (!strcmp(param->name, devlink_param_generic[i].name))
+                       return -EEXIST;
+
+       return 0;
+}
+
+static struct devlink_param_item *
+devlink_param_find_by_name(struct list_head *param_list,
+                          const char *param_name)
+{
+       struct devlink_param_item *param_item;
+
+       list_for_each_entry(param_item, param_list, list)
+               if (!strcmp(param_item->param->name, param_name))
+                       return param_item;
+       return NULL;
+}
+
+static struct devlink_param_item *
+devlink_param_find_by_id(struct list_head *param_list, u32 param_id)
+{
+       struct devlink_param_item *param_item;
+
+       list_for_each_entry(param_item, param_list, list)
+               if (param_item->param->id == param_id)
+                       return param_item;
+       return NULL;
+}
+
+static bool
+devlink_param_cmode_is_supported(const struct devlink_param *param,
+                                enum devlink_param_cmode cmode)
+{
+       return test_bit(cmode, &param->supported_cmodes);
+}
+
+static int devlink_param_get(struct devlink *devlink,
+                            const struct devlink_param *param,
+                            struct devlink_param_gset_ctx *ctx)
+{
+       if (!param->get)
+               return -EOPNOTSUPP;
+       return param->get(devlink, param->id, ctx);
+}
+
+static int devlink_param_set(struct devlink *devlink,
+                            const struct devlink_param *param,
+                            struct devlink_param_gset_ctx *ctx)
+{
+       if (!param->set)
+               return -EOPNOTSUPP;
+       return param->set(devlink, param->id, ctx);
+}
+
+static int
+devlink_param_type_to_nla_type(enum devlink_param_type param_type)
+{
+       switch (param_type) {
+       case DEVLINK_PARAM_TYPE_U8:
+               return NLA_U8;
+       case DEVLINK_PARAM_TYPE_U16:
+               return NLA_U16;
+       case DEVLINK_PARAM_TYPE_U32:
+               return NLA_U32;
+       case DEVLINK_PARAM_TYPE_STRING:
+               return NLA_STRING;
+       case DEVLINK_PARAM_TYPE_BOOL:
+               return NLA_FLAG;
+       default:
+               return -EINVAL;
+       }
+}
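
For orientation, a hedged sketch of the driver-side table these checks validate, built only from the generic-parameter identifiers shown above (the cmode choice is illustrative):

        static const struct devlink_param example_params[] = {
                {
                        .id = DEVLINK_PARAM_GENERIC_ID_MAX_MACS,
                        .name = DEVLINK_PARAM_GENERIC_MAX_MACS_NAME,
                        .type = DEVLINK_PARAM_GENERIC_MAX_MACS_TYPE,
                        .generic = true,
                        .supported_cmodes =
                                BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
                },
        };

devlink_param_generic_verify() would accept such an entry, since its id, name, and type all come from the generic table.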
+
+static int
+devlink_nl_param_value_fill_one(struct sk_buff *msg,
+                               enum devlink_param_type type,
+                               enum devlink_param_cmode cmode,
+                               union devlink_param_value val)
+{
+       struct nlattr *param_value_attr;
+
+       param_value_attr = nla_nest_start(msg, DEVLINK_ATTR_PARAM_VALUE);
+       if (!param_value_attr)
+               goto nla_put_failure;
+
+       if (nla_put_u8(msg, DEVLINK_ATTR_PARAM_VALUE_CMODE, cmode))
+               goto value_nest_cancel;
+
+       switch (type) {
+       case DEVLINK_PARAM_TYPE_U8:
+               if (nla_put_u8(msg, DEVLINK_ATTR_PARAM_VALUE_DATA, val.vu8))
+                       goto value_nest_cancel;
+               break;
+       case DEVLINK_PARAM_TYPE_U16:
+               if (nla_put_u16(msg, DEVLINK_ATTR_PARAM_VALUE_DATA, val.vu16))
+                       goto value_nest_cancel;
+               break;
+       case DEVLINK_PARAM_TYPE_U32:
+               if (nla_put_u32(msg, DEVLINK_ATTR_PARAM_VALUE_DATA, val.vu32))
+                       goto value_nest_cancel;
+               break;
+       case DEVLINK_PARAM_TYPE_STRING:
+               if (nla_put_string(msg, DEVLINK_ATTR_PARAM_VALUE_DATA,
+                                  val.vstr))
+                       goto value_nest_cancel;
+               break;
+       case DEVLINK_PARAM_TYPE_BOOL:
+               if (val.vbool &&
+                   nla_put_flag(msg, DEVLINK_ATTR_PARAM_VALUE_DATA))
+                       goto value_nest_cancel;
+               break;
+       }
+
+       nla_nest_end(msg, param_value_attr);
+       return 0;
+
+value_nest_cancel:
+       nla_nest_cancel(msg, param_value_attr);
+nla_put_failure:
+       return -EMSGSIZE;
+}
+
+static int devlink_nl_param_fill(struct sk_buff *msg, struct devlink *devlink,
+                                struct devlink_param_item *param_item,
+                                enum devlink_command cmd,
+                                u32 portid, u32 seq, int flags)
+{
+       union devlink_param_value param_value[DEVLINK_PARAM_CMODE_MAX + 1];
+       const struct devlink_param *param = param_item->param;
+       struct devlink_param_gset_ctx ctx;
+       struct nlattr *param_values_list;
+       struct nlattr *param_attr;
+       int nla_type;
+       void *hdr;
+       int err;
+       int i;
+
+       /* Collect the value for each supported configuration mode;
+        * the driverinit value comes from the cached copy, the rest
+        * are read from the driver via ->get()
+        */
+       for (i = 0; i <= DEVLINK_PARAM_CMODE_MAX; i++) {
+               if (!devlink_param_cmode_is_supported(param, i))
+                       continue;
+               if (i == DEVLINK_PARAM_CMODE_DRIVERINIT) {
+                       if (!param_item->driverinit_value_valid)
+                               return -EOPNOTSUPP;
+                       param_value[i] = param_item->driverinit_value;
+               } else {
+                       ctx.cmode = i;
+                       err = devlink_param_get(devlink, param, &ctx);
+                       if (err)
+                               return err;
+                       param_value[i] = ctx.val;
+               }
+       }
+
+       hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+       if (!hdr)
+               return -EMSGSIZE;
+
+       if (devlink_nl_put_handle(msg, devlink))
+               goto genlmsg_cancel;
+       param_attr = nla_nest_start(msg, DEVLINK_ATTR_PARAM);
+       if (!param_attr)
+               goto genlmsg_cancel;
+       if (nla_put_string(msg, DEVLINK_ATTR_PARAM_NAME, param->name))
+               goto param_nest_cancel;
+       if (param->generic && nla_put_flag(msg, DEVLINK_ATTR_PARAM_GENERIC))
+               goto param_nest_cancel;
+
+       nla_type = devlink_param_type_to_nla_type(param->type);
+       if (nla_type < 0)
+               goto param_nest_cancel;
+       if (nla_put_u8(msg, DEVLINK_ATTR_PARAM_TYPE, nla_type))
+               goto param_nest_cancel;
+
+       param_values_list = nla_nest_start(msg, DEVLINK_ATTR_PARAM_VALUES_LIST);
+       if (!param_values_list)
+               goto param_nest_cancel;
+
+       for (i = 0; i <= DEVLINK_PARAM_CMODE_MAX; i++) {
+               if (!devlink_param_cmode_is_supported(param, i))
+                       continue;
+               err = devlink_nl_param_value_fill_one(msg, param->type,
+                                                     i, param_value[i]);
+               if (err)
+                       goto values_list_nest_cancel;
+       }
+
+       nla_nest_end(msg, param_values_list);
+       nla_nest_end(msg, param_attr);
+       genlmsg_end(msg, hdr);
+       return 0;
+
+values_list_nest_cancel:
+       nla_nest_end(msg, param_values_list);
+param_nest_cancel:
+       nla_nest_cancel(msg, param_attr);
+genlmsg_cancel:
+       genlmsg_cancel(msg, hdr);
+       return -EMSGSIZE;
+}
+
+static void devlink_param_notify(struct devlink *devlink,
+                                struct devlink_param_item *param_item,
+                                enum devlink_command cmd)
+{
+       struct sk_buff *msg;
+       int err;
+
+       WARN_ON(cmd != DEVLINK_CMD_PARAM_NEW && cmd != DEVLINK_CMD_PARAM_DEL);
+
+       msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+       if (!msg)
+               return;
+       err = devlink_nl_param_fill(msg, devlink, param_item, cmd, 0, 0, 0);
+       if (err) {
+               nlmsg_free(msg);
+               return;
+       }
+
+       genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
+                               msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+}
+
+static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg,
+                                          struct netlink_callback *cb)
+{
+       struct devlink_param_item *param_item;
+       struct devlink *devlink;
+       int start = cb->args[0];
+       int idx = 0;
+       int err;
+
+       mutex_lock(&devlink_mutex);
+       list_for_each_entry(devlink, &devlink_list, list) {
+               if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+                       continue;
+               mutex_lock(&devlink->lock);
+               list_for_each_entry(param_item, &devlink->param_list, list) {
+                       if (idx < start) {
+                               idx++;
+                               continue;
+                       }
+                       err = devlink_nl_param_fill(msg, devlink, param_item,
+                                                   DEVLINK_CMD_PARAM_GET,
+                                                   NETLINK_CB(cb->skb).portid,
+                                                   cb->nlh->nlmsg_seq,
+                                                   NLM_F_MULTI);
+                       if (err) {
+                               mutex_unlock(&devlink->lock);
+                               goto out;
+                       }
+                       idx++;
+               }
+               mutex_unlock(&devlink->lock);
+       }
+out:
+       mutex_unlock(&devlink_mutex);
+
+       cb->args[0] = idx;
+       return msg->len;
+}
+
+static int
+devlink_param_type_get_from_info(struct genl_info *info,
+                                enum devlink_param_type *param_type)
+{
+       if (!info->attrs[DEVLINK_ATTR_PARAM_TYPE])
+               return -EINVAL;
+
+       switch (nla_get_u8(info->attrs[DEVLINK_ATTR_PARAM_TYPE])) {
+       case NLA_U8:
+               *param_type = DEVLINK_PARAM_TYPE_U8;
+               break;
+       case NLA_U16:
+               *param_type = DEVLINK_PARAM_TYPE_U16;
+               break;
+       case NLA_U32:
+               *param_type = DEVLINK_PARAM_TYPE_U32;
+               break;
+       case NLA_STRING:
+               *param_type = DEVLINK_PARAM_TYPE_STRING;
+               break;
+       case NLA_FLAG:
+               *param_type = DEVLINK_PARAM_TYPE_BOOL;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+static int
+devlink_param_value_get_from_info(const struct devlink_param *param,
+                                 struct genl_info *info,
+                                 union devlink_param_value *value)
+{
+       if (param->type != DEVLINK_PARAM_TYPE_BOOL &&
+           !info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA])
+               return -EINVAL;
+
+       switch (param->type) {
+       case DEVLINK_PARAM_TYPE_U8:
+               value->vu8 = nla_get_u8(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]);
+               break;
+       case DEVLINK_PARAM_TYPE_U16:
+               value->vu16 = nla_get_u16(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]);
+               break;
+       case DEVLINK_PARAM_TYPE_U32:
+               value->vu32 = nla_get_u32(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]);
+               break;
+       case DEVLINK_PARAM_TYPE_STRING:
+               if (nla_len(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]) >
+                   DEVLINK_PARAM_MAX_STRING_VALUE)
+                       return -EINVAL;
+               value->vstr = nla_data(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]);
+               break;
+       case DEVLINK_PARAM_TYPE_BOOL:
+               value->vbool = info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA] ?
+                              true : false;
+               break;
+       }
+       return 0;
+}
+
+static struct devlink_param_item *
+devlink_param_get_from_info(struct devlink *devlink,
+                           struct genl_info *info)
+{
+       char *param_name;
+
+       if (!info->attrs[DEVLINK_ATTR_PARAM_NAME])
+               return NULL;
+
+       param_name = nla_data(info->attrs[DEVLINK_ATTR_PARAM_NAME]);
+       return devlink_param_find_by_name(&devlink->param_list, param_name);
+}
+
+static int devlink_nl_cmd_param_get_doit(struct sk_buff *skb,
+                                        struct genl_info *info)
+{
+       struct devlink *devlink = info->user_ptr[0];
+       struct devlink_param_item *param_item;
+       struct sk_buff *msg;
+       int err;
+
+       param_item = devlink_param_get_from_info(devlink, info);
+       if (!param_item)
+               return -EINVAL;
+
+       msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+       if (!msg)
+               return -ENOMEM;
+
+       err = devlink_nl_param_fill(msg, devlink, param_item,
+                                   DEVLINK_CMD_PARAM_GET,
+                                   info->snd_portid, info->snd_seq, 0);
+       if (err) {
+               nlmsg_free(msg);
+               return err;
+       }
+
+       return genlmsg_reply(msg, info);
+}
+
+static int devlink_nl_cmd_param_set_doit(struct sk_buff *skb,
+                                        struct genl_info *info)
+{
+       struct devlink *devlink = info->user_ptr[0];
+       enum devlink_param_type param_type;
+       struct devlink_param_gset_ctx ctx;
+       enum devlink_param_cmode cmode;
+       struct devlink_param_item *param_item;
+       const struct devlink_param *param;
+       union devlink_param_value value;
+       int err = 0;
+
+       param_item = devlink_param_get_from_info(devlink, info);
+       if (!param_item)
+               return -EINVAL;
+       param = param_item->param;
+       err = devlink_param_type_get_from_info(info, &param_type);
+       if (err)
+               return err;
+       if (param_type != param->type)
+               return -EINVAL;
+       err = devlink_param_value_get_from_info(param, info, &value);
+       if (err)
+               return err;
+       if (param->validate) {
+               err = param->validate(devlink, param->id, value, info->extack);
+               if (err)
+                       return err;
+       }
+
+       if (!info->attrs[DEVLINK_ATTR_PARAM_VALUE_CMODE])
+               return -EINVAL;
+       cmode = nla_get_u8(info->attrs[DEVLINK_ATTR_PARAM_VALUE_CMODE]);
+       if (!devlink_param_cmode_is_supported(param, cmode))
+               return -EOPNOTSUPP;
+
+       if (cmode == DEVLINK_PARAM_CMODE_DRIVERINIT) {
+               param_item->driverinit_value = value;
+               param_item->driverinit_value_valid = true;
+       } else {
+               if (!param->set)
+                       return -EOPNOTSUPP;
+               ctx.val = value;
+               ctx.cmode = cmode;
+               err = devlink_param_set(devlink, param, &ctx);
+               if (err)
+                       return err;
+       }
+
+       devlink_param_notify(devlink, param_item, DEVLINK_CMD_PARAM_NEW);
+       return 0;
+}
+
+static int devlink_param_register_one(struct devlink *devlink,
+                                     const struct devlink_param *param)
+{
+       struct devlink_param_item *param_item;
+
+       if (devlink_param_find_by_name(&devlink->param_list,
+                                      param->name))
+               return -EEXIST;
+
+       if (param->supported_cmodes == BIT(DEVLINK_PARAM_CMODE_DRIVERINIT))
+               WARN_ON(param->get || param->set);
+       else
+               WARN_ON(!param->get || !param->set);
+
+       param_item = kzalloc(sizeof(*param_item), GFP_KERNEL);
+       if (!param_item)
+               return -ENOMEM;
+       param_item->param = param;
+
+       list_add_tail(&param_item->list, &devlink->param_list);
+       devlink_param_notify(devlink, param_item, DEVLINK_CMD_PARAM_NEW);
+       return 0;
+}
+
+static void devlink_param_unregister_one(struct devlink *devlink,
+                                        const struct devlink_param *param)
+{
+       struct devlink_param_item *param_item;
+
+       param_item = devlink_param_find_by_name(&devlink->param_list,
+                                               param->name);
+       WARN_ON(!param_item);
+       devlink_param_notify(devlink, param_item, DEVLINK_CMD_PARAM_DEL);
+       list_del(&param_item->list);
+       kfree(param_item);
+}
+
+static int devlink_nl_region_snapshot_id_put(struct sk_buff *msg,
+                                            struct devlink *devlink,
+                                            struct devlink_snapshot *snapshot)
+{
+       struct nlattr *snap_attr;
+       int err;
+
+       snap_attr = nla_nest_start(msg, DEVLINK_ATTR_REGION_SNAPSHOT);
+       if (!snap_attr)
+               return -EINVAL;
+
+       err = nla_put_u32(msg, DEVLINK_ATTR_REGION_SNAPSHOT_ID, snapshot->id);
+       if (err)
+               goto nla_put_failure;
+
+       nla_nest_end(msg, snap_attr);
+       return 0;
+
+nla_put_failure:
+       nla_nest_cancel(msg, snap_attr);
+       return err;
+}
+
+static int devlink_nl_region_snapshots_id_put(struct sk_buff *msg,
+                                             struct devlink *devlink,
+                                             struct devlink_region *region)
+{
+       struct devlink_snapshot *snapshot;
+       struct nlattr *snapshots_attr;
+       int err;
+
+       snapshots_attr = nla_nest_start(msg, DEVLINK_ATTR_REGION_SNAPSHOTS);
+       if (!snapshots_attr)
+               return -EINVAL;
+
+       list_for_each_entry(snapshot, &region->snapshot_list, list) {
+               err = devlink_nl_region_snapshot_id_put(msg, devlink, snapshot);
+               if (err)
+                       goto nla_put_failure;
+       }
+
+       nla_nest_end(msg, snapshots_attr);
+       return 0;
+
+nla_put_failure:
+       nla_nest_cancel(msg, snapshots_attr);
+       return err;
+}
+
+static int devlink_nl_region_fill(struct sk_buff *msg, struct devlink *devlink,
+                                 enum devlink_command cmd, u32 portid,
+                                 u32 seq, int flags,
+                                 struct devlink_region *region)
+{
+       void *hdr;
+       int err;
+
+       hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+       if (!hdr)
+               return -EMSGSIZE;
+
+       err = devlink_nl_put_handle(msg, devlink);
+       if (err)
+               goto nla_put_failure;
+
+       err = nla_put_string(msg, DEVLINK_ATTR_REGION_NAME, region->name);
+       if (err)
+               goto nla_put_failure;
+
+       err = nla_put_u64_64bit(msg, DEVLINK_ATTR_REGION_SIZE,
+                               region->size,
+                               DEVLINK_ATTR_PAD);
+       if (err)
+               goto nla_put_failure;
+
+       err = devlink_nl_region_snapshots_id_put(msg, devlink, region);
+       if (err)
+               goto nla_put_failure;
+
+       genlmsg_end(msg, hdr);
+       return 0;
+
+nla_put_failure:
+       genlmsg_cancel(msg, hdr);
+       return err;
+}
+
+static void devlink_nl_region_notify(struct devlink_region *region,
+                                    struct devlink_snapshot *snapshot,
+                                    enum devlink_command cmd)
+{
+       struct devlink *devlink = region->devlink;
+       struct sk_buff *msg;
+       void *hdr;
+       int err;
+
+       WARN_ON(cmd != DEVLINK_CMD_REGION_NEW && cmd != DEVLINK_CMD_REGION_DEL);
+
+       msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+       if (!msg)
+               return;
+
+       hdr = genlmsg_put(msg, 0, 0, &devlink_nl_family, 0, cmd);
+       if (!hdr)
+               goto out_free_msg;
+
+       err = devlink_nl_put_handle(msg, devlink);
+       if (err)
+               goto out_cancel_msg;
+
+       err = nla_put_string(msg, DEVLINK_ATTR_REGION_NAME,
+                            region->name);
+       if (err)
+               goto out_cancel_msg;
+
+       if (snapshot) {
+               err = nla_put_u32(msg, DEVLINK_ATTR_REGION_SNAPSHOT_ID,
+                                 snapshot->id);
+               if (err)
+                       goto out_cancel_msg;
+       } else {
+               err = nla_put_u64_64bit(msg, DEVLINK_ATTR_REGION_SIZE,
+                                       region->size, DEVLINK_ATTR_PAD);
+               if (err)
+                       goto out_cancel_msg;
+       }
+       genlmsg_end(msg, hdr);
+
+       genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
+                               msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+
+       return;
+
+out_cancel_msg:
+       genlmsg_cancel(msg, hdr);
+out_free_msg:
+       nlmsg_free(msg);
+}
+
+static int devlink_nl_cmd_region_get_doit(struct sk_buff *skb,
+                                         struct genl_info *info)
+{
+       struct devlink *devlink = info->user_ptr[0];
+       struct devlink_region *region;
+       const char *region_name;
+       struct sk_buff *msg;
+       int err;
+
+       if (!info->attrs[DEVLINK_ATTR_REGION_NAME])
+               return -EINVAL;
+
+       region_name = nla_data(info->attrs[DEVLINK_ATTR_REGION_NAME]);
+       region = devlink_region_get_by_name(devlink, region_name);
+       if (!region)
+               return -EINVAL;
+
+       msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+       if (!msg)
+               return -ENOMEM;
+
+       err = devlink_nl_region_fill(msg, devlink, DEVLINK_CMD_REGION_GET,
+                                    info->snd_portid, info->snd_seq, 0,
+                                    region);
+       if (err) {
+               nlmsg_free(msg);
+               return err;
+       }
+
+       return genlmsg_reply(msg, info);
+}
+
+static int devlink_nl_cmd_region_get_dumpit(struct sk_buff *msg,
+                                           struct netlink_callback *cb)
+{
+       struct devlink_region *region;
+       struct devlink *devlink;
+       int start = cb->args[0];
+       int idx = 0;
+       int err;
+
+       mutex_lock(&devlink_mutex);
+       list_for_each_entry(devlink, &devlink_list, list) {
+               if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+                       continue;
+
+               mutex_lock(&devlink->lock);
+               list_for_each_entry(region, &devlink->region_list, list) {
+                       if (idx < start) {
+                               idx++;
+                               continue;
+                       }
+                       err = devlink_nl_region_fill(msg, devlink,
+                                                    DEVLINK_CMD_REGION_GET,
+                                                    NETLINK_CB(cb->skb).portid,
+                                                    cb->nlh->nlmsg_seq,
+                                                    NLM_F_MULTI, region);
+                       if (err) {
+                               mutex_unlock(&devlink->lock);
+                               goto out;
+                       }
+                       idx++;
+               }
+               mutex_unlock(&devlink->lock);
+       }
+out:
+       mutex_unlock(&devlink_mutex);
+       cb->args[0] = idx;
+       return msg->len;
+}
+
+static int devlink_nl_cmd_region_del(struct sk_buff *skb,
+                                    struct genl_info *info)
+{
+       struct devlink *devlink = info->user_ptr[0];
+       struct devlink_snapshot *snapshot;
+       struct devlink_region *region;
+       const char *region_name;
+       u32 snapshot_id;
+
+       if (!info->attrs[DEVLINK_ATTR_REGION_NAME] ||
+           !info->attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID])
+               return -EINVAL;
+
+       region_name = nla_data(info->attrs[DEVLINK_ATTR_REGION_NAME]);
+       snapshot_id = nla_get_u32(info->attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID]);
+
+       region = devlink_region_get_by_name(devlink, region_name);
+       if (!region)
+               return -EINVAL;
+
+       snapshot = devlink_region_snapshot_get_by_id(region, snapshot_id);
+       if (!snapshot)
+               return -EINVAL;
+
+       devlink_nl_region_notify(region, snapshot, DEVLINK_CMD_REGION_DEL);
+       devlink_region_snapshot_del(snapshot);
+       return 0;
+}
+
+static int devlink_nl_cmd_region_read_chunk_fill(struct sk_buff *msg,
+                                                struct devlink *devlink,
+                                                u8 *chunk, u32 chunk_size,
+                                                u64 addr)
+{
+       struct nlattr *chunk_attr;
+       int err;
+
+       chunk_attr = nla_nest_start(msg, DEVLINK_ATTR_REGION_CHUNK);
+       if (!chunk_attr)
+               return -EINVAL;
+
+       err = nla_put(msg, DEVLINK_ATTR_REGION_CHUNK_DATA, chunk_size, chunk);
+       if (err)
+               goto nla_put_failure;
+
+       err = nla_put_u64_64bit(msg, DEVLINK_ATTR_REGION_CHUNK_ADDR, addr,
+                               DEVLINK_ATTR_PAD);
+       if (err)
+               goto nla_put_failure;
+
+       nla_nest_end(msg, chunk_attr);
+       return 0;
+
+nla_put_failure:
+       nla_nest_cancel(msg, chunk_attr);
+       return err;
+}
+
+#define DEVLINK_REGION_READ_CHUNK_SIZE 256
+
+static int devlink_nl_region_read_snapshot_fill(struct sk_buff *skb,
+                                               struct devlink *devlink,
+                                               struct devlink_region *region,
+                                               struct nlattr **attrs,
+                                               u64 start_offset,
+                                               u64 end_offset,
+                                               bool dump,
+                                               u64 *new_offset)
+{
+       struct devlink_snapshot *snapshot;
+       u64 curr_offset = start_offset;
+       u32 snapshot_id;
+       int err = 0;
+
+       *new_offset = start_offset;
+
+       snapshot_id = nla_get_u32(attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID]);
+       snapshot = devlink_region_snapshot_get_by_id(region, snapshot_id);
+       if (!snapshot)
+               return -EINVAL;
+
+       if (end_offset > snapshot->data_len || dump)
+               end_offset = snapshot->data_len;
+
+       while (curr_offset < end_offset) {
+               u32 data_size;
+               u8 *data;
+
+               if (end_offset - curr_offset < DEVLINK_REGION_READ_CHUNK_SIZE)
+                       data_size = end_offset - curr_offset;
+               else
+                       data_size = DEVLINK_REGION_READ_CHUNK_SIZE;
+
+               data = &snapshot->data[curr_offset];
+               err = devlink_nl_cmd_region_read_chunk_fill(skb, devlink,
+                                                           data, data_size,
+                                                           curr_offset);
+               if (err)
+                       break;
+
+               curr_offset += data_size;
+       }
+       *new_offset = curr_offset;
+
+       return err;
+}
+
+static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
+                                            struct netlink_callback *cb)
+{
+       u64 ret_offset, start_offset, end_offset = 0;
+       struct nlattr *attrs[DEVLINK_ATTR_MAX + 1];
+       const struct genl_ops *ops = cb->data;
+       struct devlink_region *region;
+       struct nlattr *chunks_attr;
+       const char *region_name;
+       struct devlink *devlink;
+       bool dump = true;
+       void *hdr;
+       int err;
+
+       start_offset = *((u64 *)&cb->args[0]);
+
+       err = nlmsg_parse(cb->nlh, GENL_HDRLEN + devlink_nl_family.hdrsize,
+                         attrs, DEVLINK_ATTR_MAX, ops->policy, NULL);
+       if (err)
+               goto out;
+
+       devlink = devlink_get_from_attrs(sock_net(cb->skb->sk), attrs);
+       if (IS_ERR(devlink))
+               goto out;
+
+       mutex_lock(&devlink_mutex);
+       mutex_lock(&devlink->lock);
+
+       if (!attrs[DEVLINK_ATTR_REGION_NAME] ||
+           !attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID])
+               goto out_unlock;
+
+       region_name = nla_data(attrs[DEVLINK_ATTR_REGION_NAME]);
+       region = devlink_region_get_by_name(devlink, region_name);
+       if (!region)
+               goto out_unlock;
+
+       hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+                         &devlink_nl_family, NLM_F_ACK | NLM_F_MULTI,
+                         DEVLINK_CMD_REGION_READ);
+       if (!hdr)
+               goto out_unlock;
+
+       err = devlink_nl_put_handle(skb, devlink);
+       if (err)
+               goto nla_put_failure;
+
+       err = nla_put_string(skb, DEVLINK_ATTR_REGION_NAME, region_name);
+       if (err)
+               goto nla_put_failure;
+
+       chunks_attr = nla_nest_start(skb, DEVLINK_ATTR_REGION_CHUNKS);
+       if (!chunks_attr)
+               goto nla_put_failure;
+
+       if (attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR] &&
+           attrs[DEVLINK_ATTR_REGION_CHUNK_LEN]) {
+               if (!start_offset)
+                       start_offset =
+                               nla_get_u64(attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR]);
+
+               end_offset = nla_get_u64(attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR]);
+               end_offset += nla_get_u64(attrs[DEVLINK_ATTR_REGION_CHUNK_LEN]);
+               dump = false;
+       }
+
+       err = devlink_nl_region_read_snapshot_fill(skb, devlink,
+                                                  region, attrs,
+                                                  start_offset,
+                                                  end_offset, dump,
+                                                  &ret_offset);
+
+       if (err && err != -EMSGSIZE)
+               goto nla_put_failure;
+
+       /* Bail out if no progress was made, to prevent an infinite loop */
+       if (ret_offset == start_offset)
+               goto nla_put_failure;
+
+       *((u64 *)&cb->args[0]) = ret_offset;
+
+       nla_nest_end(skb, chunks_attr);
+       genlmsg_end(skb, hdr);
+       mutex_unlock(&devlink->lock);
+       mutex_unlock(&devlink_mutex);
+
+       return skb->len;
+
+nla_put_failure:
+       genlmsg_cancel(skb, hdr);
+out_unlock:
+       mutex_unlock(&devlink->lock);
+       mutex_unlock(&devlink_mutex);
+out:
+       return 0;
+}
+
+static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
+       [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
+       [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
+       [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32 },
+       [DEVLINK_ATTR_PORT_TYPE] = { .type = NLA_U16 },
+       [DEVLINK_ATTR_PORT_SPLIT_COUNT] = { .type = NLA_U32 },
+       [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32 },
+       [DEVLINK_ATTR_SB_POOL_INDEX] = { .type = NLA_U16 },
+       [DEVLINK_ATTR_SB_POOL_TYPE] = { .type = NLA_U8 },
+       [DEVLINK_ATTR_SB_POOL_SIZE] = { .type = NLA_U32 },
+       [DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE] = { .type = NLA_U8 },
+       [DEVLINK_ATTR_SB_THRESHOLD] = { .type = NLA_U32 },
+       [DEVLINK_ATTR_SB_TC_INDEX] = { .type = NLA_U16 },
+       [DEVLINK_ATTR_ESWITCH_MODE] = { .type = NLA_U16 },
+       [DEVLINK_ATTR_ESWITCH_INLINE_MODE] = { .type = NLA_U8 },
+       [DEVLINK_ATTR_ESWITCH_ENCAP_MODE] = { .type = NLA_U8 },
+       [DEVLINK_ATTR_DPIPE_TABLE_NAME] = { .type = NLA_NUL_STRING },
+       [DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED] = { .type = NLA_U8 },
+       [DEVLINK_ATTR_RESOURCE_ID] = { .type = NLA_U64 },
+       [DEVLINK_ATTR_RESOURCE_SIZE] = { .type = NLA_U64 },
+       [DEVLINK_ATTR_PARAM_NAME] = { .type = NLA_NUL_STRING },
+       [DEVLINK_ATTR_PARAM_TYPE] = { .type = NLA_U8 },
+       [DEVLINK_ATTR_PARAM_VALUE_CMODE] = { .type = NLA_U8 },
+       [DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING },
+       [DEVLINK_ATTR_REGION_SNAPSHOT_ID] = { .type = NLA_U32 },
+};
+
+static const struct genl_ops devlink_nl_ops[] = {
+       {
+               .cmd = DEVLINK_CMD_GET,
+               .doit = devlink_nl_cmd_get_doit,
+               .dumpit = devlink_nl_cmd_get_dumpit,
+               .policy = devlink_nl_policy,
+               .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+               /* can be retrieved by unprivileged users */
+       },
+       {
+               .cmd = DEVLINK_CMD_PORT_GET,
+               .doit = devlink_nl_cmd_port_get_doit,
+               .dumpit = devlink_nl_cmd_port_get_dumpit,
+               .policy = devlink_nl_policy,
+               .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
+               /* can be retrieved by unprivileged users */
+       },
+       {
+               .cmd = DEVLINK_CMD_PORT_SET,
+               .doit = devlink_nl_cmd_port_set_doit,
+               .policy = devlink_nl_policy,
+               .flags = GENL_ADMIN_PERM,
+               .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
+       },
+       {
+               .cmd = DEVLINK_CMD_PORT_SPLIT,
+               .doit = devlink_nl_cmd_port_split_doit,
+               .policy = devlink_nl_policy,
+               .flags = GENL_ADMIN_PERM,
+               .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+                                 DEVLINK_NL_FLAG_NO_LOCK,
+       },
+       {
+               .cmd = DEVLINK_CMD_PORT_UNSPLIT,
+               .doit = devlink_nl_cmd_port_unsplit_doit,
+               .policy = devlink_nl_policy,
+               .flags = GENL_ADMIN_PERM,
+               .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+                                 DEVLINK_NL_FLAG_NO_LOCK,
+       },
+       {
+               .cmd = DEVLINK_CMD_SB_GET,
+               .doit = devlink_nl_cmd_sb_get_doit,
+               .dumpit = devlink_nl_cmd_sb_get_dumpit,
+               .policy = devlink_nl_policy,
+               .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+                                 DEVLINK_NL_FLAG_NEED_SB,
+               /* can be retrieved by unprivileged users */
+       },
+       {
+               .cmd = DEVLINK_CMD_SB_POOL_GET,
+               .doit = devlink_nl_cmd_sb_pool_get_doit,
+               .dumpit = devlink_nl_cmd_sb_pool_get_dumpit,
+               .policy = devlink_nl_policy,
+               .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+                                 DEVLINK_NL_FLAG_NEED_SB,
+               /* can be retrieved by unprivileged users */
+       },
+       {
+               .cmd = DEVLINK_CMD_SB_POOL_SET,
+               .doit = devlink_nl_cmd_sb_pool_set_doit,
+               .policy = devlink_nl_policy,
+               .flags = GENL_ADMIN_PERM,
+               .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+                                 DEVLINK_NL_FLAG_NEED_SB,
+       },
+       {
+               .cmd = DEVLINK_CMD_SB_PORT_POOL_GET,
+               .doit = devlink_nl_cmd_sb_port_pool_get_doit,
+               .dumpit = devlink_nl_cmd_sb_port_pool_get_dumpit,
+               .policy = devlink_nl_policy,
+               .internal_flags = DEVLINK_NL_FLAG_NEED_PORT |
+                                 DEVLINK_NL_FLAG_NEED_SB,
+               /* can be retrieved by unprivileged users */
+       },
+       {
+               .cmd = DEVLINK_CMD_SB_PORT_POOL_SET,
+               .doit = devlink_nl_cmd_sb_port_pool_set_doit,
+               .policy = devlink_nl_policy,
+               .flags = GENL_ADMIN_PERM,
+               .internal_flags = DEVLINK_NL_FLAG_NEED_PORT |
+                                 DEVLINK_NL_FLAG_NEED_SB,
+       },
+       {
+               .cmd = DEVLINK_CMD_SB_TC_POOL_BIND_GET,
+               .doit = devlink_nl_cmd_sb_tc_pool_bind_get_doit,
+               .dumpit = devlink_nl_cmd_sb_tc_pool_bind_get_dumpit,
+               .policy = devlink_nl_policy,
+               .internal_flags = DEVLINK_NL_FLAG_NEED_PORT |
+                                 DEVLINK_NL_FLAG_NEED_SB,
+               /* can be retrieved by unprivileged users */
+       },
+       {
+               .cmd = DEVLINK_CMD_SB_TC_POOL_BIND_SET,
+               .doit = devlink_nl_cmd_sb_tc_pool_bind_set_doit,
+               .policy = devlink_nl_policy,
+               .flags = GENL_ADMIN_PERM,
+               .internal_flags = DEVLINK_NL_FLAG_NEED_PORT |
+                                 DEVLINK_NL_FLAG_NEED_SB,
+       },
+       {
+               .cmd = DEVLINK_CMD_SB_OCC_SNAPSHOT,
+               .doit = devlink_nl_cmd_sb_occ_snapshot_doit,
+               .policy = devlink_nl_policy,
+               .flags = GENL_ADMIN_PERM,
+               .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+                                 DEVLINK_NL_FLAG_NEED_SB,
+       },
+       {
+               .cmd = DEVLINK_CMD_SB_OCC_MAX_CLEAR,
+               .doit = devlink_nl_cmd_sb_occ_max_clear_doit,
+               .policy = devlink_nl_policy,
+               .flags = GENL_ADMIN_PERM,
+               .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+                                 DEVLINK_NL_FLAG_NEED_SB,
+       },
+       {
+               .cmd = DEVLINK_CMD_ESWITCH_GET,
+               .doit = devlink_nl_cmd_eswitch_get_doit,
+               .policy = devlink_nl_policy,
+               .flags = GENL_ADMIN_PERM,
+               .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+       },
+       {
+               .cmd = DEVLINK_CMD_ESWITCH_SET,
+               .doit = devlink_nl_cmd_eswitch_set_doit,
+               .policy = devlink_nl_policy,
+               .flags = GENL_ADMIN_PERM,
+               .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+                                 DEVLINK_NL_FLAG_NO_LOCK,
+       },
+       {
+               .cmd = DEVLINK_CMD_DPIPE_TABLE_GET,
+               .doit = devlink_nl_cmd_dpipe_table_get,
+               .policy = devlink_nl_policy,
+               .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+               /* can be retrieved by unprivileged users */
+       },
+       {
+               .cmd = DEVLINK_CMD_DPIPE_ENTRIES_GET,
+               .doit = devlink_nl_cmd_dpipe_entries_get,
+               .policy = devlink_nl_policy,
+               .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+               /* can be retrieved by unprivileged users */
+       },
+       {
+               .cmd = DEVLINK_CMD_DPIPE_HEADERS_GET,
+               .doit = devlink_nl_cmd_dpipe_headers_get,
+               .policy = devlink_nl_policy,
+               .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+               /* can be retrieved by unprivileged users */
+       },
+       {
+               .cmd = DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET,
+               .doit = devlink_nl_cmd_dpipe_table_counters_set,
+               .policy = devlink_nl_policy,
+               .flags = GENL_ADMIN_PERM,
+               .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+       },
+       {
+               .cmd = DEVLINK_CMD_RESOURCE_SET,
+               .doit = devlink_nl_cmd_resource_set,
+               .policy = devlink_nl_policy,
+               .flags = GENL_ADMIN_PERM,
+               .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+       },
+       {
+               .cmd = DEVLINK_CMD_RESOURCE_DUMP,
+               .doit = devlink_nl_cmd_resource_dump,
+               .policy = devlink_nl_policy,
+               .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+               /* can be retrieved by unprivileged users */
+       },
+       {
+               .cmd = DEVLINK_CMD_RELOAD,
+               .doit = devlink_nl_cmd_reload,
+               .policy = devlink_nl_policy,
+               .flags = GENL_ADMIN_PERM,
+               .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+                                 DEVLINK_NL_FLAG_NO_LOCK,
+       },
+       {
+               .cmd = DEVLINK_CMD_PARAM_GET,
+               .doit = devlink_nl_cmd_param_get_doit,
+               .dumpit = devlink_nl_cmd_param_get_dumpit,
+               .policy = devlink_nl_policy,
+               .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+               /* can be retrieved by unprivileged users */
+       },
+       {
+               .cmd = DEVLINK_CMD_PARAM_SET,
+               .doit = devlink_nl_cmd_param_set_doit,
+               .policy = devlink_nl_policy,
+               .flags = GENL_ADMIN_PERM,
+               .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+       },
+       {
+               .cmd = DEVLINK_CMD_REGION_GET,
+               .doit = devlink_nl_cmd_region_get_doit,
+               .dumpit = devlink_nl_cmd_region_get_dumpit,
+               .policy = devlink_nl_policy,
+               .flags = GENL_ADMIN_PERM,
+               .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+       },
+       {
+               .cmd = DEVLINK_CMD_REGION_DEL,
+               .doit = devlink_nl_cmd_region_del,
+               .policy = devlink_nl_policy,
+               .flags = GENL_ADMIN_PERM,
+               .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+       },
+       {
+               .cmd = DEVLINK_CMD_REGION_READ,
+               .dumpit = devlink_nl_cmd_region_read_dumpit,
+               .policy = devlink_nl_policy,
+               .flags = GENL_ADMIN_PERM,
+               .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+       },
+};
+
+static struct genl_family devlink_nl_family __ro_after_init = {
+       .name           = DEVLINK_GENL_NAME,
+       .version        = DEVLINK_GENL_VERSION,
+       .maxattr        = DEVLINK_ATTR_MAX,
+       .netnsok        = true,
+       .pre_doit       = devlink_nl_pre_doit,
+       .post_doit      = devlink_nl_post_doit,
+       .module         = THIS_MODULE,
+       .ops            = devlink_nl_ops,
+       .n_ops          = ARRAY_SIZE(devlink_nl_ops),
+       .mcgrps         = devlink_nl_mcgrps,
+       .n_mcgrps       = ARRAY_SIZE(devlink_nl_mcgrps),
+};
+
+/**
+ *     devlink_alloc - Allocate new devlink instance resources
+ *
+ *     @ops: devlink operations
+ *     @priv_size: size of user private data
+ *
+ *     Allocate new devlink instance resources, including devlink index
+ *     and name.
+ */
+struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
+{
+       struct devlink *devlink;
+
+       devlink = kzalloc(sizeof(*devlink) + priv_size, GFP_KERNEL);
+       if (!devlink)
+               return NULL;
+       devlink->ops = ops;
+       devlink_net_set(devlink, &init_net);
+       INIT_LIST_HEAD(&devlink->port_list);
+       INIT_LIST_HEAD(&devlink->sb_list);
        INIT_LIST_HEAD_RCU(&devlink->dpipe_table_list);
        INIT_LIST_HEAD(&devlink->resource_list);
+       INIT_LIST_HEAD(&devlink->param_list);
+       INIT_LIST_HEAD(&devlink->region_list);
        mutex_init(&devlink->lock);
        return devlink;
 }
@@ -3434,6 +4442,320 @@ out:
 }
 EXPORT_SYMBOL_GPL(devlink_resource_occ_get_unregister);
 
+/**
+ *     devlink_params_register - register configuration parameters
+ *
+ *     @devlink: devlink
+ *     @params: configuration parameters array
+ *     @params_count: number of parameters provided
+ *
+ *     Register the configuration parameters supported by the driver.
+ */
+int devlink_params_register(struct devlink *devlink,
+                           const struct devlink_param *params,
+                           size_t params_count)
+{
+       const struct devlink_param *param = params;
+       int i;
+       int err;
+
+       mutex_lock(&devlink->lock);
+       for (i = 0; i < params_count; i++, param++) {
+               if (!param || !param->name || !param->supported_cmodes) {
+                       err = -EINVAL;
+                       goto rollback;
+               }
+               if (param->generic) {
+                       err = devlink_param_generic_verify(param);
+                       if (err)
+                               goto rollback;
+               } else {
+                       err = devlink_param_driver_verify(param);
+                       if (err)
+                               goto rollback;
+               }
+               err = devlink_param_register_one(devlink, param);
+               if (err)
+                       goto rollback;
+       }
+
+       mutex_unlock(&devlink->lock);
+       return 0;
+
+rollback:
+       if (!i)
+               goto unlock;
+       for (param--; i > 0; i--, param--)
+               devlink_param_unregister_one(devlink, param);
+unlock:
+       mutex_unlock(&devlink->lock);
+       return err;
+}
+EXPORT_SYMBOL_GPL(devlink_params_register);
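
[Editor's note: a minimal usage sketch, not part of this diff. It shows a hypothetical driver registering a driverinit-only parameter at probe time, assuming the DEVLINK_PARAM_GENERIC() helper, the generic MAX_MACS ID, and the usual kernel headers this series adds/uses in include/net/devlink.h; all foo_* names are made up. Note how it respects the WARN_ON() in devlink_param_register_one() above: a parameter whose only supported cmode is driverinit carries no get/set ops.]

#include <linux/kernel.h>
#include <net/devlink.h>

static const struct devlink_param foo_devlink_params[] = {
	/* driverinit-only, so no get/set/validate callbacks */
	DEVLINK_PARAM_GENERIC(MAX_MACS,
			      BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
			      NULL, NULL, NULL),
};

static int foo_params_init(struct devlink *devlink)
{
	/* notifies userspace via DEVLINK_CMD_PARAM_NEW and rolls back
	 * already-registered entries if any registration fails
	 */
	return devlink_params_register(devlink, foo_devlink_params,
				       ARRAY_SIZE(foo_devlink_params));
}

static void foo_params_fini(struct devlink *devlink)
{
	devlink_params_unregister(devlink, foo_devlink_params,
				  ARRAY_SIZE(foo_devlink_params));
}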
+
+/**
+ *     devlink_params_unregister - unregister configuration parameters
+ *     @devlink: devlink
+ *     @params: configuration parameters to unregister
+ *     @params_count: number of parameters provided
+ */
+void devlink_params_unregister(struct devlink *devlink,
+                              const struct devlink_param *params,
+                              size_t params_count)
+{
+       const struct devlink_param *param = params;
+       int i;
+
+       mutex_lock(&devlink->lock);
+       for (i = 0; i < params_count; i++, param++)
+               devlink_param_unregister_one(devlink, param);
+       mutex_unlock(&devlink->lock);
+}
+EXPORT_SYMBOL_GPL(devlink_params_unregister);
+
+/**
+ *     devlink_param_driverinit_value_get - get configuration parameter
+ *                                          value for driver initialization
+ *
+ *     @devlink: devlink
+ *     @param_id: parameter ID
+ *     @init_val: value of parameter in driverinit configuration mode
+ *
+ *     This function should be used by the driver to get the driverinit
+ *     configuration value for re-initialization after a reload command.
+ */
+int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id,
+                                      union devlink_param_value *init_val)
+{
+       struct devlink_param_item *param_item;
+
+       if (!devlink->ops || !devlink->ops->reload)
+               return -EOPNOTSUPP;
+
+       param_item = devlink_param_find_by_id(&devlink->param_list, param_id);
+       if (!param_item)
+               return -EINVAL;
+
+       if (!param_item->driverinit_value_valid ||
+           !devlink_param_cmode_is_supported(param_item->param,
+                                             DEVLINK_PARAM_CMODE_DRIVERINIT))
+               return -EOPNOTSUPP;
+
+       *init_val = param_item->driverinit_value;
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_param_driverinit_value_get);
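
[Editor's note: a sketch of the intended call site, continuing the hypothetical foo_* driver above. During init after a reload, the driver asks for the cached driverinit value and falls back to its built-in default when none has been set.]

static u32 foo_max_macs(struct devlink *devlink)
{
	union devlink_param_value val;
	u32 max_macs = 128;	/* hypothetical built-in default */

	/* returns -EOPNOTSUPP if reload is unsupported or no value
	 * has been cached yet; the default above then stands
	 */
	if (!devlink_param_driverinit_value_get(devlink,
						DEVLINK_PARAM_GENERIC_ID_MAX_MACS,
						&val))
		max_macs = val.vu32;
	return max_macs;
}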
+
+/**
+ *     devlink_param_driverinit_value_set - set value of configuration
+ *                                          parameter for driverinit
+ *                                          configuration mode
+ *
+ *     @devlink: devlink
+ *     @param_id: parameter ID
+ *     @init_val: value of parameter to set for driverinit configuration mode
+ *
+ *     This function should be used by the driver to set the default
+ *     value for driverinit configuration mode.
+ */
+int devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id,
+                                      union devlink_param_value init_val)
+{
+       struct devlink_param_item *param_item;
+
+       param_item = devlink_param_find_by_id(&devlink->param_list, param_id);
+       if (!param_item)
+               return -EINVAL;
+
+       if (!devlink_param_cmode_is_supported(param_item->param,
+                                             DEVLINK_PARAM_CMODE_DRIVERINIT))
+               return -EOPNOTSUPP;
+
+       param_item->driverinit_value = init_val;
+       param_item->driverinit_value_valid = true;
+
+       devlink_param_notify(devlink, param_item, DEVLINK_CMD_PARAM_NEW);
+       return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_param_driverinit_value_set);
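
[Editor's note: conversely, a driver would typically publish its default right after registering the parameter, so the first "devlink dev param show" already reports a valid driverinit value. Sketch, same hypothetical names as above.]

static void foo_params_publish_defaults(struct devlink *devlink)
{
	union devlink_param_value val;

	val.vu32 = 128;		/* hypothetical default */
	devlink_param_driverinit_value_set(devlink,
					   DEVLINK_PARAM_GENERIC_ID_MAX_MACS,
					   val);
}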
+
+/**
+ *     devlink_param_value_changed - notify devlink of a parameter's value
+ *                                   change. Should be called by the driver
+ *                                   right after the change.
+ *
+ *     @devlink: devlink
+ *     @param_id: parameter ID
+ *
+ *     This function should be used by the driver to notify devlink of a
+ *     value change, excluding driverinit configuration mode.
+ *     For driverinit configuration mode driver should use the function
+ *     devlink_param_driverinit_value_set() instead.
+ */
+void devlink_param_value_changed(struct devlink *devlink, u32 param_id)
+{
+       struct devlink_param_item *param_item;
+
+       param_item = devlink_param_find_by_id(&devlink->param_list, param_id);
+       WARN_ON(!param_item);
+
+       devlink_param_notify(devlink, param_item, DEVLINK_CMD_PARAM_NEW);
+}
+EXPORT_SYMBOL_GPL(devlink_param_value_changed);
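
[Editor's note: hypothetical sketch. A driver that learns, e.g. from a firmware event, that a runtime or permanent value changed behind devlink's back would call:]

static void foo_handle_fw_config_event(struct devlink *devlink)
{
	/* devlink re-reads the value through ->get() while filling the
	 * DEVLINK_CMD_PARAM_NEW notification for the config mcast group
	 */
	devlink_param_value_changed(devlink,
				    DEVLINK_PARAM_GENERIC_ID_MAX_MACS);
}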
+
+/**
+ *     devlink_region_create - create a new address region
+ *
+ *     @devlink: devlink
+ *     @region_name: region name
+ *     @region_max_snapshots: Maximum supported number of snapshots for region
+ *     @region_size: size of region
+ */
+struct devlink_region *devlink_region_create(struct devlink *devlink,
+                                            const char *region_name,
+                                            u32 region_max_snapshots,
+                                            u64 region_size)
+{
+       struct devlink_region *region;
+       int err = 0;
+
+       mutex_lock(&devlink->lock);
+
+       if (devlink_region_get_by_name(devlink, region_name)) {
+               err = -EEXIST;
+               goto unlock;
+       }
+
+       region = kzalloc(sizeof(*region), GFP_KERNEL);
+       if (!region) {
+               err = -ENOMEM;
+               goto unlock;
+       }
+
+       region->devlink = devlink;
+       region->max_snapshots = region_max_snapshots;
+       region->name = region_name;
+       region->size = region_size;
+       INIT_LIST_HEAD(&region->snapshot_list);
+       list_add_tail(&region->list, &devlink->region_list);
+       devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_NEW);
+
+       mutex_unlock(&devlink->lock);
+       return region;
+
+unlock:
+       mutex_unlock(&devlink->lock);
+       return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(devlink_region_create);
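
[Editor's note: a sketch of the expected probe-time usage; region name, size, and snapshot limit are hypothetical, not from this patch.]

#define FOO_CRSPACE_SIZE		(1024 * 1024)
#define FOO_CRSPACE_MAX_SNAPSHOTS	8

static struct devlink_region *foo_crspace;

static int foo_regions_init(struct devlink *devlink)
{
	/* the (name, size, max snapshots) tuple is advertised to
	 * userspace via DEVLINK_CMD_REGION_NEW
	 */
	foo_crspace = devlink_region_create(devlink, "cr-space",
					    FOO_CRSPACE_MAX_SNAPSHOTS,
					    FOO_CRSPACE_SIZE);
	return PTR_ERR_OR_ZERO(foo_crspace);
}

static void foo_regions_fini(void)
{
	devlink_region_destroy(foo_crspace);
}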
+
+/**
+ *     devlink_region_destroy - destroy address region
+ *
+ *     @region: devlink region to destroy
+ */
+void devlink_region_destroy(struct devlink_region *region)
+{
+       struct devlink *devlink = region->devlink;
+       struct devlink_snapshot *snapshot, *ts;
+
+       mutex_lock(&devlink->lock);
+
+       /* Free all snapshots of region */
+       list_for_each_entry_safe(snapshot, ts, &region->snapshot_list, list)
+               devlink_region_snapshot_del(snapshot);
+
+       list_del(&region->list);
+
+       devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_DEL);
+       mutex_unlock(&devlink->lock);
+       kfree(region);
+}
+EXPORT_SYMBOL_GPL(devlink_region_destroy);
+
+/**
+ *     devlink_region_shapshot_id_get - get snapshot ID
+ *
+ *     This function should be called when adding a new snapshot.
+ *     The driver should use the same ID for snapshots taken on
+ *     multiple regions at the same time/by the same trigger.
+ *
+ *     @devlink: devlink
+ */
+u32 devlink_region_shapshot_id_get(struct devlink *devlink)
+{
+       u32 id;
+
+       mutex_lock(&devlink->lock);
+       id = ++devlink->snapshot_id;
+       mutex_unlock(&devlink->lock);
+
+       return id;
+}
+EXPORT_SYMBOL_GPL(devlink_region_shapshot_id_get);
+
+/**
+ *     devlink_region_snapshot_create - create a new snapshot
+ *
+ *     This will add a new snapshot of a region. The snapshot
+ *     will be stored on the region struct and can be accessed
+ *     from devlink. This is useful for future analyses of snapshots.
+ *     Multiple snapshots can be created on a region.
+ *     The @snapshot_id should be obtained using the getter function.
+ *
+ *     @region: devlink region of the snapshot
+ *     @data_len: size of snapshot data
+ *     @data: snapshot data
+ *     @snapshot_id: snapshot id to be created
+ *     @data_destructor: pointer to destructor function to free data
+ */
+int devlink_region_snapshot_create(struct devlink_region *region, u64 data_len,
+                                  u8 *data, u32 snapshot_id,
+                                  devlink_snapshot_data_dest_t *data_destructor)
+{
+       struct devlink *devlink = region->devlink;
+       struct devlink_snapshot *snapshot;
+       int err;
+
+       mutex_lock(&devlink->lock);
+
+       /* check if region can hold one more snapshot */
+       if (region->cur_snapshots == region->max_snapshots) {
+               err = -ENOMEM;
+               goto unlock;
+       }
+
+       if (devlink_region_snapshot_get_by_id(region, snapshot_id)) {
+               err = -EEXIST;
+               goto unlock;
+       }
+
+       snapshot = kzalloc(sizeof(*snapshot), GFP_KERNEL);
+       if (!snapshot) {
+               err = -ENOMEM;
+               goto unlock;
+       }
+
+       snapshot->id = snapshot_id;
+       snapshot->region = region;
+       snapshot->data = data;
+       snapshot->data_len = data_len;
+       snapshot->data_destructor = data_destructor;
+
+       list_add_tail(&snapshot->list, &region->snapshot_list);
+
+       region->cur_snapshots++;
+
+       devlink_nl_region_notify(region, snapshot, DEVLINK_CMD_REGION_NEW);
+       mutex_unlock(&devlink->lock);
+       return 0;
+
+unlock:
+       mutex_unlock(&devlink->lock);
+       return err;
+}
+EXPORT_SYMBOL_GPL(devlink_region_snapshot_create);
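
[Editor's note: putting the last two helpers together, a sketch continuing the hypothetical foo_* driver. The capture path allocates the buffer, obtains an ID, and hands ownership to devlink only on success; the ID helper keeps the kernel's exported "shapshot" spelling.]

static int foo_take_crspace_snapshot(struct devlink *devlink)
{
	u8 *data;
	u32 id;
	int err;

	data = kvmalloc(FOO_CRSPACE_SIZE, GFP_KERNEL);
	if (!data)
		return -ENOMEM;
	/* ... fill @data from the device ... */

	/* reuse this ID if several regions are snapshotted by one trigger */
	id = devlink_region_shapshot_id_get(devlink);

	/* on success devlink owns @data and frees it via the destructor
	 * when the snapshot is deleted; on failure the caller still owns it
	 */
	err = devlink_region_snapshot_create(foo_crspace, FOO_CRSPACE_SIZE,
					     data, id, kvfree);
	if (err)
		kvfree(data);
	return err;
}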
+
 static int __init devlink_module_init(void)
 {
        return genl_register_family(&devlink_nl_family);
index e677a20180cf304a27154d12c338da046c96a546..c9993c6c2fd4f492d5113d9c328c7bf3ddcfa9f3 100644 (file)
@@ -111,6 +111,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
        [NETIF_F_RX_UDP_TUNNEL_PORT_BIT] =       "rx-udp_tunnel-port-offload",
        [NETIF_F_HW_TLS_RECORD_BIT] =   "tls-hw-record",
        [NETIF_F_HW_TLS_TX_BIT] =        "tls-hw-tx-offload",
+       [NETIF_F_HW_TLS_RX_BIT] =        "tls-hw-rx-offload",
 };
 
 static const char
index f64aa13811eaeedf8f0040bc9f993ad9e1661eca..0ff3953f64aa7830a07dad9c0873813eb944fdd1 100644 (file)
@@ -924,8 +924,7 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
        return 0;
 
 errout:
-       if (nlrule)
-               kfree(nlrule);
+       kfree(nlrule);
        rules_ops_put(ops);
        return err;
 }
index 9dfd145eedcc3367f8356ed9426fdf6124104583..7509bb7f06945f8837a4fb43a9d9304db86164f4 100644 (file)
@@ -3681,7 +3681,7 @@ BPF_CALL_3(bpf_skb_set_tunnel_opt, struct sk_buff *, skb,
        if (unlikely(size > IP_TUNNEL_OPTS_MAX))
                return -ENOMEM;
 
-       ip_tunnel_info_opts_set(info, from, size);
+       ip_tunnel_info_opts_set(info, from, size, TUNNEL_OPTIONS_PRESENT);
 
        return 0;
 }
@@ -4753,6 +4753,7 @@ bpf_base_func_proto(enum bpf_func_id func_id)
        case BPF_FUNC_trace_printk:
                if (capable(CAP_SYS_ADMIN))
                        return bpf_get_trace_printk_proto();
+               /* else: fall through */
        default:
                return NULL;
        }
index 53f96e4f7bf593863b584e050eb7628d4023718a..08a5184f4b344281ed1c458aa62a3033e63a81e5 100644 (file)
@@ -152,7 +152,9 @@ skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
            !dissector_uses_key(flow_dissector,
                                FLOW_DISSECTOR_KEY_ENC_CONTROL) &&
            !dissector_uses_key(flow_dissector,
-                               FLOW_DISSECTOR_KEY_ENC_PORTS))
+                               FLOW_DISSECTOR_KEY_ENC_PORTS) &&
+           !dissector_uses_key(flow_dissector,
+                               FLOW_DISSECTOR_KEY_ENC_IP))
                return;
 
        info = skb_tunnel_info(skb);
@@ -212,6 +214,16 @@ skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
                tp->src = key->tp_src;
                tp->dst = key->tp_dst;
        }
+
+       if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_IP)) {
+               struct flow_dissector_key_ip *ip;
+
+               ip = skb_flow_dissector_target(flow_dissector,
+                                              FLOW_DISSECTOR_KEY_ENC_IP,
+                                              target_container);
+               ip->tos = key->tos;
+               ip->ttl = key->ttl;
+       }
 }
 EXPORT_SYMBOL(skb_flow_dissect_tunnel_info);
 
@@ -589,7 +601,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
        struct flow_dissector_key_tags *key_tags;
        struct flow_dissector_key_vlan *key_vlan;
        enum flow_dissect_ret fdret;
-       bool skip_vlan = false;
+       enum flow_dissector_key_id dissector_vlan = FLOW_DISSECTOR_KEY_MAX;
        int num_hdrs = 0;
        u8 ip_proto = 0;
        bool ret;
@@ -748,14 +760,14 @@ proto_again:
        }
        case htons(ETH_P_8021AD):
        case htons(ETH_P_8021Q): {
-               const struct vlan_hdr *vlan;
+               const struct vlan_hdr *vlan = NULL;
                struct vlan_hdr _vlan;
-               bool vlan_tag_present = skb && skb_vlan_tag_present(skb);
+               __be16 saved_vlan_tpid = proto;
 
-               if (vlan_tag_present)
+               if (dissector_vlan == FLOW_DISSECTOR_KEY_MAX &&
+                   skb && skb_vlan_tag_present(skb)) {
                        proto = skb->protocol;
-
-               if (!vlan_tag_present || eth_type_vlan(skb->protocol)) {
+               } else {
                        vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan),
                                                    data, hlen, &_vlan);
                        if (!vlan) {
@@ -765,20 +777,23 @@ proto_again:
 
                        proto = vlan->h_vlan_encapsulated_proto;
                        nhoff += sizeof(*vlan);
-                       if (skip_vlan) {
-                               fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
-                               break;
-                       }
                }
 
-               skip_vlan = true;
-               if (dissector_uses_key(flow_dissector,
-                                      FLOW_DISSECTOR_KEY_VLAN)) {
+               if (dissector_vlan == FLOW_DISSECTOR_KEY_MAX) {
+                       dissector_vlan = FLOW_DISSECTOR_KEY_VLAN;
+               } else if (dissector_vlan == FLOW_DISSECTOR_KEY_VLAN) {
+                       dissector_vlan = FLOW_DISSECTOR_KEY_CVLAN;
+               } else {
+                       fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
+                       break;
+               }
+
+               if (dissector_uses_key(flow_dissector, dissector_vlan)) {
                        key_vlan = skb_flow_dissector_target(flow_dissector,
-                                                            FLOW_DISSECTOR_KEY_VLAN,
+                                                            dissector_vlan,
                                                             target_container);
 
-                       if (vlan_tag_present) {
+                       if (!vlan) {
                                key_vlan->vlan_id = skb_vlan_tag_get_id(skb);
                                key_vlan->vlan_priority =
                                        (skb_vlan_tag_get_prio(skb) >> VLAN_PRIO_SHIFT);
@@ -789,6 +804,7 @@ proto_again:
                                        (ntohs(vlan->h_vlan_TCI) &
                                         VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
                        }
+                       key_vlan->vlan_tpid = saved_vlan_tpid;
                }
 
                fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
index 8e3fda9e725cba97973ed2ce85ebe6f2e926e7cf..aa19d86937afabf5fb7dd29ce7c2379241281c06 100644 (file)
@@ -1148,7 +1148,8 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
                neigh->nud_state = new;
                err = 0;
                notify = old & NUD_VALID;
-               if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
+               if (((old & (NUD_INCOMPLETE | NUD_PROBE)) ||
+                    (flags & NEIGH_UPDATE_F_ADMIN)) &&
                    (new & NUD_FAILED)) {
                        neigh_invalidate(neigh);
                        notify = 1;
@@ -3273,4 +3274,3 @@ static int __init neigh_init(void)
 }
 
 subsys_initcall(neigh_init);
-
index bb7e80f4ced3746dc6946cfbc3e71520db503e50..0a95bcf64cdc3168ca28ce1b6d68b85e3b693312 100644 (file)
@@ -905,11 +905,20 @@ static const void *rx_queue_namespace(struct kobject *kobj)
        return ns;
 }
 
+static void rx_queue_get_ownership(struct kobject *kobj,
+                                  kuid_t *uid, kgid_t *gid)
+{
+       const struct net *net = rx_queue_namespace(kobj);
+
+       net_ns_get_ownership(net, uid, gid);
+}
+
 static struct kobj_type rx_queue_ktype __ro_after_init = {
        .sysfs_ops = &rx_queue_sysfs_ops,
        .release = rx_queue_release,
        .default_attrs = rx_queue_default_attrs,
-       .namespace = rx_queue_namespace
+       .namespace = rx_queue_namespace,
+       .get_ownership = rx_queue_get_ownership,
 };
 
 static int rx_queue_add_kobject(struct net_device *dev, int index)
@@ -1047,13 +1056,30 @@ static ssize_t traffic_class_show(struct netdev_queue *queue,
                                  char *buf)
 {
        struct net_device *dev = queue->dev;
-       int index = get_netdev_queue_index(queue);
-       int tc = netdev_txq_to_tc(dev, index);
+       int index;
+       int tc;
+
+       if (!netif_is_multiqueue(dev))
+               return -ENOENT;
 
+       index = get_netdev_queue_index(queue);
+
+       /* If queue belongs to subordinate dev use its TC mapping */
+       dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;
+
+       tc = netdev_txq_to_tc(dev, index);
        if (tc < 0)
                return -EINVAL;
 
-       return sprintf(buf, "%u\n", tc);
+       /* We can report the traffic class in one of two ways:
+        * Subordinate device traffic classes are reported with the traffic
+        * class first, and then the subordinate class, so for example TC0 on
+        * subordinate device 2 will be reported as "0-2". If the queue
+        * belongs to the root device, it is reported with just the traffic
+        * class, so just "0" for TC 0 for example.
+        */
+       return dev->num_tc < 0 ? sprintf(buf, "%u%d\n", tc, dev->num_tc) :
+                                sprintf(buf, "%u\n", tc);
 }
 
 #ifdef CONFIG_XPS
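[Editor's note: a short userspace sketch decoding the two output formats described in the comment above; the device path is an example, and the attribute returns -ENOENT on single-queue devices:]

#include <stdio.h>

/* Read /sys/class/net/<dev>/queues/tx-<n>/traffic_class, which is now
 * either "0" (TC 0, root device) or "0-2" (TC 0, subordinate dev 2). */
int main(void)
{
        FILE *f = fopen("/sys/class/net/eth0/queues/tx-0/traffic_class", "r");
        int tc, sb;

        if (!f)
                return 1;
        switch (fscanf(f, "%d-%d", &tc, &sb)) {
        case 2:
                printf("TC %d on subordinate device %d\n", tc, sb);
                break;
        case 1:
                printf("TC %d on the root device\n", tc);
                break;
        default:
                puts("unreadable");
        }
        fclose(f);
        return 0;
}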
@@ -1070,6 +1096,9 @@ static ssize_t tx_maxrate_store(struct netdev_queue *queue,
        int err, index = get_netdev_queue_index(queue);
        u32 rate = 0;
 
+       if (!capable(CAP_NET_ADMIN))
+               return -EPERM;
+
        err = kstrtou32(buf, 10, &rate);
        if (err < 0)
                return err;
@@ -1214,10 +1243,20 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue,
        cpumask_var_t mask;
        unsigned long index;
 
+       if (!netif_is_multiqueue(dev))
+               return -ENOENT;
+
        index = get_netdev_queue_index(queue);
 
        if (dev->num_tc) {
+               /* Do not allow XPS on subordinate device directly */
                num_tc = dev->num_tc;
+               if (num_tc < 0)
+                       return -EINVAL;
+
+               /* If queue belongs to subordinate dev use its map */
+               dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;
+
                tc = netdev_txq_to_tc(dev, index);
                if (tc < 0)
                        return -EINVAL;
@@ -1227,13 +1266,13 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue,
                return -ENOMEM;
 
        rcu_read_lock();
-       dev_maps = rcu_dereference(dev->xps_maps);
+       dev_maps = rcu_dereference(dev->xps_cpus_map);
        if (dev_maps) {
                for_each_possible_cpu(cpu) {
                        int i, tci = cpu * num_tc + tc;
                        struct xps_map *map;
 
-                       map = rcu_dereference(dev_maps->cpu_map[tci]);
+                       map = rcu_dereference(dev_maps->attr_map[tci]);
                        if (!map)
                                continue;
 
@@ -1260,6 +1299,9 @@ static ssize_t xps_cpus_store(struct netdev_queue *queue,
        cpumask_var_t mask;
        int err;
 
+       if (!netif_is_multiqueue(dev))
+               return -ENOENT;
+
        if (!capable(CAP_NET_ADMIN))
                return -EPERM;
 
@@ -1283,6 +1325,88 @@ static ssize_t xps_cpus_store(struct netdev_queue *queue,
 
 static struct netdev_queue_attribute xps_cpus_attribute __ro_after_init
        = __ATTR_RW(xps_cpus);
+
+static ssize_t xps_rxqs_show(struct netdev_queue *queue, char *buf)
+{
+       struct net_device *dev = queue->dev;
+       struct xps_dev_maps *dev_maps;
+       unsigned long *mask, index;
+       int j, len, num_tc = 1, tc = 0;
+
+       index = get_netdev_queue_index(queue);
+
+       if (dev->num_tc) {
+               num_tc = dev->num_tc;
+               tc = netdev_txq_to_tc(dev, index);
+               if (tc < 0)
+                       return -EINVAL;
+       }
+       mask = kcalloc(BITS_TO_LONGS(dev->num_rx_queues), sizeof(long),
+                      GFP_KERNEL);
+       if (!mask)
+               return -ENOMEM;
+
+       rcu_read_lock();
+       dev_maps = rcu_dereference(dev->xps_rxqs_map);
+       if (!dev_maps)
+               goto out_no_maps;
+
+       for (j = -1; j = netif_attrmask_next(j, NULL, dev->num_rx_queues),
+            j < dev->num_rx_queues;) {
+               int i, tci = j * num_tc + tc;
+               struct xps_map *map;
+
+               map = rcu_dereference(dev_maps->attr_map[tci]);
+               if (!map)
+                       continue;
+
+               for (i = map->len; i--;) {
+                       if (map->queues[i] == index) {
+                               set_bit(j, mask);
+                               break;
+                       }
+               }
+       }
+out_no_maps:
+       rcu_read_unlock();
+
+       len = bitmap_print_to_pagebuf(false, buf, mask, dev->num_rx_queues);
+       kfree(mask);
+
+       return len < PAGE_SIZE ? len : -EINVAL;
+}
+
+static ssize_t xps_rxqs_store(struct netdev_queue *queue, const char *buf,
+                             size_t len)
+{
+       struct net_device *dev = queue->dev;
+       struct net *net = dev_net(dev);
+       unsigned long *mask, index;
+       int err;
+
+       if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+               return -EPERM;
+
+       mask = kcalloc(BITS_TO_LONGS(dev->num_rx_queues), sizeof(long),
+                      GFP_KERNEL);
+       if (!mask)
+               return -ENOMEM;
+
+       index = get_netdev_queue_index(queue);
+
+       err = bitmap_parse(buf, len, mask, dev->num_rx_queues);
+       if (err) {
+               kfree(mask);
+               return err;
+       }
+
+       err = __netif_set_xps_queue(dev, mask, index, true);
+       kfree(mask);
+       return err ? : len;
+}
+
+static struct netdev_queue_attribute xps_rxqs_attribute __ro_after_init
+       = __ATTR_RW(xps_rxqs);
 #endif /* CONFIG_XPS */
 
 static struct attribute *netdev_queue_default_attrs[] __ro_after_init = {
@@ -1290,6 +1414,7 @@ static struct attribute *netdev_queue_default_attrs[] __ro_after_init = {
        &queue_traffic_class.attr,
 #ifdef CONFIG_XPS
        &xps_cpus_attribute.attr,
+       &xps_rxqs_attribute.attr,
        &queue_tx_maxrate.attr,
 #endif
        NULL
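[Editor's note: the new xps_rxqs attribute accepts the same comma-separated hex bitmap format as xps_cpus, but its bits select receive queues rather than CPUs. A hedged usage sketch (needs CAP_NET_ADMIN and a kernel with this series; device name is an example):]

#include <fcntl.h>
#include <string.h>
#include <stdio.h>
#include <unistd.h>

/* Steer transmissions of flows last received on RX queue 0 out of
 * TX queue 0 by writing a one-bit mask to the new attribute. */
int main(void)
{
        const char path[] = "/sys/class/net/eth0/queues/tx-0/xps_rxqs";
        const char mask[] = "1";        /* bit 0 => rx queue 0 */
        int fd = open(path, O_WRONLY);

        if (fd < 0)
                return 1;
        if (write(fd, mask, strlen(mask)) < 0)
                perror("write");
        close(fd);
        return 0;
}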
@@ -1315,11 +1440,20 @@ static const void *netdev_queue_namespace(struct kobject *kobj)
        return ns;
 }
 
+static void netdev_queue_get_ownership(struct kobject *kobj,
+                                      kuid_t *uid, kgid_t *gid)
+{
+       const struct net *net = netdev_queue_namespace(kobj);
+
+       net_ns_get_ownership(net, uid, gid);
+}
+
 static struct kobj_type netdev_queue_ktype __ro_after_init = {
        .sysfs_ops = &netdev_queue_sysfs_ops,
        .release = netdev_queue_release,
        .default_attrs = netdev_queue_default_attrs,
        .namespace = netdev_queue_namespace,
+       .get_ownership = netdev_queue_get_ownership,
 };
 
 static int netdev_queue_add_kobject(struct net_device *dev, int index)
@@ -1509,6 +1643,14 @@ static const void *net_namespace(struct device *d)
        return dev_net(dev);
 }
 
+static void net_get_ownership(struct device *d, kuid_t *uid, kgid_t *gid)
+{
+       struct net_device *dev = to_net_dev(d);
+       const struct net *net = dev_net(dev);
+
+       net_ns_get_ownership(net, uid, gid);
+}
+
 static struct class net_class __ro_after_init = {
        .name = "net",
        .dev_release = netdev_release,
@@ -1516,6 +1658,7 @@ static struct class net_class __ro_after_init = {
        .dev_uevent = netdev_uevent,
        .ns_type = &net_ns_type_operations,
        .namespace = net_namespace,
+       .get_ownership = net_get_ownership,
 };
 
 #ifdef CONFIG_OF_NET
index a11e03f920d3a7d9d50a37f699ffd66a26a8a057..738871af5efaa246b55c1803259e98f67a60ae54 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/user_namespace.h>
 #include <linux/net_namespace.h>
 #include <linux/sched/task.h>
+#include <linux/uidgid.h>
 
 #include <net/sock.h>
 #include <net/netlink.h>
@@ -448,6 +449,33 @@ dec_ucounts:
        return net;
 }
 
+/**
+ * net_ns_get_ownership - get sysfs ownership data for @net
+ * @net: network namespace in question (can be NULL)
+ * @uid: kernel user ID for sysfs objects
+ * @gid: kernel group ID for sysfs objects
+ *
+ * Returns the uid/gid pair of root in the user namespace associated with the
+ * given network namespace.
+ */
+void net_ns_get_ownership(const struct net *net, kuid_t *uid, kgid_t *gid)
+{
+       if (net) {
+               kuid_t ns_root_uid = make_kuid(net->user_ns, 0);
+               kgid_t ns_root_gid = make_kgid(net->user_ns, 0);
+
+               if (uid_valid(ns_root_uid))
+                       *uid = ns_root_uid;
+
+               if (gid_valid(ns_root_gid))
+                       *gid = ns_root_gid;
+       } else {
+               *uid = GLOBAL_ROOT_UID;
+               *gid = GLOBAL_ROOT_GID;
+       }
+}
+EXPORT_SYMBOL_GPL(net_ns_get_ownership);
+
 static void unhash_nsid(struct net *net, struct net *last)
 {
        struct net *tmp;
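[Editor's note: together with the get_ownership hooks added to net-sysfs above, net_ns_get_ownership() makes network sysfs objects show up owned by the owning user namespace's root. A hedged, unprivileged userspace sketch that maps root in a fresh user namespace and checks what a freshly mounted sysfs reports; without the series, the stat would show the overflow IDs (65534) instead:]

#define _GNU_SOURCE
#include <fcntl.h>
#include <sched.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/mount.h>
#include <sys/stat.h>

static void write_file(const char *path, const char *buf)
{
        int fd = open(path, O_WRONLY);

        if (fd >= 0) {
                write(fd, buf, strlen(buf));
                close(fd);
        }
}

int main(void)
{
        uid_t uid = getuid();   /* capture before unshare() remaps us */
        gid_t gid = getgid();
        struct stat st;
        char map[64];

        if (unshare(CLONE_NEWUSER | CLONE_NEWNS | CLONE_NEWNET))
                return 1;
        write_file("/proc/self/setgroups", "deny");
        snprintf(map, sizeof(map), "0 %u 1", uid);
        write_file("/proc/self/uid_map", map);
        snprintf(map, sizeof(map), "0 %u 1", gid);
        write_file("/proc/self/gid_map", map);

        /* a sysfs mounted inside the new netns now reports its net
         * devices as owned by the namespace's root, i.e. by us */
        mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL);
        if (mount("sysfs", "/sys", "sysfs", 0, NULL))
                return 1;
        if (stat("/sys/class/net/lo/queues/rx-0", &st) == 0)
                printf("rx-0: uid=%u gid=%u\n", st.st_uid, st.st_gid);
        return 0;
}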
index 49368e21d228c3f0bd6684c8831fc1e4398d56b1..7f6938405fa192426bbb680f5396fe3770250ae6 100644 (file)
@@ -1265,7 +1265,7 @@ static ssize_t pktgen_if_write(struct file *file,
                buf[len] = 0;
                if (strcmp(buf, pkt_dev->dst_min) != 0) {
                        memset(pkt_dev->dst_min, 0, sizeof(pkt_dev->dst_min));
-                       strncpy(pkt_dev->dst_min, buf, len);
+                       strcpy(pkt_dev->dst_min, buf);
                        pkt_dev->daddr_min = in_aton(pkt_dev->dst_min);
                        pkt_dev->cur_daddr = pkt_dev->daddr_min;
                }
@@ -1280,14 +1280,12 @@ static ssize_t pktgen_if_write(struct file *file,
                if (len < 0)
                        return len;
 
-
                if (copy_from_user(buf, &user_buffer[i], len))
                        return -EFAULT;
-
                buf[len] = 0;
                if (strcmp(buf, pkt_dev->dst_max) != 0) {
                        memset(pkt_dev->dst_max, 0, sizeof(pkt_dev->dst_max));
-                       strncpy(pkt_dev->dst_max, buf, len);
+                       strcpy(pkt_dev->dst_max, buf);
                        pkt_dev->daddr_max = in_aton(pkt_dev->dst_max);
                        pkt_dev->cur_daddr = pkt_dev->daddr_max;
                }
@@ -1396,7 +1394,7 @@ static ssize_t pktgen_if_write(struct file *file,
                buf[len] = 0;
                if (strcmp(buf, pkt_dev->src_min) != 0) {
                        memset(pkt_dev->src_min, 0, sizeof(pkt_dev->src_min));
-                       strncpy(pkt_dev->src_min, buf, len);
+                       strcpy(pkt_dev->src_min, buf);
                        pkt_dev->saddr_min = in_aton(pkt_dev->src_min);
                        pkt_dev->cur_saddr = pkt_dev->saddr_min;
                }
@@ -1416,7 +1414,7 @@ static ssize_t pktgen_if_write(struct file *file,
                buf[len] = 0;
                if (strcmp(buf, pkt_dev->src_max) != 0) {
                        memset(pkt_dev->src_max, 0, sizeof(pkt_dev->src_max));
-                       strncpy(pkt_dev->src_max, buf, len);
+                       strcpy(pkt_dev->src_max, buf);
                        pkt_dev->saddr_max = in_aton(pkt_dev->src_max);
                        pkt_dev->cur_saddr = pkt_dev->saddr_max;
                }
@@ -2255,7 +2253,7 @@ static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow)
                        x = xfrm_state_lookup_byspi(pn->net, htonl(pkt_dev->spi), AF_INET);
                } else {
                        /* slow path: we don't already have xfrm_state */
-                       x = xfrm_stateonly_find(pn->net, DUMMY_MARK,
+                       x = xfrm_stateonly_find(pn->net, DUMMY_MARK, 0,
                                                (xfrm_address_t *)&pkt_dev->cur_daddr,
                                                (xfrm_address_t *)&pkt_dev->cur_saddr,
                                                AF_INET,
index e3f743c141b3f7b4684fba1ff7de10b27af35349..24431e578310cb1602c05ea26a24069d49309c74 100644 (file)
@@ -964,7 +964,8 @@ static size_t rtnl_xdp_size(void)
 {
        size_t xdp_size = nla_total_size(0) +   /* nest IFLA_XDP */
                          nla_total_size(1) +   /* XDP_ATTACHED */
-                         nla_total_size(4);    /* XDP_PROG_ID */
+                         nla_total_size(4) +   /* XDP_PROG_ID (or 1st mode) */
+                         nla_total_size(4);    /* XDP_<mode>_PROG_ID */
 
        return xdp_size;
 }
@@ -1014,6 +1015,8 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
               + nla_total_size(4)  /* IFLA_IF_NETNSID */
               + nla_total_size(4)  /* IFLA_CARRIER_UP_COUNT */
               + nla_total_size(4)  /* IFLA_CARRIER_DOWN_COUNT */
+              + nla_total_size(4)  /* IFLA_MIN_MTU */
+              + nla_total_size(4)  /* IFLA_MAX_MTU */
               + 0;
 }
 
@@ -1353,27 +1356,51 @@ static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
        return 0;
 }
 
-static u8 rtnl_xdp_attached_mode(struct net_device *dev, u32 *prog_id)
+static u32 rtnl_xdp_prog_skb(struct net_device *dev)
 {
-       const struct net_device_ops *ops = dev->netdev_ops;
        const struct bpf_prog *generic_xdp_prog;
-       struct netdev_bpf xdp;
 
        ASSERT_RTNL();
 
-       *prog_id = 0;
        generic_xdp_prog = rtnl_dereference(dev->xdp_prog);
-       if (generic_xdp_prog) {
-               *prog_id = generic_xdp_prog->aux->id;
-               return XDP_ATTACHED_SKB;
-       }
-       if (!ops->ndo_bpf)
-               return XDP_ATTACHED_NONE;
+       if (!generic_xdp_prog)
+               return 0;
+       return generic_xdp_prog->aux->id;
+}
 
-       __dev_xdp_query(dev, ops->ndo_bpf, &xdp);
-       *prog_id = xdp.prog_id;
+static u32 rtnl_xdp_prog_drv(struct net_device *dev)
+{
+       return __dev_xdp_query(dev, dev->netdev_ops->ndo_bpf, XDP_QUERY_PROG);
+}
 
-       return xdp.prog_attached;
+static u32 rtnl_xdp_prog_hw(struct net_device *dev)
+{
+       return __dev_xdp_query(dev, dev->netdev_ops->ndo_bpf,
+                              XDP_QUERY_PROG_HW);
+}
+
+static int rtnl_xdp_report_one(struct sk_buff *skb, struct net_device *dev,
+                              u32 *prog_id, u8 *mode, u8 tgt_mode, u32 attr,
+                              u32 (*get_prog_id)(struct net_device *dev))
+{
+       u32 curr_id;
+       int err;
+
+       curr_id = get_prog_id(dev);
+       if (!curr_id)
+               return 0;
+
+       *prog_id = curr_id;
+       err = nla_put_u32(skb, attr, curr_id);
+       if (err)
+               return err;
+
+       if (*mode != XDP_ATTACHED_NONE)
+               *mode = XDP_ATTACHED_MULTI;
+       else
+               *mode = tgt_mode;
+
+       return 0;
 }
 
 static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev)
@@ -1381,17 +1408,32 @@ static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev)
        struct nlattr *xdp;
        u32 prog_id;
        int err;
+       u8 mode;
 
        xdp = nla_nest_start(skb, IFLA_XDP);
        if (!xdp)
                return -EMSGSIZE;
 
-       err = nla_put_u8(skb, IFLA_XDP_ATTACHED,
-                        rtnl_xdp_attached_mode(dev, &prog_id));
+       prog_id = 0;
+       mode = XDP_ATTACHED_NONE;
+       err = rtnl_xdp_report_one(skb, dev, &prog_id, &mode, XDP_ATTACHED_SKB,
+                                 IFLA_XDP_SKB_PROG_ID, rtnl_xdp_prog_skb);
+       if (err)
+               goto err_cancel;
+       err = rtnl_xdp_report_one(skb, dev, &prog_id, &mode, XDP_ATTACHED_DRV,
+                                 IFLA_XDP_DRV_PROG_ID, rtnl_xdp_prog_drv);
+       if (err)
+               goto err_cancel;
+       err = rtnl_xdp_report_one(skb, dev, &prog_id, &mode, XDP_ATTACHED_HW,
+                                 IFLA_XDP_HW_PROG_ID, rtnl_xdp_prog_hw);
+       if (err)
+               goto err_cancel;
+
+       err = nla_put_u8(skb, IFLA_XDP_ATTACHED, mode);
        if (err)
                goto err_cancel;
 
-       if (prog_id) {
+       if (prog_id && mode != XDP_ATTACHED_MULTI) {
                err = nla_put_u32(skb, IFLA_XDP_PROG_ID, prog_id);
                if (err)
                        goto err_cancel;
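[Editor's note: the mode accounting in rtnl_xdp_report_one() reduces to a tiny state machine — the first mode with a program sets the reported mode, any further one degrades it to "multi", and per-mode IDs are emitted regardless. A self-contained sketch, with enum values mirroring the XDP_ATTACHED_* uapi ordering:]

#include <stdio.h>

enum mode { NONE, DRV, SKB, HW, MULTI };

/* Combine one mode's attachment state into the overall report. */
static void account(enum mode *reported, enum mode this, unsigned int id)
{
        if (!id)
                return;                 /* nothing attached in this mode */
        *reported = (*reported == NONE) ? this : MULTI;
}

int main(void)
{
        enum mode m = NONE;

        account(&m, SKB, 0);    /* no generic program            */
        account(&m, DRV, 17);   /* driver-mode program, id 17    */
        account(&m, HW, 23);    /* offloaded program, id 23      */
        printf("reported mode: %d (4 == MULTI)\n", m);
        return 0;
}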
@@ -1561,6 +1603,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
                       netif_running(dev) ? dev->operstate : IF_OPER_DOWN) ||
            nla_put_u8(skb, IFLA_LINKMODE, dev->link_mode) ||
            nla_put_u32(skb, IFLA_MTU, dev->mtu) ||
+           nla_put_u32(skb, IFLA_MIN_MTU, dev->min_mtu) ||
+           nla_put_u32(skb, IFLA_MAX_MTU, dev->max_mtu) ||
            nla_put_u32(skb, IFLA_GROUP, dev->group) ||
            nla_put_u32(skb, IFLA_PROMISCUITY, dev->promiscuity) ||
            nla_put_u32(skb, IFLA_NUM_TX_QUEUES, dev->num_tx_queues) ||
@@ -1692,6 +1736,8 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
        [IFLA_IF_NETNSID]       = { .type = NLA_S32 },
        [IFLA_CARRIER_UP_COUNT] = { .type = NLA_U32 },
        [IFLA_CARRIER_DOWN_COUNT] = { .type = NLA_U32 },
+       [IFLA_MIN_MTU]          = { .type = NLA_U32 },
+       [IFLA_MAX_MTU]          = { .type = NLA_U32 },
 };
 
 static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
@@ -2336,7 +2382,7 @@ static int do_setlink(const struct sk_buff *skb,
        }
 
        if (tb[IFLA_MTU]) {
-               err = dev_set_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
+               err = dev_set_mtu_ext(dev, nla_get_u32(tb[IFLA_MTU]), extack);
                if (err < 0)
                        goto errout;
                status |= DO_SETLINK_MODIFIED;
index fb35b62af2724025f743d61de24f9fb7eb9186a8..51b0a9126e121c3cf092a6baaa6fe58d749213a2 100644 (file)
@@ -1715,7 +1715,7 @@ void *skb_push(struct sk_buff *skb, unsigned int len)
 {
        skb->data -= len;
        skb->len  += len;
-       if (unlikely(skb->data<skb->head))
+       if (unlikely(skb->data < skb->head))
                skb_under_panic(skb, len, __builtin_return_address(0));
        return skb->data;
 }
@@ -3816,14 +3816,14 @@ err:
 }
 EXPORT_SYMBOL_GPL(skb_segment);
 
-int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
+int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
 {
        struct skb_shared_info *pinfo, *skbinfo = skb_shinfo(skb);
        unsigned int offset = skb_gro_offset(skb);
        unsigned int headlen = skb_headlen(skb);
        unsigned int len = skb_gro_len(skb);
-       struct sk_buff *lp, *p = *head;
        unsigned int delta_truesize;
+       struct sk_buff *lp;
 
        if (unlikely(p->len + len >= 65536))
                return -E2BIG;
@@ -4899,7 +4899,6 @@ EXPORT_SYMBOL(skb_try_coalesce);
  */
 void skb_scrub_packet(struct sk_buff *skb, bool xnet)
 {
-       skb->tstamp = 0;
        skb->pkt_type = PACKET_HOST;
        skb->skb_iif = 0;
        skb->ignore_df = 0;
@@ -4912,8 +4911,8 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
                return;
 
        ipvs_reset(skb);
-       skb_orphan(skb);
        skb->mark = 0;
+       skb->tstamp = 0;
 }
 EXPORT_SYMBOL_GPL(skb_scrub_packet);
 
index bc2d7a37297fecfbf3fbddd09ce53931fe0e28af..e31233f5ba39d98eeb0b50e2a1f771d20735def1 100644 (file)
@@ -91,6 +91,7 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include <asm/unaligned.h>
 #include <linux/capability.h>
 #include <linux/errno.h>
 #include <linux/errqueue.h>
@@ -249,58 +250,13 @@ static const char *const af_family_kern_clock_key_strings[AF_MAX+1] = {
        _sock_locks("k-clock-")
 };
 static const char *const af_family_rlock_key_strings[AF_MAX+1] = {
-  "rlock-AF_UNSPEC", "rlock-AF_UNIX"     , "rlock-AF_INET"     ,
-  "rlock-AF_AX25"  , "rlock-AF_IPX"      , "rlock-AF_APPLETALK",
-  "rlock-AF_NETROM", "rlock-AF_BRIDGE"   , "rlock-AF_ATMPVC"   ,
-  "rlock-AF_X25"   , "rlock-AF_INET6"    , "rlock-AF_ROSE"     ,
-  "rlock-AF_DECnet", "rlock-AF_NETBEUI"  , "rlock-AF_SECURITY" ,
-  "rlock-AF_KEY"   , "rlock-AF_NETLINK"  , "rlock-AF_PACKET"   ,
-  "rlock-AF_ASH"   , "rlock-AF_ECONET"   , "rlock-AF_ATMSVC"   ,
-  "rlock-AF_RDS"   , "rlock-AF_SNA"      , "rlock-AF_IRDA"     ,
-  "rlock-AF_PPPOX" , "rlock-AF_WANPIPE"  , "rlock-AF_LLC"      ,
-  "rlock-27"       , "rlock-28"          , "rlock-AF_CAN"      ,
-  "rlock-AF_TIPC"  , "rlock-AF_BLUETOOTH", "rlock-AF_IUCV"     ,
-  "rlock-AF_RXRPC" , "rlock-AF_ISDN"     , "rlock-AF_PHONET"   ,
-  "rlock-AF_IEEE802154", "rlock-AF_CAIF" , "rlock-AF_ALG"      ,
-  "rlock-AF_NFC"   , "rlock-AF_VSOCK"    , "rlock-AF_KCM"      ,
-  "rlock-AF_QIPCRTR", "rlock-AF_SMC"     , "rlock-AF_XDP"      ,
-  "rlock-AF_MAX"
+       _sock_locks("rlock-")
 };
 static const char *const af_family_wlock_key_strings[AF_MAX+1] = {
-  "wlock-AF_UNSPEC", "wlock-AF_UNIX"     , "wlock-AF_INET"     ,
-  "wlock-AF_AX25"  , "wlock-AF_IPX"      , "wlock-AF_APPLETALK",
-  "wlock-AF_NETROM", "wlock-AF_BRIDGE"   , "wlock-AF_ATMPVC"   ,
-  "wlock-AF_X25"   , "wlock-AF_INET6"    , "wlock-AF_ROSE"     ,
-  "wlock-AF_DECnet", "wlock-AF_NETBEUI"  , "wlock-AF_SECURITY" ,
-  "wlock-AF_KEY"   , "wlock-AF_NETLINK"  , "wlock-AF_PACKET"   ,
-  "wlock-AF_ASH"   , "wlock-AF_ECONET"   , "wlock-AF_ATMSVC"   ,
-  "wlock-AF_RDS"   , "wlock-AF_SNA"      , "wlock-AF_IRDA"     ,
-  "wlock-AF_PPPOX" , "wlock-AF_WANPIPE"  , "wlock-AF_LLC"      ,
-  "wlock-27"       , "wlock-28"          , "wlock-AF_CAN"      ,
-  "wlock-AF_TIPC"  , "wlock-AF_BLUETOOTH", "wlock-AF_IUCV"     ,
-  "wlock-AF_RXRPC" , "wlock-AF_ISDN"     , "wlock-AF_PHONET"   ,
-  "wlock-AF_IEEE802154", "wlock-AF_CAIF" , "wlock-AF_ALG"      ,
-  "wlock-AF_NFC"   , "wlock-AF_VSOCK"    , "wlock-AF_KCM"      ,
-  "wlock-AF_QIPCRTR", "wlock-AF_SMC"     , "wlock-AF_XDP"      ,
-  "wlock-AF_MAX"
+       _sock_locks("wlock-")
 };
 static const char *const af_family_elock_key_strings[AF_MAX+1] = {
-  "elock-AF_UNSPEC", "elock-AF_UNIX"     , "elock-AF_INET"     ,
-  "elock-AF_AX25"  , "elock-AF_IPX"      , "elock-AF_APPLETALK",
-  "elock-AF_NETROM", "elock-AF_BRIDGE"   , "elock-AF_ATMPVC"   ,
-  "elock-AF_X25"   , "elock-AF_INET6"    , "elock-AF_ROSE"     ,
-  "elock-AF_DECnet", "elock-AF_NETBEUI"  , "elock-AF_SECURITY" ,
-  "elock-AF_KEY"   , "elock-AF_NETLINK"  , "elock-AF_PACKET"   ,
-  "elock-AF_ASH"   , "elock-AF_ECONET"   , "elock-AF_ATMSVC"   ,
-  "elock-AF_RDS"   , "elock-AF_SNA"      , "elock-AF_IRDA"     ,
-  "elock-AF_PPPOX" , "elock-AF_WANPIPE"  , "elock-AF_LLC"      ,
-  "elock-27"       , "elock-28"          , "elock-AF_CAN"      ,
-  "elock-AF_TIPC"  , "elock-AF_BLUETOOTH", "elock-AF_IUCV"     ,
-  "elock-AF_RXRPC" , "elock-AF_ISDN"     , "elock-AF_PHONET"   ,
-  "elock-AF_IEEE802154", "elock-AF_CAIF" , "elock-AF_ALG"      ,
-  "elock-AF_NFC"   , "elock-AF_VSOCK"    , "elock-AF_KCM"      ,
-  "elock-AF_QIPCRTR", "elock-AF_SMC"     , "elock-AF_XDP"      ,
-  "elock-AF_MAX"
+       _sock_locks("elock-")
 };
 
 /*
@@ -697,6 +653,7 @@ EXPORT_SYMBOL(sk_mc_loop);
 int sock_setsockopt(struct socket *sock, int level, int optname,
                    char __user *optval, unsigned int optlen)
 {
+       struct sock_txtime sk_txtime;
        struct sock *sk = sock->sk;
        int val;
        int valbool;
@@ -1070,6 +1027,26 @@ set_rcvbuf:
                }
                break;
 
+       case SO_TXTIME:
+               if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
+                       ret = -EPERM;
+               } else if (optlen != sizeof(struct sock_txtime)) {
+                       ret = -EINVAL;
+               } else if (copy_from_user(&sk_txtime, optval,
+                          sizeof(struct sock_txtime))) {
+                       ret = -EFAULT;
+               } else if (sk_txtime.flags & ~SOF_TXTIME_FLAGS_MASK) {
+                       ret = -EINVAL;
+               } else {
+                       sock_valbool_flag(sk, SOCK_TXTIME, true);
+                       sk->sk_clockid = sk_txtime.clockid;
+                       sk->sk_txtime_deadline_mode =
+                               !!(sk_txtime.flags & SOF_TXTIME_DEADLINE_MODE);
+                       sk->sk_txtime_report_errors =
+                               !!(sk_txtime.flags & SOF_TXTIME_REPORT_ERRORS);
+               }
+               break;
+
        default:
                ret = -ENOPROTOOPT;
                break;
@@ -1115,6 +1092,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
                u64 val64;
                struct linger ling;
                struct timeval tm;
+               struct sock_txtime txtime;
        } v;
 
        int lv = sizeof(int);
@@ -1403,6 +1381,15 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
                v.val = sock_flag(sk, SOCK_ZEROCOPY);
                break;
 
+       case SO_TXTIME:
+               lv = sizeof(v.txtime);
+               v.txtime.clockid = sk->sk_clockid;
+               v.txtime.flags |= sk->sk_txtime_deadline_mode ?
+                                 SOF_TXTIME_DEADLINE_MODE : 0;
+               v.txtime.flags |= sk->sk_txtime_report_errors ?
+                                 SOF_TXTIME_REPORT_ERRORS : 0;
+               break;
+
        default:
                /* We implement the SO_SNDLOWAT etc to not be settable
                 * (1003.1g 7).
@@ -2137,6 +2124,13 @@ int __sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct cmsghdr *cmsg,
                sockc->tsflags &= ~SOF_TIMESTAMPING_TX_RECORD_MASK;
                sockc->tsflags |= tsflags;
                break;
+       case SCM_TXTIME:
+               if (!sock_flag(sk, SOCK_TXTIME))
+                       return -EINVAL;
+               if (cmsg->cmsg_len != CMSG_LEN(sizeof(u64)))
+                       return -EINVAL;
+               sockc->transmit_time = get_unaligned((u64 *)CMSG_DATA(cmsg));
+               break;
        /* SCM_RIGHTS and SCM_CREDENTIALS are semantically in SOL_UNIX. */
        case SCM_RIGHTS:
        case SCM_CREDENTIALS:
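[Editor's note: the SO_TXTIME and SCM_TXTIME hunks above enable time-based transmission. A hedged end-to-end sketch — it assumes headers from a kernel carrying this series (struct sock_txtime in linux/net_tstamp.h); the fallback value 61 matches asm-generic and is an assumption for other architectures; SO_TXTIME needs CAP_NET_ADMIN, and an etf/fq qdisc must be installed for the launch time to take effect:]

#define _GNU_SOURCE
#include <stdint.h>
#include <string.h>
#include <time.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/net_tstamp.h>   /* struct sock_txtime, SOF_TXTIME_* */

#ifndef SO_TXTIME
#define SO_TXTIME 61             /* asm-generic value as of this series */
#define SCM_TXTIME SO_TXTIME
#endif

int main(void)
{
        struct sock_txtime cfg = {
                .clockid = CLOCK_TAI,
                .flags   = SOF_TXTIME_REPORT_ERRORS,
        };
        union {                 /* keeps the cmsg buffer aligned */
                char buf[CMSG_SPACE(sizeof(uint64_t))];
                struct cmsghdr align;
        } u;
        char payload[] = "hi";
        struct sockaddr_in dst = { .sin_family = AF_INET,
                                   .sin_port = htons(7777) };
        struct iovec iov = { payload, sizeof(payload) };
        struct msghdr msg = { 0 };
        struct cmsghdr *cm;
        struct timespec ts;
        uint64_t txtime;
        int fd;

        fd = socket(AF_INET, SOCK_DGRAM, 0);
        if (fd < 0 ||
            setsockopt(fd, SOL_SOCKET, SO_TXTIME, &cfg, sizeof(cfg)))
                return 1;

        /* launch the packet half a second from now, in CLOCK_TAI ns */
        clock_gettime(CLOCK_TAI, &ts);
        txtime = ts.tv_sec * 1000000000ULL + ts.tv_nsec + 500000000ULL;

        dst.sin_addr.s_addr = htonl(0x7f000001);        /* 127.0.0.1 */
        msg.msg_name = &dst;
        msg.msg_namelen = sizeof(dst);
        msg.msg_iov = &iov;
        msg.msg_iovlen = 1;
        msg.msg_control = u.buf;
        msg.msg_controllen = sizeof(u.buf);

        cm = CMSG_FIRSTHDR(&msg);
        cm->cmsg_level = SOL_SOCKET;
        cm->cmsg_type  = SCM_TXTIME;
        cm->cmsg_len   = CMSG_LEN(sizeof(txtime));      /* must be u64 */
        memcpy(CMSG_DATA(cm), &txtime, sizeof(txtime));

        return sendmsg(fd, &msg, 0) < 0;
}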
@@ -2401,9 +2395,10 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
 {
        struct proto *prot = sk->sk_prot;
        long allocated = sk_memory_allocated_add(sk, amt);
+       bool charged = true;
 
        if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
-           !mem_cgroup_charge_skmem(sk->sk_memcg, amt))
+           !(charged = mem_cgroup_charge_skmem(sk->sk_memcg, amt)))
                goto suppress_allocation;
 
        /* Under limit. */
@@ -2461,7 +2456,8 @@ suppress_allocation:
                        return 1;
        }
 
-       trace_sock_exceed_buf_limit(sk, prot, allocated);
+       if (kind == SK_MEM_SEND || (kind == SK_MEM_RECV && charged))
+               trace_sock_exceed_buf_limit(sk, prot, allocated, kind);
 
        sk_memory_allocated_sub(sk, amt);
 
@@ -2818,6 +2814,8 @@ void sock_init_data(struct socket *sock, struct sock *sk)
        sk->sk_pacing_rate = ~0U;
        sk->sk_pacing_shift = 10;
        sk->sk_incoming_cpu = -1;
+
+       sk_rx_queue_clear(sk);
        /*
         * Before updating sk_refcnt, we must commit prior changes to memory
         * (Documentation/RCU/rculist_nulls.txt for details)
index d47863b07a606a5a1e98906471ee23b447dd1f9a..2a597ac7808e2c9b3e68b9ca03ffe0dd4a0d9853 100644 (file)
@@ -397,7 +397,7 @@ int inet_pton_with_scope(struct net *net, __kernel_sa_family_t af,
                break;
        default:
                pr_err("unexpected address family %d\n", af);
-       };
+       }
 
        return ret;
 }
index 6771f1855b961b325356c699f6b3190bb188e0ee..c013b836006bd02e2fba2e1edea5a8f7e369bd8b 100644 (file)
@@ -3,8 +3,11 @@
  * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
  * Released under terms in GPL version 2.  See COPYING.
  */
+#include <linux/bpf.h>
+#include <linux/filter.h>
 #include <linux/types.h>
 #include <linux/mm.h>
+#include <linux/netdevice.h>
 #include <linux/slab.h>
 #include <linux/idr.h>
 #include <linux/rhashtable.h>
@@ -45,8 +48,8 @@ static u32 xdp_mem_id_hashfn(const void *data, u32 len, u32 seed)
        BUILD_BUG_ON(FIELD_SIZEOF(struct xdp_mem_allocator, mem.id)
                     != sizeof(u32));
 
-       /* Use cyclic increasing ID as direct hash key, see rht_bucket_index */
-       return key << RHT_HASH_RESERVED_SPACE;
+       /* Use cyclic increasing ID as direct hash key */
+       return key;
 }
 
 static int xdp_mem_id_cmp(struct rhashtable_compare_arg *arg,
@@ -371,3 +374,34 @@ void xdp_return_buff(struct xdp_buff *xdp)
        __xdp_return(xdp->data, &xdp->rxq->mem, true, xdp->handle);
 }
 EXPORT_SYMBOL_GPL(xdp_return_buff);
+
+int xdp_attachment_query(struct xdp_attachment_info *info,
+                        struct netdev_bpf *bpf)
+{
+       bpf->prog_id = info->prog ? info->prog->aux->id : 0;
+       bpf->prog_flags = info->prog ? info->flags : 0;
+       return 0;
+}
+EXPORT_SYMBOL_GPL(xdp_attachment_query);
+
+bool xdp_attachment_flags_ok(struct xdp_attachment_info *info,
+                            struct netdev_bpf *bpf)
+{
+       if (info->prog && (bpf->flags ^ info->flags) & XDP_FLAGS_MODES) {
+               NL_SET_ERR_MSG(bpf->extack,
+                              "program loaded with different flags");
+               return false;
+       }
+       return true;
+}
+EXPORT_SYMBOL_GPL(xdp_attachment_flags_ok);
+
+void xdp_attachment_setup(struct xdp_attachment_info *info,
+                         struct netdev_bpf *bpf)
+{
+       if (info->prog)
+               bpf_prog_put(info->prog);
+       info->prog = bpf->prog;
+       info->flags = bpf->flags;
+}
+EXPORT_SYMBOL_GPL(xdp_attachment_setup);
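[Editor's note: the three helpers above factor the program/flags bookkeeping out of drivers. A hedged sketch of how a driver's ndo_bpf callback might use them — dummy_priv and dummy_ndo_bpf are made up; the shape follows the in-tree converted drivers:]

struct dummy_priv {
        struct xdp_attachment_info xdp;    /* hypothetical driver state */
};

static int dummy_ndo_bpf(struct net_device *dev, struct netdev_bpf *bpf)
{
        struct dummy_priv *priv = netdev_priv(dev);

        switch (bpf->command) {
        case XDP_SETUP_PROG:
                /* refuse to silently replace a program that was
                 * attached with different mode flags */
                if (!xdp_attachment_flags_ok(&priv->xdp, bpf))
                        return -EBUSY;
                /* ... point the datapath at bpf->prog here ... */
                xdp_attachment_setup(&priv->xdp, bpf);
                return 0;
        case XDP_QUERY_PROG:
                return xdp_attachment_query(&priv->xdp, bpf);
        default:
                return -EINVAL;
        }
}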
index 2589a6b78aa175a207cf592e5c8b611ca95f232e..a556cd708885a798ba13e1cf8443c8cf96251e28 100644 (file)
@@ -1786,7 +1786,7 @@ static struct dcb_app_type *dcb_app_lookup(const struct dcb_app *app,
                if (itr->app.selector == app->selector &&
                    itr->app.protocol == app->protocol &&
                    itr->ifindex == ifindex &&
-                   (!prio || itr->app.priority == prio))
+                   ((prio == -1) || itr->app.priority == prio))
                        return itr;
        }
 
@@ -1821,7 +1821,8 @@ u8 dcb_getapp(struct net_device *dev, struct dcb_app *app)
        u8 prio = 0;
 
        spin_lock_bh(&dcb_lock);
-       if ((itr = dcb_app_lookup(app, dev->ifindex, 0)))
+       itr = dcb_app_lookup(app, dev->ifindex, -1);
+       if (itr)
                prio = itr->app.priority;
        spin_unlock_bh(&dcb_lock);
 
@@ -1849,7 +1850,8 @@ int dcb_setapp(struct net_device *dev, struct dcb_app *new)
 
        spin_lock_bh(&dcb_lock);
        /* Search for existing match and replace */
-       if ((itr = dcb_app_lookup(new, dev->ifindex, 0))) {
+       itr = dcb_app_lookup(new, dev->ifindex, -1);
+       if (itr) {
                if (new->priority)
                        itr->app.priority = new->priority;
                else {
@@ -1882,7 +1884,8 @@ u8 dcb_ieee_getapp_mask(struct net_device *dev, struct dcb_app *app)
        u8 prio = 0;
 
        spin_lock_bh(&dcb_lock);
-       if ((itr = dcb_app_lookup(app, dev->ifindex, 0)))
+       itr = dcb_app_lookup(app, dev->ifindex, -1);
+       if (itr)
                prio |= 1 << itr->app.priority;
        spin_unlock_bh(&dcb_lock);
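[Editor's note: the lookup switches its wildcard from 0 to -1 because 0 is itself a valid 802.1 priority, so with the old sentinel a caller could never match priority-0 entries exactly. The whole fix reduces to this comparison:]

/* -1 means "any priority"; 0 is a real priority and must compare. */
static int prio_matches(int entry_prio, int want_prio)
{
        return want_prio == -1 || entry_prio == want_prio;
}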
 
@@ -1955,6 +1958,92 @@ int dcb_ieee_delapp(struct net_device *dev, struct dcb_app *del)
 }
 EXPORT_SYMBOL(dcb_ieee_delapp);
 
+/**
+ * dcb_ieee_getapp_prio_dscp_mask_map - For a given device, find the mapping
+ * from each priority to the DSCP values assigned to it. Initialize p_map
+ * such that each map element holds a bit mask of the DSCP values configured
+ * for that priority by APP entries.
+ */
+void dcb_ieee_getapp_prio_dscp_mask_map(const struct net_device *dev,
+                                       struct dcb_ieee_app_prio_map *p_map)
+{
+       int ifindex = dev->ifindex;
+       struct dcb_app_type *itr;
+       u8 prio;
+
+       memset(p_map->map, 0, sizeof(p_map->map));
+
+       spin_lock_bh(&dcb_lock);
+       list_for_each_entry(itr, &dcb_app_list, list) {
+               if (itr->ifindex == ifindex &&
+                   itr->app.selector == IEEE_8021QAZ_APP_SEL_DSCP &&
+                   itr->app.protocol < 64 &&
+                   itr->app.priority < IEEE_8021QAZ_MAX_TCS) {
+                       prio = itr->app.priority;
+                       p_map->map[prio] |= 1ULL << itr->app.protocol;
+               }
+       }
+       spin_unlock_bh(&dcb_lock);
+}
+EXPORT_SYMBOL(dcb_ieee_getapp_prio_dscp_mask_map);
+
+/**
+ * dcb_ieee_getapp_dscp_prio_mask_map - For a given device, find the mapping
+ * from each DSCP value to the priorities assigned to it. Initialize p_map
+ * such that each map element holds a bit mask of the priorities configured
+ * for that DSCP value by APP entries.
+ */
+void
+dcb_ieee_getapp_dscp_prio_mask_map(const struct net_device *dev,
+                                  struct dcb_ieee_app_dscp_map *p_map)
+{
+       int ifindex = dev->ifindex;
+       struct dcb_app_type *itr;
+
+       memset(p_map->map, 0, sizeof(p_map->map));
+
+       spin_lock_bh(&dcb_lock);
+       list_for_each_entry(itr, &dcb_app_list, list) {
+               if (itr->ifindex == ifindex &&
+                   itr->app.selector == IEEE_8021QAZ_APP_SEL_DSCP &&
+                   itr->app.protocol < 64 &&
+                   itr->app.priority < IEEE_8021QAZ_MAX_TCS)
+                       p_map->map[itr->app.protocol] |= 1 << itr->app.priority;
+       }
+       spin_unlock_bh(&dcb_lock);
+}
+EXPORT_SYMBOL(dcb_ieee_getapp_dscp_prio_mask_map);
+
+/**
+ * dcb_ieee_getapp_default_prio_mask - For a given device, find all APP entries
+ * of the form {$PRIO, ETHERTYPE, 0} and construct a bit mask of all default
+ * priorities set by these entries.
+ *
+ * Per 802.1Q-2014, the selector value of 1 is used for matching on Ethernet
+ * type, with valid PID values >= 1536. A special meaning is then assigned to
+ * the protocol value 0: "default priority. For use when priority is not
+ * otherwise specified".
+ */
+u8 dcb_ieee_getapp_default_prio_mask(const struct net_device *dev)
+{
+       int ifindex = dev->ifindex;
+       struct dcb_app_type *itr;
+       u8 mask = 0;
+
+       spin_lock_bh(&dcb_lock);
+       list_for_each_entry(itr, &dcb_app_list, list) {
+               if (itr->ifindex == ifindex &&
+                   itr->app.selector == IEEE_8021QAZ_APP_SEL_ETHERTYPE &&
+                   itr->app.protocol == 0 &&
+                   itr->app.priority < IEEE_8021QAZ_MAX_TCS)
+                       mask |= 1 << itr->app.priority;
+       }
+       spin_unlock_bh(&dcb_lock);
+
+       return mask;
+}
+EXPORT_SYMBOL(dcb_ieee_getapp_default_prio_mask);
+
 static int __init dcbnl_init(void)
 {
        INIT_LIST_HEAD(&dcb_app_list);
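[Editor's note: the mask-map helpers above reduce a list of APP entries to fixed-size lookup tables. A small self-contained sketch of the priority-to-DSCP direction, with invented entries:]

#include <stdint.h>
#include <stdio.h>

#define MAX_TCS 8

struct app { uint8_t prio; uint8_t dscp; };

int main(void)
{
        const struct app apps[] = { {0, 0}, {2, 24}, {2, 26}, {7, 46} };
        uint64_t map[MAX_TCS] = { 0 };
        unsigned int i;

        /* one u64 DSCP bit mask per priority, skipping out-of-range
         * entries exactly as the kernel helper does */
        for (i = 0; i < sizeof(apps) / sizeof(apps[0]); i++)
                if (apps[i].dscp < 64 && apps[i].prio < MAX_TCS)
                        map[apps[i].prio] |= 1ULL << apps[i].dscp;

        for (i = 0; i < MAX_TCS; i++)
                printf("prio %u: dscp mask %016llx\n", i,
                       (unsigned long long)map[i]);
        return 0;
}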
index 0d56e36a6db7b77dcdeb9697dd81bf62895e6e4c..875858c8b05927655deb83467391b1075824b9b7 100644 (file)
@@ -325,7 +325,7 @@ __poll_t dccp_poll(struct file *file, struct socket *sock,
        __poll_t mask;
        struct sock *sk = sock->sk;
 
-       sock_poll_wait(file, sk_sleep(sk), wait);
+       sock_poll_wait(file, wait);
        if (sk->sk_state == DCCP_LISTEN)
                return inet_csk_listen_poll(sk);
 
index f3393e154f0f8879d9cc929cf85dbc4674d10e74..dcc74956badd93ca4b948f7b9fea2d540a7f9083 100644 (file)
@@ -40,4 +40,3 @@ config DECNET_ROUTER
          to work.
 
          See <file:Documentation/networking/decnet.txt> for more information.
-
index 9e38122d942be04dd53d7ec2a7672afcc8a244d9..07b38e441b2d0f6a6fb54ad35b838e0311cce22f 100644 (file)
@@ -8,4 +8,3 @@ decnet-$(CONFIG_DECNET_ROUTER) += dn_fib.o dn_rules.o dn_table.o
 decnet-y += sysctl_net_decnet.o
 
 obj-$(CONFIG_NETFILTER) += netfilter/
-
index ebb5ac69d12830b49c4561e5746f5058ab50c77d..358e9eb490166059b67e7e6fe598c58963b013e7 100644 (file)
@@ -16,14 +16,14 @@ Steve's quick list of things that need finishing off:
 
  o Verify errors etc. against POSIX 1003.1g (draft)
 
- o Using send/recvmsg() to get at connect/disconnect data (POSIX 1003.1g) 
+ o Using send/recvmsg() to get at connect/disconnect data (POSIX 1003.1g)
    [maybe this should be done at socket level... the control data in the
     send/recvmsg() calls should simply be a vector of set/getsockopt()
     calls]
 
  o check MSG_CTRUNC is set where it should be.
 
- o Find all the commonality between DECnet and IPv4 routing code and extract 
+ o Find all the commonality between DECnet and IPv4 routing code and extract
    it into a small library of routines. [probably a project for 2.7.xx]
 
  o Add perfect socket hashing - an idea suggested by Paul Koning. Currently
@@ -38,4 +38,3 @@ Steve's quick list of things that need finishing off:
  o DECnet sendpages() function
 
  o AIO for DECnet
-
index fce94cbd4378c3620e2b73bd8ec2ea64cbc93384..f78fe58eafc82dba194d16d0627608440003f545 100644 (file)
@@ -797,5 +797,3 @@ void __init dn_fib_init(void)
        rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_DELROUTE,
                             dn_fib_rtm_delroute, NULL, 0);
 }
-
-
index 1b2120645730577de9156215bee70c9a2b9e59e2..2fb5e055ba25e6499cb0ce15c122220152af5a2d 100644 (file)
@@ -491,6 +491,7 @@ static void dn_nsp_disc_conf(struct sock *sk, struct sk_buff *skb)
                break;
        case DN_RUN:
                sk->sk_shutdown |= SHUTDOWN_MASK;
+               /* fall through */
        case DN_CC:
                scp->state = DN_CN;
        }
@@ -911,4 +912,3 @@ free_out:
 
        return NET_RX_SUCCESS;
 }
-
index 56a52a004c560b674b7172fdf38de283d6dbb36e..a1779de6bd9c315d504c342614068d77a058bfa6 100644 (file)
@@ -701,4 +701,3 @@ void dn_nsp_send_conninit(struct sock *sk, unsigned char msgflg)
 
        dn_nsp_send(skb);
 }
-
index e74765024d88ffe7ceb3428114024fd7d510c58f..3107a2e24e6b2e76a7aaa9bf58c2a7b103bec617 100644 (file)
@@ -1925,4 +1925,3 @@ void __exit dn_route_cleanup(void)
        remove_proc_entry("decnet_cache", init_net.proc_net);
        dst_entries_destroy(&dn_dst_ops);
 }
-
index 72236695db3db2fb27e3b3414feafde008bfd8c6..4a4e3c17740cbb765dd3d11d41d32ef82e860d94 100644 (file)
@@ -256,5 +256,3 @@ void __exit dn_fib_rules_cleanup(void)
        rtnl_unlock();
        rcu_barrier();
 }
-
-
index 255c1ae9daeb0a280df4b7c8869a22326c4da801..b579e52130aa079a2ab6de08619bbc756b0db14b 100644 (file)
@@ -3,4 +3,3 @@
 #
 
 obj-$(CONFIG_DECNET_NF_GRABULATOR) += dn_rtmsg.o
-
index ab395e55cd7895691682548b4c042dfbc3a7b127..a4faacadd8a830b4f09eaa444e40b72a7e7807f8 100644 (file)
@@ -158,4 +158,3 @@ MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_DNRTMSG);
 
 module_init(dn_rtmsg_init);
 module_exit(dn_rtmsg_fini);
-
index 0c9478b91fa5b6c8f6b586ed8ead66c8db538ea7..7f4534828f6c96c0b0fe99d54add4ce9be0c1877 100644 (file)
@@ -320,4 +320,3 @@ static void __exit exit_dns_resolver(void)
 module_init(init_dns_resolver)
 module_exit(exit_dns_resolver)
 MODULE_LICENSE("GPL");
-
index dc5d9af3dc80d29a8a2bc9bca37c349bd99d2e2a..a1917025e155bab3c96f9995594667c1383562f3 100644 (file)
@@ -775,6 +775,20 @@ struct dsa_switch *dsa_switch_alloc(struct device *dev, size_t n)
        if (!ds)
                return NULL;
 
+       /* Avoid a separate bitmap allocation when the port count fits
+        * in the storage embedded in dsa_switch itself.
+        */
+       if (n <= sizeof(ds->_bitmap) * 8) {
+               ds->bitmap = &ds->_bitmap;
+       } else {
+               ds->bitmap = devm_kcalloc(dev,
+                                         BITS_TO_LONGS(n),
+                                         sizeof(unsigned long),
+                                         GFP_KERNEL);
+               if (unlikely(!ds->bitmap))
+                       return NULL;
+       }
+
        ds->dev = dev;
        ds->num_ports = n;
 
index 732369c80644a5eb056a1abd2d7de609082ed059..1ba3bde96b557df00726301527fae71e0f4c77c9 100644 (file)
@@ -900,7 +900,7 @@ static int dsa_slave_setup_tc_block(struct net_device *dev,
 
        switch (f->command) {
        case TC_BLOCK_BIND:
-               return tcf_block_cb_register(f->block, cb, dev, dev);
+               return tcf_block_cb_register(f->block, cb, dev, dev, f->extack);
        case TC_BLOCK_UNBIND:
                tcf_block_cb_unregister(f->block, cb, dev);
                return 0;
index b93511726069e9d7059c8f8dc23b37cb0236744a..142b294d34468508f80457a6e3b5ffc35c680f07 100644 (file)
@@ -136,21 +136,20 @@ static int dsa_switch_mdb_add(struct dsa_switch *ds,
 {
        const struct switchdev_obj_port_mdb *mdb = info->mdb;
        struct switchdev_trans *trans = info->trans;
-       DECLARE_BITMAP(group, ds->num_ports);
        int port;
 
        /* Build a mask of Multicast group members */
-       bitmap_zero(group, ds->num_ports);
+       bitmap_zero(ds->bitmap, ds->num_ports);
        if (ds->index == info->sw_index)
-               set_bit(info->port, group);
+               set_bit(info->port, ds->bitmap);
        for (port = 0; port < ds->num_ports; port++)
                if (dsa_is_dsa_port(ds, port))
-                       set_bit(port, group);
+                       set_bit(port, ds->bitmap);
 
        if (switchdev_trans_ph_prepare(trans))
-               return dsa_switch_mdb_prepare_bitmap(ds, mdb, group);
+               return dsa_switch_mdb_prepare_bitmap(ds, mdb, ds->bitmap);
 
-       dsa_switch_mdb_add_bitmap(ds, mdb, group);
+       dsa_switch_mdb_add_bitmap(ds, mdb, ds->bitmap);
 
        return 0;
 }
@@ -204,21 +203,20 @@ static int dsa_switch_vlan_add(struct dsa_switch *ds,
 {
        const struct switchdev_obj_port_vlan *vlan = info->vlan;
        struct switchdev_trans *trans = info->trans;
-       DECLARE_BITMAP(members, ds->num_ports);
        int port;
 
        /* Build a mask of VLAN members */
-       bitmap_zero(members, ds->num_ports);
+       bitmap_zero(ds->bitmap, ds->num_ports);
        if (ds->index == info->sw_index)
-               set_bit(info->port, members);
+               set_bit(info->port, ds->bitmap);
        for (port = 0; port < ds->num_ports; port++)
                if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))
-                       set_bit(port, members);
+                       set_bit(port, ds->bitmap);
 
        if (switchdev_trans_ph_prepare(trans))
-               return dsa_switch_vlan_prepare_bitmap(ds, vlan, members);
+               return dsa_switch_vlan_prepare_bitmap(ds, vlan, ds->bitmap);
 
-       dsa_switch_vlan_add_bitmap(ds, vlan, members);
+       dsa_switch_vlan_add_bitmap(ds, vlan, ds->bitmap);
 
        return 0;
 }
index ee28440f57c58f4eec29e67641a49efcbd36c8cd..fd8faa0dfa6193a186e562dae33008b4a0dc2182 100644 (file)
@@ -427,13 +427,13 @@ ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len)
 }
 EXPORT_SYMBOL(sysfs_format_mac);
 
-struct sk_buff **eth_gro_receive(struct sk_buff **head,
-                                struct sk_buff *skb)
+struct sk_buff *eth_gro_receive(struct list_head *head, struct sk_buff *skb)
 {
-       struct sk_buff *p, **pp = NULL;
-       struct ethhdr *eh, *eh2;
-       unsigned int hlen, off_eth;
        const struct packet_offload *ptype;
+       unsigned int hlen, off_eth;
+       struct sk_buff *pp = NULL;
+       struct ethhdr *eh, *eh2;
+       struct sk_buff *p;
        __be16 type;
        int flush = 1;
 
@@ -448,7 +448,7 @@ struct sk_buff **eth_gro_receive(struct sk_buff **head,
 
        flush = 0;
 
-       for (p = *head; p; p = p->next) {
+       list_for_each_entry(p, head, list) {
                if (!NAPI_GRO_CB(p)->same_flow)
                        continue;
 
index 2cc224106b6928bafd92460ba59ea69db0778761..ec7a5da561290ac92c234338d306fb300e46e62c 100644 (file)
@@ -25,7 +25,7 @@
 
 #include <net/ieee802154_netdev.h>
 #include <net/6lowpan.h>
-#include <net/ipv6.h>
+#include <net/ipv6_frag.h>
 #include <net/inet_frag.h>
 
 #include "6lowpan_i.h"
index cb7176cd4cd62219ec52f2b82172d9b522d5b24c..fe225d9a187788cde0f3030b2e66287fb6ea0d24 100644 (file)
@@ -400,4 +400,3 @@ module_exit(wpan_phy_class_exit);
 MODULE_LICENSE("GPL v2");
 MODULE_DESCRIPTION("IEEE 802.15.4 configuration interface");
 MODULE_AUTHOR("Dmitry Eremin-Solenikov");
-
index 35c4326684548d88f408f84ded46ce6ac2d04ebc..78f6f1233194176ba55ac6d9394d37f1692ffff9 100644 (file)
@@ -75,4 +75,3 @@ const struct nla_policy ieee802154_policy[IEEE802154_ATTR_MAX + 1] = {
        [IEEE802154_ATTR_LLSEC_DEV_OVERRIDE] = { .type = NLA_U8, },
        [IEEE802154_ATTR_LLSEC_DEV_KEY_MODE] = { .type = NLA_U8, },
 };
-
index 80dad301361d9cc589b3a406ebb8d32a4e4a6827..32cae39cdff63125cb7213ae6644e8ff885796c0 100644 (file)
@@ -430,7 +430,7 @@ config INET_DIAG
          Support for INET (TCP, DCCP, etc) socket monitoring interface used by
          native Linux tools such as ss. ss is included in iproute2, currently
          downloadable at:
-         
+
            http://www.linuxfoundation.org/collaborate/workgroups/networking/iproute2
 
          If unsure, say Y.
@@ -600,7 +600,7 @@ config TCP_CONG_VENO
        distinguishing to circumvent the difficult judgment of the packet loss
        type. TCP Veno cuts the congestion window less aggressively in
        response to random packet loss.
-       See <http://ieeexplore.ieee.org/xpl/freeabs_all.jsp?arnumber=1177186> 
+       See <http://ieeexplore.ieee.org/xpl/freeabs_all.jsp?arnumber=1177186>
 
 config TCP_CONG_YEAH
        tristate "YeAH TCP"
index eec9569ffa5cb143ea4d4feb0ca7a73925ea33e5..7446b98661d86ef53ecd933cf3649ddf5ac4286b 100644 (file)
@@ -43,7 +43,7 @@ obj-$(CONFIG_INET_XFRM_MODE_TRANSPORT) += xfrm4_mode_transport.o
 obj-$(CONFIG_INET_XFRM_MODE_TUNNEL) += xfrm4_mode_tunnel.o
 obj-$(CONFIG_IP_PNP) += ipconfig.o
 obj-$(CONFIG_NETFILTER)        += netfilter.o netfilter/
-obj-$(CONFIG_INET_DIAG) += inet_diag.o 
+obj-$(CONFIG_INET_DIAG) += inet_diag.o
 obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o
 obj-$(CONFIG_INET_UDP_DIAG) += udp_diag.o
 obj-$(CONFIG_INET_RAW_DIAG) += raw_diag.o
index b403499fdabea7367f65c588d957a30f5a6572b5..20fda8fb8ffda40a9ec61dead5ebacdbc4c1bc4a 100644 (file)
@@ -229,6 +229,7 @@ int inet_listen(struct socket *sock, int backlog)
                err = inet_csk_listen_start(sk, backlog);
                if (err)
                        goto out;
+               tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_LISTEN_CB, 0, NULL);
        }
        sk->sk_max_ack_backlog = backlog;
        err = 0;
@@ -485,8 +486,7 @@ int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
         *  is temporarily down)
         */
        err = -EADDRNOTAVAIL;
-       if (!net->ipv4.sysctl_ip_nonlocal_bind &&
-           !(inet->freebind || inet->transparent) &&
+       if (!inet_can_nonlocal_bind(net, inet) &&
            addr->sin_addr.s_addr != htonl(INADDR_ANY) &&
            chk_addr_ret != RTN_LOCAL &&
            chk_addr_ret != RTN_MULTICAST &&
@@ -1384,12 +1384,12 @@ out:
 }
 EXPORT_SYMBOL(inet_gso_segment);
 
-struct sk_buff **inet_gro_receive(struct sk_buff **head, struct sk_buff *skb)
+struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb)
 {
        const struct net_offload *ops;
-       struct sk_buff **pp = NULL;
-       struct sk_buff *p;
+       struct sk_buff *pp = NULL;
        const struct iphdr *iph;
+       struct sk_buff *p;
        unsigned int hlen;
        unsigned int off;
        unsigned int id;
@@ -1425,7 +1425,7 @@ struct sk_buff **inet_gro_receive(struct sk_buff **head, struct sk_buff *skb)
        flush = (u16)((ntohl(*(__be32 *)iph) ^ skb_gro_len(skb)) | (id & ~IP_DF));
        id >>= 16;
 
-       for (p = *head; p; p = p->next) {
+       list_for_each_entry(p, head, list) {
                struct iphdr *iph2;
                u16 flush_id;
 
@@ -1505,8 +1505,8 @@ out:
 }
 EXPORT_SYMBOL(inet_gro_receive);
 
-static struct sk_buff **ipip_gro_receive(struct sk_buff **head,
-                                        struct sk_buff *skb)
+static struct sk_buff *ipip_gro_receive(struct list_head *head,
+                                       struct sk_buff *skb)
 {
        if (NAPI_GRO_CB(skb)->encap_mark) {
                NAPI_GRO_CB(skb)->flush = 1;
@@ -1801,6 +1801,7 @@ static __net_init int inet_init_net(struct net *net)
         * We set them here, in case sysctl is not compiled.
         */
        net->ipv4.sysctl_ip_default_ttl = IPDEFTTL;
+       net->ipv4.sysctl_ip_fwd_update_priority = 1;
        net->ipv4.sysctl_ip_dynaddr = 0;
        net->ipv4.sysctl_ip_early_demux = 1;
        net->ipv4.sysctl_udp_early_demux = 1;
@@ -1882,6 +1883,7 @@ fs_initcall(ipv4_offload_init);
 static struct packet_type ip_packet_type __read_mostly = {
        .type = cpu_to_be16(ETH_P_IP),
        .func = ip_rcv,
+       .list_func = ip_list_rcv,
 };
 
 static int __init inet_init(void)
index ce262d76cc488bb5b2c808fa63b10ae18e289e68..e9e42f99725e62ed4a7728b3cbba05f1ea72edb3 100644 (file)
@@ -1,2 +1 @@
 obj-$(CONFIG_BPFILTER) += sockopt.o
-
index d7585ab1a77a0e9d0f942960811a7c61c70ae129..ea4bd8a52422e75c98e30cb3ab537e97b2170520 100644 (file)
@@ -1827,6 +1827,8 @@ static int inet_netconf_msgsize_devconf(int type)
                size += nla_total_size(4);
        if (all || type == NETCONFA_MC_FORWARDING)
                size += nla_total_size(4);
+       if (all || type == NETCONFA_BC_FORWARDING)
+               size += nla_total_size(4);
        if (all || type == NETCONFA_PROXY_NEIGH)
                size += nla_total_size(4);
        if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
@@ -1873,6 +1875,10 @@ static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
            nla_put_s32(skb, NETCONFA_MC_FORWARDING,
                        IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
                goto nla_put_failure;
+       if ((all || type == NETCONFA_BC_FORWARDING) &&
+           nla_put_s32(skb, NETCONFA_BC_FORWARDING,
+                       IPV4_DEVCONF(*devconf, BC_FORWARDING)) < 0)
+               goto nla_put_failure;
        if ((all || type == NETCONFA_PROXY_NEIGH) &&
            nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
                        IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
@@ -2143,6 +2149,10 @@ static int devinet_conf_proc(struct ctl_table *ctl, int write,
                        if ((new_value == 0) && (old_value != 0))
                                rt_cache_flush(net);
 
+               if (i == IPV4_DEVCONF_BC_FORWARDING - 1 &&
+                   new_value != old_value)
+                       rt_cache_flush(net);
+
                if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
                    new_value != old_value) {
                        ifindex = devinet_conf_ifindex(net, cnf);
@@ -2259,6 +2269,7 @@ static struct devinet_sysctl_table {
                DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
                                             devinet_sysctl_forward),
                DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
+               DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),
 
                DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
                DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
index 7cf755ef9efba3c13fcee22e1a24a320f4d9a503..58834a10c0be77b3d301169f505dcdc7896dd3f4 100644 (file)
@@ -28,8 +28,8 @@
 #include <linux/spinlock.h>
 #include <net/udp.h>
 
-static struct sk_buff **esp4_gro_receive(struct sk_buff **head,
-                                        struct sk_buff *skb)
+static struct sk_buff *esp4_gro_receive(struct list_head *head,
+                                       struct sk_buff *skb)
 {
        int offset = skb_gro_offset(skb);
        struct xfrm_offload *xo;
@@ -135,8 +135,7 @@ static struct sk_buff *esp4_gso_segment(struct sk_buff *skb,
 
        skb->encap_hdr_csum = 1;
 
-       if (!(features & NETIF_F_HW_ESP) || !x->xso.offload_handle ||
-           (x->xso.dev != skb->dev))
+       if (!(features & NETIF_F_HW_ESP) || x->xso.dev != skb->dev)
                esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK);
        else if (!(features & NETIF_F_HW_ESP_TX_CSUM))
                esp_features = features & ~NETIF_F_CSUM_MASK;
@@ -179,8 +178,7 @@ static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb,  netdev_features_
        if (!xo)
                return -EINVAL;
 
-       if (!(features & NETIF_F_HW_ESP) || !x->xso.offload_handle ||
-           (x->xso.dev != skb->dev)) {
+       if (!(features & NETIF_F_HW_ESP) || x->xso.dev != skb->dev) {
                xo->flags |= CRYPTO_FALLBACK;
                hw_offload = false;
        }
index c9ec1603666bffcfb24597b933a05f53b6d83440..500a59906b8719eb40fc3f37a0dc535b10b3069e 100644 (file)
@@ -224,14 +224,14 @@ drop:
        return 0;
 }
 
-static struct sk_buff **fou_gro_receive(struct sock *sk,
-                                       struct sk_buff **head,
-                                       struct sk_buff *skb)
+static struct sk_buff *fou_gro_receive(struct sock *sk,
+                                      struct list_head *head,
+                                      struct sk_buff *skb)
 {
-       const struct net_offload *ops;
-       struct sk_buff **pp = NULL;
        u8 proto = fou_from_sock(sk)->protocol;
        const struct net_offload **offloads;
+       const struct net_offload *ops;
+       struct sk_buff *pp = NULL;
 
        /* We can clear the encap_mark for FOU as we are essentially doing
         * one of two possible things.  We are either adding an L4 tunnel
@@ -305,13 +305,13 @@ static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off,
        return guehdr;
 }
 
-static struct sk_buff **gue_gro_receive(struct sock *sk,
-                                       struct sk_buff **head,
-                                       struct sk_buff *skb)
+static struct sk_buff *gue_gro_receive(struct sock *sk,
+                                      struct list_head *head,
+                                      struct sk_buff *skb)
 {
        const struct net_offload **offloads;
        const struct net_offload *ops;
-       struct sk_buff **pp = NULL;
+       struct sk_buff *pp = NULL;
        struct sk_buff *p;
        struct guehdr *guehdr;
        size_t len, optlen, hdrlen, off;
@@ -397,7 +397,7 @@ static struct sk_buff **gue_gro_receive(struct sock *sk,
 
        skb_gro_pull(skb, hdrlen);
 
-       for (p = *head; p; p = p->next) {
+       list_for_each_entry(p, head, list) {
                const struct guehdr *guehdr2;
 
                if (!NAPI_GRO_CB(p)->same_flow)
index 6a7d980105f60514c8180e6333f0a4a53912c3d5..6c63524f598a9b5171bfda0692df824883faa136 100644 (file)
@@ -108,10 +108,10 @@ out:
        return segs;
 }
 
-static struct sk_buff **gre_gro_receive(struct sk_buff **head,
-                                       struct sk_buff *skb)
+static struct sk_buff *gre_gro_receive(struct list_head *head,
+                                      struct sk_buff *skb)
 {
-       struct sk_buff **pp = NULL;
+       struct sk_buff *pp = NULL;
        struct sk_buff *p;
        const struct gre_base_hdr *greh;
        unsigned int hlen, grehlen;
@@ -182,7 +182,7 @@ static struct sk_buff **gre_gro_receive(struct sk_buff **head,
                                             null_compute_pseudo);
        }
 
-       for (p = *head; p; p = p->next) {
+       list_for_each_entry(p, head, list) {
                const struct gre_base_hdr *greh2;
 
                if (!NAPI_GRO_CB(p)->same_flow)
index 1617604c92847d5b0d058fac40dc3707f22348a5..695979b7ef6d08c2056384d90c9671efd7ae90dd 100644 (file)
@@ -429,14 +429,11 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
 
        icmp_param->data.icmph.checksum = 0;
 
+       ipcm_init(&ipc);
        inet->tos = ip_hdr(skb)->tos;
        sk->sk_mark = mark;
        daddr = ipc.addr = ip_hdr(skb)->saddr;
        saddr = fib_compute_spec_dst(skb);
-       ipc.opt = NULL;
-       ipc.tx_flags = 0;
-       ipc.ttl = 0;
-       ipc.tos = -1;
 
        if (icmp_param->replyopts.opt.opt.optlen) {
                ipc.opt = &icmp_param->replyopts.opt;
@@ -710,11 +707,9 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
        icmp_param.offset = skb_network_offset(skb_in);
        inet_sk(sk)->tos = tos;
        sk->sk_mark = mark;
+       ipcm_init(&ipc);
        ipc.addr = iph->saddr;
        ipc.opt = &icmp_param.replyopts.opt;
-       ipc.tx_flags = 0;
-       ipc.ttl = 0;
-       ipc.tos = -1;
 
        rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, mark,
                               type, code, &icmp_param);
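[Editor's note: these hunks replace ad-hoc clearing of individual cookie fields with a single ipcm_init() call. The underlying pattern is a designated-initializer reset; a sketch with an abbreviated stand-in for struct ipcm_cookie (the field set here is illustrative, not the full structure):]

struct ipcm_cookie_sketch {
        unsigned int addr;
        void *opt;
        unsigned char ttl;
        short tos;      /* -1 means "not set", so memset-to-zero is wrong */
};

static void ipcm_init_sketch(struct ipcm_cookie_sketch *ipcm)
{
        /* one compound-literal assignment replaces the scattered
         * ipc.opt = NULL; ipc.ttl = 0; ipc.tos = -1; lines removed above */
        *ipcm = (struct ipcm_cookie_sketch) { .tos = -1 };
}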
index 75151be21413fb60161087c23aefeb6c31093509..cf75f8944b05eb302c11cd8e2e0e9f762ab7e35c 100644 (file)
@@ -1288,7 +1288,7 @@ static void igmp_group_dropped(struct ip_mc_list *im)
 #endif
 }
 
-static void igmp_group_added(struct ip_mc_list *im, unsigned int mode)
+static void igmp_group_added(struct ip_mc_list *im)
 {
        struct in_device *in_dev = im->interface;
 #ifdef CONFIG_IP_MULTICAST
@@ -1320,7 +1320,7 @@ static void igmp_group_added(struct ip_mc_list *im, unsigned int mode)
         * not send filter-mode change record as the mode should be from
         * IN() to IN(A).
         */
-       if (mode == MCAST_EXCLUDE)
+       if (im->sfmode == MCAST_EXCLUDE)
                im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
 
        igmp_ifc_event(in_dev);
@@ -1432,7 +1432,7 @@ static void __ip_mc_inc_group(struct in_device *in_dev, __be32 addr,
 #ifdef CONFIG_IP_MULTICAST
        igmpv3_del_delrec(in_dev, im);
 #endif
-       igmp_group_added(im, mode);
+       igmp_group_added(im);
        if (!in_dev->dead)
                ip_rt_multicast_event(in_dev);
 out:
@@ -1699,7 +1699,7 @@ void ip_mc_remap(struct in_device *in_dev)
 #ifdef CONFIG_IP_MULTICAST
                igmpv3_del_delrec(in_dev, pmc);
 #endif
-               igmp_group_added(pmc, pmc->sfmode);
+               igmp_group_added(pmc);
        }
 }
 
@@ -1762,7 +1762,7 @@ void ip_mc_up(struct in_device *in_dev)
 #ifdef CONFIG_IP_MULTICAST
                igmpv3_del_delrec(in_dev, pmc);
 #endif
-               igmp_group_added(pmc, pmc->sfmode);
+               igmp_group_added(pmc);
        }
 }
 
index 0d70608cc2e18bfa0df35f331e12fbe9b45b168b..ccd140e4082dd7de0823e9408e352725a8b4404c 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/skbuff.h>
 #include <linux/rtnetlink.h>
 #include <linux/slab.h>
+#include <linux/rhashtable.h>
 
 #include <net/sock.h>
 #include <net/inet_frag.h>
index b54b948b059608fc3157fedf40e61519321c6912..32662e9e5d218868341169bba1dc3ab430952c58 100644 (file)
@@ -143,7 +143,8 @@ int ip_forward(struct sk_buff *skb)
            !skb_sec_path(skb))
                ip_rt_send_redirect(skb);
 
-       skb->priority = rt_tos2priority(iph->tos);
+       if (net->ipv4.sysctl_ip_fwd_update_priority)
+               skb->priority = rt_tos2priority(iph->tos);
 
        return NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD,
                       net, NULL, skb, skb->dev, rt->dst.dev,
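With this, rewriting skb->priority from the IP TOS on forwarded traffic becomes optional: clearing the new knob (e.g. sysctl -w net.ipv4.ip_forward_update_priority=0) preserves whatever priority was assigned at ingress. The sysctl itself is registered in net/ipv4/sysctl_net_ipv4.c further down in this diff, and its default presumably stays 1 so existing behaviour is unchanged.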
index 2d8efeecf61976f00c0700cc7f64f749b9482a73..51a5d06085ac44777c622a58476d7700d7bc0a97 100644 (file)
@@ -587,6 +587,8 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
                goto err_free_skb;
 
        key = &tun_info->key;
+       if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT))
+               goto err_free_rt;
        md = ip_tunnel_info_opts(tun_info);
        if (!md)
                goto err_free_rt;
@@ -983,7 +985,6 @@ static void ipgre_tunnel_setup(struct net_device *dev)
 static void __gre_tunnel_init(struct net_device *dev)
 {
        struct ip_tunnel *tunnel;
-       int t_hlen;
 
        tunnel = netdev_priv(dev);
        tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
@@ -991,8 +992,6 @@ static void __gre_tunnel_init(struct net_device *dev)
 
        tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
 
-       t_hlen = tunnel->hlen + sizeof(struct iphdr);
-
        dev->features           |= GRE_FEATURES;
        dev->hw_features        |= GRE_FEATURES;
 
@@ -1302,13 +1301,11 @@ static const struct net_device_ops gre_tap_netdev_ops = {
 static int erspan_tunnel_init(struct net_device *dev)
 {
        struct ip_tunnel *tunnel = netdev_priv(dev);
-       int t_hlen;
 
        tunnel->tun_hlen = 8;
        tunnel->parms.iph.protocol = IPPROTO_GRE;
        tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
                       erspan_hdr_len(tunnel->erspan_ver);
-       t_hlen = tunnel->hlen + sizeof(struct iphdr);
 
        dev->features           |= GRE_FEATURES;
        dev->hw_features        |= GRE_FEATURES;
index 7582713dd18f37b5c27cdc85ff62626a8ad4f435..3196cf58f4189d4d11c3b895b15f430c720e0a2d 100644 (file)
@@ -307,7 +307,8 @@ drop:
        return true;
 }
 
-static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
+static int ip_rcv_finish_core(struct net *net, struct sock *sk,
+                             struct sk_buff *skb)
 {
        const struct iphdr *iph = ip_hdr(skb);
        int (*edemux)(struct sk_buff *skb);
@@ -315,13 +316,6 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
        struct rtable *rt;
        int err;
 
-       /* if ingress device is enslaved to an L3 master device pass the
-        * skb to its handler for processing
-        */
-       skb = l3mdev_ip_rcv(skb);
-       if (!skb)
-               return NET_RX_SUCCESS;
-
        if (net->ipv4.sysctl_ip_early_demux &&
            !skb_dst(skb) &&
            !skb->sk &&
@@ -393,7 +387,7 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
                        goto drop;
        }
 
-       return dst_input(skb);
+       return NET_RX_SUCCESS;
 
 drop:
        kfree_skb(skb);
@@ -405,13 +399,29 @@ drop_error:
        goto drop;
 }
 
+static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
+{
+       int ret;
+
+       /* if ingress device is enslaved to an L3 master device, pass the
+        * skb to its handler for processing
+        */
+       skb = l3mdev_ip_rcv(skb);
+       if (!skb)
+               return NET_RX_SUCCESS;
+
+       ret = ip_rcv_finish_core(net, sk, skb);
+       if (ret != NET_RX_DROP)
+               ret = dst_input(skb);
+       return ret;
+}
+
 /*
  *     Main IP Receive routine.
  */
-int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
+static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net)
 {
        const struct iphdr *iph;
-       struct net *net;
        u32 len;
 
        /* When the interface is in promisc. mode, drop all the crap
@@ -421,7 +431,6 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
                goto drop;
 
 
-       net = dev_net(dev);
        __IP_UPD_PO_STATS(net, IPSTATS_MIB_IN, skb->len);
 
        skb = skb_share_check(skb, GFP_ATOMIC);
@@ -489,9 +498,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
        /* Must drop socket now because of tproxy. */
        skb_orphan(skb);
 
-       return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
-                      net, NULL, skb, dev, NULL,
-                      ip_rcv_finish);
+       return skb;
 
 csum_error:
        __IP_INC_STATS(net, IPSTATS_MIB_CSUMERRORS);
@@ -500,5 +507,113 @@ inhdr_error:
 drop:
        kfree_skb(skb);
 out:
-       return NET_RX_DROP;
+       return NULL;
+}
+
+/*
+ * IP receive entry point
+ */
+int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
+          struct net_device *orig_dev)
+{
+       struct net *net = dev_net(dev);
+
+       skb = ip_rcv_core(skb, net);
+       if (skb == NULL)
+               return NET_RX_DROP;
+       return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
+                      net, NULL, skb, dev, NULL,
+                      ip_rcv_finish);
+}
+
+static void ip_sublist_rcv_finish(struct list_head *head)
+{
+       struct sk_buff *skb, *next;
+
+       list_for_each_entry_safe(skb, next, head, list) {
+               list_del(&skb->list);
+               /* Handle ip{6}_forward case, as sch_direct_xmit has
+                * another kind of SKB-list usage (see validate_xmit_skb_list)
+                */
+               skb->next = NULL;
+               dst_input(skb);
+       }
+}
+
+static void ip_list_rcv_finish(struct net *net, struct sock *sk,
+                              struct list_head *head)
+{
+       struct dst_entry *curr_dst = NULL;
+       struct sk_buff *skb, *next;
+       struct list_head sublist;
+
+       INIT_LIST_HEAD(&sublist);
+       list_for_each_entry_safe(skb, next, head, list) {
+               struct dst_entry *dst;
+
+               list_del(&skb->list);
+               /* if ingress device is enslaved to an L3 master device, pass the
+                * skb to its handler for processing
+                */
+               skb = l3mdev_ip_rcv(skb);
+               if (!skb)
+                       continue;
+               if (ip_rcv_finish_core(net, sk, skb) == NET_RX_DROP)
+                       continue;
+
+               dst = skb_dst(skb);
+               if (curr_dst != dst) {
+                       /* dispatch old sublist */
+                       if (!list_empty(&sublist))
+                               ip_sublist_rcv_finish(&sublist);
+                       /* start new sublist */
+                       INIT_LIST_HEAD(&sublist);
+                       curr_dst = dst;
+               }
+               list_add_tail(&skb->list, &sublist);
+       }
+       /* dispatch final sublist */
+       ip_sublist_rcv_finish(&sublist);
+}
+
+static void ip_sublist_rcv(struct list_head *head, struct net_device *dev,
+                          struct net *net)
+{
+       NF_HOOK_LIST(NFPROTO_IPV4, NF_INET_PRE_ROUTING, net, NULL,
+                    head, dev, NULL, ip_rcv_finish);
+       ip_list_rcv_finish(net, NULL, head);
+}
+
+/* Receive a list of IP packets */
+void ip_list_rcv(struct list_head *head, struct packet_type *pt,
+                struct net_device *orig_dev)
+{
+       struct net_device *curr_dev = NULL;
+       struct net *curr_net = NULL;
+       struct sk_buff *skb, *next;
+       struct list_head sublist;
+
+       INIT_LIST_HEAD(&sublist);
+       list_for_each_entry_safe(skb, next, head, list) {
+               struct net_device *dev = skb->dev;
+               struct net *net = dev_net(dev);
+
+               list_del(&skb->list);
+               skb = ip_rcv_core(skb, net);
+               if (skb == NULL)
+                       continue;
+
+               if (curr_dev != dev || curr_net != net) {
+                       /* dispatch old sublist */
+                       if (!list_empty(&sublist))
+                               ip_sublist_rcv(&sublist, curr_dev, curr_net);
+                       /* start new sublist */
+                       INIT_LIST_HEAD(&sublist);
+                       curr_dev = dev;
+                       curr_net = net;
+               }
+               list_add_tail(&skb->list, &sublist);
+       }
+       /* dispatch final sublist */
+       ip_sublist_rcv(&sublist, curr_dev, curr_net);
 }
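ip_list_rcv() is the listified counterpart of ip_rcv(): it runs the shared ip_rcv_core() per packet, then batches consecutive packets that share a device and netns (and, one level down, a dst) into sublists so the PRE_ROUTING hook and dst_input() stages are amortised over the batch. It is presumably wired up as the list receive handler on the IPv4 packet_type; a sketch, assuming a .list_func member exists on struct packet_type in this series:

    /* Hypothetical hookup (net/ipv4/af_inet.c, not part of this hunk): */
    static struct packet_type ip_packet_type __read_mostly = {
            .type      = cpu_to_be16(ETH_P_IP),
            .func      = ip_rcv,
            .list_func = ip_list_rcv,
    };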
index 0e3edd25f881f1ad09201be0930734523721ebfc..9c4e72e9c60a7e2cfacdd45ce15f510d1db71a40 100644 (file)
@@ -423,7 +423,8 @@ static void ip_copy_addrs(struct iphdr *iph, const struct flowi4 *fl4)
 }
 
 /* Note: skb->sk can be different from sk, in case of tunnels */
-int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl)
+int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
+                   __u8 tos)
 {
        struct inet_sock *inet = inet_sk(sk);
        struct net *net = sock_net(sk);
@@ -462,7 +463,7 @@ int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl)
                                           inet->inet_dport,
                                           inet->inet_sport,
                                           sk->sk_protocol,
-                                          RT_CONN_FLAGS(sk),
+                                          RT_CONN_FLAGS_TOS(sk, tos),
                                           sk->sk_bound_dev_if);
                if (IS_ERR(rt))
                        goto no_route;
@@ -478,7 +479,7 @@ packet_routed:
        skb_push(skb, sizeof(struct iphdr) + (inet_opt ? inet_opt->opt.optlen : 0));
        skb_reset_network_header(skb);
        iph = ip_hdr(skb);
-       *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
+       *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (tos & 0xff));
        if (ip_dont_fragment(sk, &rt->dst) && !skb->ignore_df)
                iph->frag_off = htons(IP_DF);
        else
@@ -511,7 +512,7 @@ no_route:
        kfree_skb(skb);
        return -EHOSTUNREACH;
 }
-EXPORT_SYMBOL(ip_queue_xmit);
+EXPORT_SYMBOL(__ip_queue_xmit);
 
 static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 {
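With the TOS hoisted into a parameter of __ip_queue_xmit(), existing callers presumably keep their old behaviour through a thin wrapper that forwards inet->tos; a sketch of what such a wrapper would look like (the actual definition lives in a header outside this hunk):

    /* Assumed compatibility wrapper, e.g. in include/net/ip.h: */
    static inline int ip_queue_xmit(struct sock *sk, struct sk_buff *skb,
                                    struct flowi *fl)
    {
            return __ip_queue_xmit(sk, skb, fl, inet_sk(sk)->tos);
    }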
@@ -1147,14 +1148,15 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
        cork->fragsize = ip_sk_use_pmtu(sk) ?
                         dst_mtu(&rt->dst) : rt->dst.dev->mtu;
 
-       cork->gso_size = sk->sk_type == SOCK_DGRAM &&
-                        sk->sk_protocol == IPPROTO_UDP ? ipc->gso_size : 0;
+       cork->gso_size = ipc->gso_size;
        cork->dst = &rt->dst;
        cork->length = 0;
        cork->ttl = ipc->ttl;
        cork->tos = ipc->tos;
        cork->priority = ipc->priority;
-       cork->tx_flags = ipc->tx_flags;
+       cork->transmit_time = ipc->sockc.transmit_time;
+       cork->tx_flags = 0;
+       sock_tx_timestamp(sk, ipc->sockc.tsflags, &cork->tx_flags);
 
        return 0;
 }
@@ -1415,6 +1417,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
 
        skb->priority = (cork->tos != -1) ? cork->priority: sk->sk_priority;
        skb->mark = sk->sk_mark;
+       skb->tstamp = cork->transmit_time;
        /*
         * Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec
         * on dst refcount
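cork->transmit_time carries a user-requested launch time from the socket cookie into the generated skb (skb->tstamp above), where the SO_TXTIME/ETF machinery merged alongside this series can act on it. A hedged userspace sketch of supplying that time, assuming the SO_TXTIME uapi from this merge (the socket must first enable it via setsockopt(SOL_SOCKET, SO_TXTIME, ...) with a struct sock_txtime):

    /* Sketch only: attach a per-packet launch time via SCM_TXTIME.
     * Error handling and socket setup are elided.
     */
    #include <linux/net_tstamp.h>
    #include <sys/socket.h>
    #include <stdint.h>
    #include <string.h>

    static ssize_t send_at(int fd, const void *buf, size_t len,
                           uint64_t txtime_ns)
    {
            char control[CMSG_SPACE(sizeof(txtime_ns))] = { 0 };
            struct iovec iov = { .iov_base = (void *)buf, .iov_len = len };
            struct msghdr msg = {
                    .msg_iov = &iov, .msg_iovlen = 1,
                    .msg_control = control,
                    .msg_controllen = sizeof(control),
            };
            struct cmsghdr *cm = CMSG_FIRSTHDR(&msg);

            cm->cmsg_level = SOL_SOCKET;
            cm->cmsg_type = SCM_TXTIME;
            cm->cmsg_len = CMSG_LEN(sizeof(txtime_ns));
            memcpy(CMSG_DATA(cm), &txtime_ns, sizeof(txtime_ns));

            return sendmsg(fd, &msg, 0);
    }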
@@ -1547,11 +1550,8 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
        if (__ip_options_echo(net, &replyopts.opt.opt, skb, sopt))
                return;
 
+       ipcm_init(&ipc);
        ipc.addr = daddr;
-       ipc.opt = NULL;
-       ipc.tx_flags = 0;
-       ipc.ttl = 0;
-       ipc.tos = -1;
 
        if (replyopts.opt.opt.optlen) {
                ipc.opt = &replyopts.opt;
index 9f79b9803a161675c5907c1016fd219ad1f33fc9..5660adcf7a042ba675026a8397759618fd2a56b3 100644 (file)
@@ -60,6 +60,7 @@
 #include <linux/netfilter_ipv4.h>
 #include <linux/compat.h>
 #include <linux/export.h>
+#include <linux/rhashtable.h>
 #include <net/ip_tunnels.h>
 #include <net/checksum.h>
 #include <net/netlink.h>
@@ -1051,7 +1052,7 @@ static int ipmr_cache_report(struct mr_table *mrt,
        struct sk_buff *skb;
        int ret;
 
-       if (assert == IGMPMSG_WHOLEPKT)
+       if (assert == IGMPMSG_WHOLEPKT || assert == IGMPMSG_WRVIFWHOLE)
                skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
        else
                skb = alloc_skb(128, GFP_ATOMIC);
@@ -1059,7 +1060,7 @@ static int ipmr_cache_report(struct mr_table *mrt,
        if (!skb)
                return -ENOBUFS;
 
-       if (assert == IGMPMSG_WHOLEPKT) {
+       if (assert == IGMPMSG_WHOLEPKT || assert == IGMPMSG_WRVIFWHOLE) {
                /* Ugly, but we have no choice with this interface.
                 * Duplicate old header, fix ihl, length etc.
                 * And all this only to mangle msg->im_msgtype and
@@ -1070,9 +1071,12 @@ static int ipmr_cache_report(struct mr_table *mrt,
                skb_reset_transport_header(skb);
                msg = (struct igmpmsg *)skb_network_header(skb);
                memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
-               msg->im_msgtype = IGMPMSG_WHOLEPKT;
+               msg->im_msgtype = assert;
                msg->im_mbz = 0;
-               msg->im_vif = mrt->mroute_reg_vif_num;
+               if (assert == IGMPMSG_WRVIFWHOLE)
+                       msg->im_vif = vifi;
+               else
+                       msg->im_vif = mrt->mroute_reg_vif_num;
                ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
                ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
                                             sizeof(struct iphdr));
@@ -1371,6 +1375,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
        struct mr_table *mrt;
        struct vifctl vif;
        struct mfcctl mfc;
+       bool do_wrvifwhole;
        u32 uval;
 
        /* There's one exception to the lock - MRT_DONE which needs to unlock */
@@ -1501,10 +1506,12 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
                        break;
                }
 
+               do_wrvifwhole = (val == IGMPMSG_WRVIFWHOLE);
                val = !!val;
                if (val != mrt->mroute_do_pim) {
                        mrt->mroute_do_pim = val;
                        mrt->mroute_do_assert = val;
+                       mrt->mroute_do_wrvifwhole = do_wrvifwhole;
                }
                break;
        case MRT_TABLE:
@@ -1982,6 +1989,9 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
                               MFC_ASSERT_THRESH)) {
                        c->_c.mfc_un.res.last_assert = jiffies;
                        ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
+                       if (mrt->mroute_do_wrvifwhole)
+                               ipmr_cache_report(mrt, skb, true_vifi,
+                                                 IGMPMSG_WRVIFWHOLE);
                }
                goto dont_forward;
        }
@@ -2658,7 +2668,9 @@ static bool ipmr_fill_table(struct mr_table *mrt, struct sk_buff *skb)
                        mrt->mroute_reg_vif_num) ||
            nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_ASSERT,
                       mrt->mroute_do_assert) ||
-           nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_PIM, mrt->mroute_do_pim))
+           nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_PIM, mrt->mroute_do_pim) ||
+           nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_WRVIFWHOLE,
+                      mrt->mroute_do_wrvifwhole))
                return false;
 
        return true;
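IGMPMSG_WRVIFWHOLE is a new wrong-VIF notification that hands the daemon the whole offending packet rather than just a header, and it is enabled by passing that message type itself as the MRT_PIM value (hence the do_wrvifwhole capture above before val is collapsed to 0/1). A sketch of how a multicast routing daemon would opt in (mroute_fd is a hypothetical, already-set-up mroute control socket):

    /* Sketch: request whole-packet wrong-VIF reports. */
    int val = IGMPMSG_WRVIFWHOLE;

    if (setsockopt(mroute_fd, IPPROTO_IP, MRT_PIM, &val, sizeof(val)) < 0)
            perror("MRT_PIM");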
index cafb0506c8c99d57606c314863e7c05c2a81ba69..1ad9aa62a97b28e2f30c6d63bbad2afb34385a0c 100644 (file)
@@ -2,6 +2,7 @@
  * Common logic shared by IPv4 [ipmr] and IPv6 [ip6mr] implementation
  */
 
+#include <linux/rhashtable.h>
 #include <linux/mroute_base.h>
 
 /* Sets everything common except 'dev', since that is done under locking */
index e6774ccb7731fe725bbf642d7ff0ee33d63f1359..8d2e5dc9a827dec61792be581c3407258c324723 100644 (file)
@@ -98,59 +98,6 @@ int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry)
 }
 EXPORT_SYMBOL_GPL(nf_ip_reroute);
 
-__sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
-                           unsigned int dataoff, u_int8_t protocol)
-{
-       const struct iphdr *iph = ip_hdr(skb);
-       __sum16 csum = 0;
-
-       switch (skb->ip_summed) {
-       case CHECKSUM_COMPLETE:
-               if (hook != NF_INET_PRE_ROUTING && hook != NF_INET_LOCAL_IN)
-                       break;
-               if ((protocol == 0 && !csum_fold(skb->csum)) ||
-                   !csum_tcpudp_magic(iph->saddr, iph->daddr,
-                                      skb->len - dataoff, protocol,
-                                      skb->csum)) {
-                       skb->ip_summed = CHECKSUM_UNNECESSARY;
-                       break;
-               }
-               /* fall through */
-       case CHECKSUM_NONE:
-               if (protocol == 0)
-                       skb->csum = 0;
-               else
-                       skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
-                                                      skb->len - dataoff,
-                                                      protocol, 0);
-               csum = __skb_checksum_complete(skb);
-       }
-       return csum;
-}
-EXPORT_SYMBOL(nf_ip_checksum);
-
-__sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook,
-                              unsigned int dataoff, unsigned int len,
-                              u_int8_t protocol)
-{
-       const struct iphdr *iph = ip_hdr(skb);
-       __sum16 csum = 0;
-
-       switch (skb->ip_summed) {
-       case CHECKSUM_COMPLETE:
-               if (len == skb->len - dataoff)
-                       return nf_ip_checksum(skb, hook, dataoff, protocol);
-               /* fall through */
-       case CHECKSUM_NONE:
-               skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, protocol,
-                                              skb->len - dataoff, 0);
-               skb->ip_summed = CHECKSUM_NONE;
-               return __skb_checksum_complete_head(skb, dataoff + len);
-       }
-       return csum;
-}
-EXPORT_SYMBOL_GPL(nf_ip_checksum_partial);
-
 int nf_ip_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
                bool strict __always_unused)
 {
index bbfc356cb1b52e78d6d60a781dff531258ef9b3c..d9504adc47b3df6e3311cd5860e34deb8efe6c95 100644 (file)
@@ -9,22 +9,6 @@ config NF_DEFRAG_IPV4
        tristate
        default n
 
-config NF_CONNTRACK_IPV4
-       tristate "IPv4 connection tracking support (required for NAT)"
-       depends on NF_CONNTRACK
-       default m if NETFILTER_ADVANCED=n
-       select NF_DEFRAG_IPV4
-       ---help---
-         Connection tracking keeps a record of what packets have passed
-         through your machine, in order to figure out how they are related
-         into connections.
-
-         This is IPv4 support on Layer 3 independent connection tracking.
-         Layer 3 independent connection tracking is experimental scheme
-         which generalize ip_conntrack to support other layer 3 protocols.
-
-         To compile it as a module, choose M here.  If unsure, say N.
-
 config NF_SOCKET_IPV4
        tristate "IPv4 socket lookup support"
        help
@@ -112,7 +96,7 @@ config NF_REJECT_IPV4
 
 config NF_NAT_IPV4
        tristate "IPv4 NAT"
-       depends on NF_CONNTRACK_IPV4
+       depends on NF_CONNTRACK
        default m if NETFILTER_ADVANCED=n
        select NF_NAT
        help
@@ -279,7 +263,7 @@ config IP_NF_TARGET_SYNPROXY
 # NAT + specific targets: nf_conntrack
 config IP_NF_NAT
        tristate "iptables NAT support"
-       depends on NF_CONNTRACK_IPV4
+       depends on NF_CONNTRACK
        default m if NETFILTER_ADVANCED=n
        select NF_NAT
        select NF_NAT_IPV4
@@ -340,7 +324,7 @@ config IP_NF_MANGLE
 config IP_NF_TARGET_CLUSTERIP
        tristate "CLUSTERIP target support"
        depends on IP_NF_MANGLE
-       depends on NF_CONNTRACK_IPV4
+       depends on NF_CONNTRACK
        depends on NETFILTER_ADVANCED
        select NF_CONNTRACK_MARK
        select NETFILTER_FAMILY_ARP
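The NF_CONNTRACK_IPV4 symbol disappears because the IPv4 l3proto handling is folded into the core nf_conntrack module (see the two deleted files below); NAT and CLUSTERIP therefore depend directly on NF_CONNTRACK now.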
index 8394c17c269f29c73dad644eb7e8199cc2bcffc9..367993adf4d3170666c5680e0bd6ed175888cc5a 100644 (file)
@@ -3,12 +3,6 @@
 # Makefile for the netfilter modules on top of IPv4.
 #
 
-# objects for l3 independent conntrack
-nf_conntrack_ipv4-y    :=  nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o
-
-# connection tracking
-obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o
-
 nf_nat_ipv4-y          := nf_nat_l3proto_ipv4.o nf_nat_proto_icmp.o
 nf_nat_ipv4-$(CONFIG_NF_NAT_MASQUERADE_IPV4) += nf_nat_masquerade_ipv4.o
 obj-$(CONFIG_NF_NAT_IPV4) += nf_nat_ipv4.o
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
deleted file mode 100644 (file)
index 9db988f..0000000
+++ /dev/null
@@ -1,472 +0,0 @@
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/ip.h>
-#include <linux/netfilter.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/icmp.h>
-#include <linux/sysctl.h>
-#include <net/route.h>
-#include <net/ip.h>
-
-#include <linux/netfilter_ipv4.h>
-#include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_conntrack_helper.h>
-#include <net/netfilter/nf_conntrack_l4proto.h>
-#include <net/netfilter/nf_conntrack_l3proto.h>
-#include <net/netfilter/nf_conntrack_zones.h>
-#include <net/netfilter/nf_conntrack_core.h>
-#include <net/netfilter/nf_conntrack_seqadj.h>
-#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
-#include <net/netfilter/nf_nat_helper.h>
-#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
-#include <net/netfilter/nf_log.h>
-
-static int conntrack4_net_id __read_mostly;
-static DEFINE_MUTEX(register_ipv4_hooks);
-
-struct conntrack4_net {
-       unsigned int users;
-};
-
-static bool ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
-                             struct nf_conntrack_tuple *tuple)
-{
-       const __be32 *ap;
-       __be32 _addrs[2];
-       ap = skb_header_pointer(skb, nhoff + offsetof(struct iphdr, saddr),
-                               sizeof(u_int32_t) * 2, _addrs);
-       if (ap == NULL)
-               return false;
-
-       tuple->src.u3.ip = ap[0];
-       tuple->dst.u3.ip = ap[1];
-
-       return true;
-}
-
-static bool ipv4_invert_tuple(struct nf_conntrack_tuple *tuple,
-                             const struct nf_conntrack_tuple *orig)
-{
-       tuple->src.u3.ip = orig->dst.u3.ip;
-       tuple->dst.u3.ip = orig->src.u3.ip;
-
-       return true;
-}
-
-static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
-                           unsigned int *dataoff, u_int8_t *protonum)
-{
-       const struct iphdr *iph;
-       struct iphdr _iph;
-
-       iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
-       if (iph == NULL)
-               return -NF_ACCEPT;
-
-       /* Conntrack defragments packets, we might still see fragments
-        * inside ICMP packets though. */
-       if (iph->frag_off & htons(IP_OFFSET))
-               return -NF_ACCEPT;
-
-       *dataoff = nhoff + (iph->ihl << 2);
-       *protonum = iph->protocol;
-
-       /* Check bogus IP headers */
-       if (*dataoff > skb->len) {
-               pr_debug("nf_conntrack_ipv4: bogus IPv4 packet: "
-                        "nhoff %u, ihl %u, skblen %u\n",
-                        nhoff, iph->ihl << 2, skb->len);
-               return -NF_ACCEPT;
-       }
-
-       return NF_ACCEPT;
-}
-
-static unsigned int ipv4_helper(void *priv,
-                               struct sk_buff *skb,
-                               const struct nf_hook_state *state)
-{
-       struct nf_conn *ct;
-       enum ip_conntrack_info ctinfo;
-       const struct nf_conn_help *help;
-       const struct nf_conntrack_helper *helper;
-
-       /* This is where we call the helper: as the packet goes out. */
-       ct = nf_ct_get(skb, &ctinfo);
-       if (!ct || ctinfo == IP_CT_RELATED_REPLY)
-               return NF_ACCEPT;
-
-       help = nfct_help(ct);
-       if (!help)
-               return NF_ACCEPT;
-
-       /* rcu_read_lock()ed by nf_hook_thresh */
-       helper = rcu_dereference(help->helper);
-       if (!helper)
-               return NF_ACCEPT;
-
-       return helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb),
-                           ct, ctinfo);
-}
-
-static unsigned int ipv4_confirm(void *priv,
-                                struct sk_buff *skb,
-                                const struct nf_hook_state *state)
-{
-       struct nf_conn *ct;
-       enum ip_conntrack_info ctinfo;
-
-       ct = nf_ct_get(skb, &ctinfo);
-       if (!ct || ctinfo == IP_CT_RELATED_REPLY)
-               goto out;
-
-       /* adjust seqs for loopback traffic only in outgoing direction */
-       if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
-           !nf_is_loopback_packet(skb)) {
-               if (!nf_ct_seq_adjust(skb, ct, ctinfo, ip_hdrlen(skb))) {
-                       NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
-                       return NF_DROP;
-               }
-       }
-out:
-       /* We've seen it coming out the other side: confirm it */
-       return nf_conntrack_confirm(skb);
-}
-
-static unsigned int ipv4_conntrack_in(void *priv,
-                                     struct sk_buff *skb,
-                                     const struct nf_hook_state *state)
-{
-       return nf_conntrack_in(state->net, PF_INET, state->hook, skb);
-}
-
-static unsigned int ipv4_conntrack_local(void *priv,
-                                        struct sk_buff *skb,
-                                        const struct nf_hook_state *state)
-{
-       if (ip_is_fragment(ip_hdr(skb))) { /* IP_NODEFRAG setsockopt set */
-               enum ip_conntrack_info ctinfo;
-               struct nf_conn *tmpl;
-
-               tmpl = nf_ct_get(skb, &ctinfo);
-               if (tmpl && nf_ct_is_template(tmpl)) {
-                       /* when skipping ct, clear templates to avoid fooling
-                        * later targets/matches
-                        */
-                       skb->_nfct = 0;
-                       nf_ct_put(tmpl);
-               }
-               return NF_ACCEPT;
-       }
-
-       return nf_conntrack_in(state->net, PF_INET, state->hook, skb);
-}
-
-/* Connection tracking may drop packets, but never alters them, so
-   make it the first hook. */
-static const struct nf_hook_ops ipv4_conntrack_ops[] = {
-       {
-               .hook           = ipv4_conntrack_in,
-               .pf             = NFPROTO_IPV4,
-               .hooknum        = NF_INET_PRE_ROUTING,
-               .priority       = NF_IP_PRI_CONNTRACK,
-       },
-       {
-               .hook           = ipv4_conntrack_local,
-               .pf             = NFPROTO_IPV4,
-               .hooknum        = NF_INET_LOCAL_OUT,
-               .priority       = NF_IP_PRI_CONNTRACK,
-       },
-       {
-               .hook           = ipv4_helper,
-               .pf             = NFPROTO_IPV4,
-               .hooknum        = NF_INET_POST_ROUTING,
-               .priority       = NF_IP_PRI_CONNTRACK_HELPER,
-       },
-       {
-               .hook           = ipv4_confirm,
-               .pf             = NFPROTO_IPV4,
-               .hooknum        = NF_INET_POST_ROUTING,
-               .priority       = NF_IP_PRI_CONNTRACK_CONFIRM,
-       },
-       {
-               .hook           = ipv4_helper,
-               .pf             = NFPROTO_IPV4,
-               .hooknum        = NF_INET_LOCAL_IN,
-               .priority       = NF_IP_PRI_CONNTRACK_HELPER,
-       },
-       {
-               .hook           = ipv4_confirm,
-               .pf             = NFPROTO_IPV4,
-               .hooknum        = NF_INET_LOCAL_IN,
-               .priority       = NF_IP_PRI_CONNTRACK_CONFIRM,
-       },
-};
-
-/* Fast function for those who don't want to parse /proc (and I don't
-   blame them). */
-/* Reversing the socket's dst/src point of view gives us the reply
-   mapping. */
-static int
-getorigdst(struct sock *sk, int optval, void __user *user, int *len)
-{
-       const struct inet_sock *inet = inet_sk(sk);
-       const struct nf_conntrack_tuple_hash *h;
-       struct nf_conntrack_tuple tuple;
-
-       memset(&tuple, 0, sizeof(tuple));
-
-       lock_sock(sk);
-       tuple.src.u3.ip = inet->inet_rcv_saddr;
-       tuple.src.u.tcp.port = inet->inet_sport;
-       tuple.dst.u3.ip = inet->inet_daddr;
-       tuple.dst.u.tcp.port = inet->inet_dport;
-       tuple.src.l3num = PF_INET;
-       tuple.dst.protonum = sk->sk_protocol;
-       release_sock(sk);
-
-       /* We only do TCP and SCTP at the moment: is there a better way? */
-       if (tuple.dst.protonum != IPPROTO_TCP &&
-           tuple.dst.protonum != IPPROTO_SCTP) {
-               pr_debug("SO_ORIGINAL_DST: Not a TCP/SCTP socket\n");
-               return -ENOPROTOOPT;
-       }
-
-       if ((unsigned int) *len < sizeof(struct sockaddr_in)) {
-               pr_debug("SO_ORIGINAL_DST: len %d not %zu\n",
-                        *len, sizeof(struct sockaddr_in));
-               return -EINVAL;
-       }
-
-       h = nf_conntrack_find_get(sock_net(sk), &nf_ct_zone_dflt, &tuple);
-       if (h) {
-               struct sockaddr_in sin;
-               struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
-
-               sin.sin_family = AF_INET;
-               sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL]
-                       .tuple.dst.u.tcp.port;
-               sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL]
-                       .tuple.dst.u3.ip;
-               memset(sin.sin_zero, 0, sizeof(sin.sin_zero));
-
-               pr_debug("SO_ORIGINAL_DST: %pI4 %u\n",
-                        &sin.sin_addr.s_addr, ntohs(sin.sin_port));
-               nf_ct_put(ct);
-               if (copy_to_user(user, &sin, sizeof(sin)) != 0)
-                       return -EFAULT;
-               else
-                       return 0;
-       }
-       pr_debug("SO_ORIGINAL_DST: Can't find %pI4/%u-%pI4/%u.\n",
-                &tuple.src.u3.ip, ntohs(tuple.src.u.tcp.port),
-                &tuple.dst.u3.ip, ntohs(tuple.dst.u.tcp.port));
-       return -ENOENT;
-}
-
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
-
-#include <linux/netfilter/nfnetlink.h>
-#include <linux/netfilter/nfnetlink_conntrack.h>
-
-static int ipv4_tuple_to_nlattr(struct sk_buff *skb,
-                               const struct nf_conntrack_tuple *tuple)
-{
-       if (nla_put_in_addr(skb, CTA_IP_V4_SRC, tuple->src.u3.ip) ||
-           nla_put_in_addr(skb, CTA_IP_V4_DST, tuple->dst.u3.ip))
-               goto nla_put_failure;
-       return 0;
-
-nla_put_failure:
-       return -1;
-}
-
-static const struct nla_policy ipv4_nla_policy[CTA_IP_MAX+1] = {
-       [CTA_IP_V4_SRC] = { .type = NLA_U32 },
-       [CTA_IP_V4_DST] = { .type = NLA_U32 },
-};
-
-static int ipv4_nlattr_to_tuple(struct nlattr *tb[],
-                               struct nf_conntrack_tuple *t)
-{
-       if (!tb[CTA_IP_V4_SRC] || !tb[CTA_IP_V4_DST])
-               return -EINVAL;
-
-       t->src.u3.ip = nla_get_in_addr(tb[CTA_IP_V4_SRC]);
-       t->dst.u3.ip = nla_get_in_addr(tb[CTA_IP_V4_DST]);
-
-       return 0;
-}
-#endif
-
-static struct nf_sockopt_ops so_getorigdst = {
-       .pf             = PF_INET,
-       .get_optmin     = SO_ORIGINAL_DST,
-       .get_optmax     = SO_ORIGINAL_DST+1,
-       .get            = getorigdst,
-       .owner          = THIS_MODULE,
-};
-
-static int ipv4_hooks_register(struct net *net)
-{
-       struct conntrack4_net *cnet = net_generic(net, conntrack4_net_id);
-       int err = 0;
-
-       mutex_lock(&register_ipv4_hooks);
-
-       cnet->users++;
-       if (cnet->users > 1)
-               goto out_unlock;
-
-       err = nf_defrag_ipv4_enable(net);
-       if (err) {
-               cnet->users = 0;
-               goto out_unlock;
-       }
-
-       err = nf_register_net_hooks(net, ipv4_conntrack_ops,
-                                   ARRAY_SIZE(ipv4_conntrack_ops));
-
-       if (err)
-               cnet->users = 0;
- out_unlock:
-       mutex_unlock(&register_ipv4_hooks);
-       return err;
-}
-
-static void ipv4_hooks_unregister(struct net *net)
-{
-       struct conntrack4_net *cnet = net_generic(net, conntrack4_net_id);
-
-       mutex_lock(&register_ipv4_hooks);
-       if (cnet->users && (--cnet->users == 0))
-               nf_unregister_net_hooks(net, ipv4_conntrack_ops,
-                                       ARRAY_SIZE(ipv4_conntrack_ops));
-       mutex_unlock(&register_ipv4_hooks);
-}
-
-const struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 = {
-       .l3proto         = PF_INET,
-       .pkt_to_tuple    = ipv4_pkt_to_tuple,
-       .invert_tuple    = ipv4_invert_tuple,
-       .get_l4proto     = ipv4_get_l4proto,
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
-       .tuple_to_nlattr = ipv4_tuple_to_nlattr,
-       .nlattr_to_tuple = ipv4_nlattr_to_tuple,
-       .nla_policy      = ipv4_nla_policy,
-       .nla_size        = NLA_ALIGN(NLA_HDRLEN + sizeof(u32)) + /* CTA_IP_V4_SRC */
-                          NLA_ALIGN(NLA_HDRLEN + sizeof(u32)),  /* CTA_IP_V4_DST */
-#endif
-       .net_ns_get      = ipv4_hooks_register,
-       .net_ns_put      = ipv4_hooks_unregister,
-       .me              = THIS_MODULE,
-};
-
-module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
-                 &nf_conntrack_htable_size, 0600);
-
-MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET));
-MODULE_ALIAS("ip_conntrack");
-MODULE_LICENSE("GPL");
-
-static const struct nf_conntrack_l4proto * const builtin_l4proto4[] = {
-       &nf_conntrack_l4proto_tcp4,
-       &nf_conntrack_l4proto_udp4,
-       &nf_conntrack_l4proto_icmp,
-#ifdef CONFIG_NF_CT_PROTO_DCCP
-       &nf_conntrack_l4proto_dccp4,
-#endif
-#ifdef CONFIG_NF_CT_PROTO_SCTP
-       &nf_conntrack_l4proto_sctp4,
-#endif
-#ifdef CONFIG_NF_CT_PROTO_UDPLITE
-       &nf_conntrack_l4proto_udplite4,
-#endif
-};
-
-static int ipv4_net_init(struct net *net)
-{
-       return nf_ct_l4proto_pernet_register(net, builtin_l4proto4,
-                                            ARRAY_SIZE(builtin_l4proto4));
-}
-
-static void ipv4_net_exit(struct net *net)
-{
-       nf_ct_l4proto_pernet_unregister(net, builtin_l4proto4,
-                                       ARRAY_SIZE(builtin_l4proto4));
-}
-
-static struct pernet_operations ipv4_net_ops = {
-       .init = ipv4_net_init,
-       .exit = ipv4_net_exit,
-       .id = &conntrack4_net_id,
-       .size = sizeof(struct conntrack4_net),
-};
-
-static int __init nf_conntrack_l3proto_ipv4_init(void)
-{
-       int ret = 0;
-
-       need_conntrack();
-
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
-       if (WARN_ON(nla_policy_len(ipv4_nla_policy, CTA_IP_MAX + 1) !=
-           nf_conntrack_l3proto_ipv4.nla_size))
-               return -EINVAL;
-#endif
-       ret = nf_register_sockopt(&so_getorigdst);
-       if (ret < 0) {
-               pr_err("Unable to register netfilter socket option\n");
-               return ret;
-       }
-
-       ret = register_pernet_subsys(&ipv4_net_ops);
-       if (ret < 0) {
-               pr_err("nf_conntrack_ipv4: can't register pernet ops\n");
-               goto cleanup_sockopt;
-       }
-
-       ret = nf_ct_l4proto_register(builtin_l4proto4,
-                                    ARRAY_SIZE(builtin_l4proto4));
-       if (ret < 0)
-               goto cleanup_pernet;
-
-       ret = nf_ct_l3proto_register(&nf_conntrack_l3proto_ipv4);
-       if (ret < 0) {
-               pr_err("nf_conntrack_ipv4: can't register ipv4 proto.\n");
-               goto cleanup_l4proto;
-       }
-
-       return ret;
-cleanup_l4proto:
-       nf_ct_l4proto_unregister(builtin_l4proto4,
-                                ARRAY_SIZE(builtin_l4proto4));
- cleanup_pernet:
-       unregister_pernet_subsys(&ipv4_net_ops);
- cleanup_sockopt:
-       nf_unregister_sockopt(&so_getorigdst);
-       return ret;
-}
-
-static void __exit nf_conntrack_l3proto_ipv4_fini(void)
-{
-       synchronize_net();
-       nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv4);
-       nf_ct_l4proto_unregister(builtin_l4proto4,
-                                ARRAY_SIZE(builtin_l4proto4));
-       unregister_pernet_subsys(&ipv4_net_ops);
-       nf_unregister_sockopt(&so_getorigdst);
-}
-
-module_init(nf_conntrack_l3proto_ipv4_init);
-module_exit(nf_conntrack_l3proto_ipv4_fini);
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
deleted file mode 100644 (file)
index 5c15bea..0000000
+++ /dev/null
@@ -1,383 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- * (C) 2006-2010 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/timer.h>
-#include <linux/netfilter.h>
-#include <linux/in.h>
-#include <linux/icmp.h>
-#include <linux/seq_file.h>
-#include <net/ip.h>
-#include <net/checksum.h>
-#include <linux/netfilter_ipv4.h>
-#include <net/netfilter/nf_conntrack_tuple.h>
-#include <net/netfilter/nf_conntrack_l4proto.h>
-#include <net/netfilter/nf_conntrack_core.h>
-#include <net/netfilter/nf_conntrack_zones.h>
-#include <net/netfilter/nf_log.h>
-
-static const unsigned int nf_ct_icmp_timeout = 30*HZ;
-
-static inline struct nf_icmp_net *icmp_pernet(struct net *net)
-{
-       return &net->ct.nf_ct_proto.icmp;
-}
-
-static bool icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
-                             struct net *net, struct nf_conntrack_tuple *tuple)
-{
-       const struct icmphdr *hp;
-       struct icmphdr _hdr;
-
-       hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
-       if (hp == NULL)
-               return false;
-
-       tuple->dst.u.icmp.type = hp->type;
-       tuple->src.u.icmp.id = hp->un.echo.id;
-       tuple->dst.u.icmp.code = hp->code;
-
-       return true;
-}
-
-/* Add 1; spaces filled with 0. */
-static const u_int8_t invmap[] = {
-       [ICMP_ECHO] = ICMP_ECHOREPLY + 1,
-       [ICMP_ECHOREPLY] = ICMP_ECHO + 1,
-       [ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1,
-       [ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1,
-       [ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1,
-       [ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1,
-       [ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1,
-       [ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1
-};
-
-static bool icmp_invert_tuple(struct nf_conntrack_tuple *tuple,
-                             const struct nf_conntrack_tuple *orig)
-{
-       if (orig->dst.u.icmp.type >= sizeof(invmap) ||
-           !invmap[orig->dst.u.icmp.type])
-               return false;
-
-       tuple->src.u.icmp.id = orig->src.u.icmp.id;
-       tuple->dst.u.icmp.type = invmap[orig->dst.u.icmp.type] - 1;
-       tuple->dst.u.icmp.code = orig->dst.u.icmp.code;
-       return true;
-}
-
-static unsigned int *icmp_get_timeouts(struct net *net)
-{
-       return &icmp_pernet(net)->timeout;
-}
-
-/* Returns verdict for packet, or -1 for invalid. */
-static int icmp_packet(struct nf_conn *ct,
-                      const struct sk_buff *skb,
-                      unsigned int dataoff,
-                      enum ip_conntrack_info ctinfo,
-                      unsigned int *timeout)
-{
-       /* Do not immediately delete the connection after the first
-          successful reply to avoid excessive conntrackd traffic
-          and also to handle correctly ICMP echo reply duplicates. */
-       nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
-
-       return NF_ACCEPT;
-}
-
-/* Called when a new connection for this protocol found. */
-static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb,
-                    unsigned int dataoff, unsigned int *timeouts)
-{
-       static const u_int8_t valid_new[] = {
-               [ICMP_ECHO] = 1,
-               [ICMP_TIMESTAMP] = 1,
-               [ICMP_INFO_REQUEST] = 1,
-               [ICMP_ADDRESS] = 1
-       };
-
-       if (ct->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new) ||
-           !valid_new[ct->tuplehash[0].tuple.dst.u.icmp.type]) {
-               /* Can't create a new ICMP `conn' with this. */
-               pr_debug("icmp: can't create new conn with type %u\n",
-                        ct->tuplehash[0].tuple.dst.u.icmp.type);
-               nf_ct_dump_tuple_ip(&ct->tuplehash[0].tuple);
-               return false;
-       }
-       return true;
-}
-
-/* Returns conntrack if it dealt with ICMP, and filled in skb fields */
-static int
-icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
-                unsigned int hooknum)
-{
-       struct nf_conntrack_tuple innertuple, origtuple;
-       const struct nf_conntrack_l4proto *innerproto;
-       const struct nf_conntrack_tuple_hash *h;
-       const struct nf_conntrack_zone *zone;
-       enum ip_conntrack_info ctinfo;
-       struct nf_conntrack_zone tmp;
-
-       WARN_ON(skb_nfct(skb));
-       zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
-
-       /* Are they talking about one of our connections? */
-       if (!nf_ct_get_tuplepr(skb,
-                              skb_network_offset(skb) + ip_hdrlen(skb)
-                                                      + sizeof(struct icmphdr),
-                              PF_INET, net, &origtuple)) {
-               pr_debug("icmp_error_message: failed to get tuple\n");
-               return -NF_ACCEPT;
-       }
-
-       /* rcu_read_lock()ed by nf_hook_thresh */
-       innerproto = __nf_ct_l4proto_find(PF_INET, origtuple.dst.protonum);
-
-       /* Ordinarily, we'd expect the inverted tupleproto, but it's
-          been preserved inside the ICMP. */
-       if (!nf_ct_invert_tuple(&innertuple, &origtuple,
-                               &nf_conntrack_l3proto_ipv4, innerproto)) {
-               pr_debug("icmp_error_message: no match\n");
-               return -NF_ACCEPT;
-       }
-
-       ctinfo = IP_CT_RELATED;
-
-       h = nf_conntrack_find_get(net, zone, &innertuple);
-       if (!h) {
-               pr_debug("icmp_error_message: no match\n");
-               return -NF_ACCEPT;
-       }
-
-       if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY)
-               ctinfo += IP_CT_IS_REPLY;
-
-       /* Update skb to refer to this connection */
-       nf_ct_set(skb, nf_ct_tuplehash_to_ctrack(h), ctinfo);
-       return NF_ACCEPT;
-}
-
-static void icmp_error_log(const struct sk_buff *skb, struct net *net,
-                          u8 pf, const char *msg)
-{
-       nf_l4proto_log_invalid(skb, net, pf, IPPROTO_ICMP, "%s", msg);
-}
-
-/* Small and modified version of icmp_rcv */
-static int
-icmp_error(struct net *net, struct nf_conn *tmpl,
-          struct sk_buff *skb, unsigned int dataoff,
-          u8 pf, unsigned int hooknum)
-{
-       const struct icmphdr *icmph;
-       struct icmphdr _ih;
-
-       /* Not enough header? */
-       icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih);
-       if (icmph == NULL) {
-               icmp_error_log(skb, net, pf, "short packet");
-               return -NF_ACCEPT;
-       }
-
-       /* See ip_conntrack_proto_tcp.c */
-       if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
-           nf_ip_checksum(skb, hooknum, dataoff, 0)) {
-               icmp_error_log(skb, net, pf, "bad hw icmp checksum");
-               return -NF_ACCEPT;
-       }
-
-       /*
-        *      18 is the highest 'known' ICMP type. Anything else is a mystery
-        *
-        *      RFC 1122: 3.2.2  Unknown ICMP messages types MUST be silently
-        *                discarded.
-        */
-       if (icmph->type > NR_ICMP_TYPES) {
-               icmp_error_log(skb, net, pf, "invalid icmp type");
-               return -NF_ACCEPT;
-       }
-
-       /* Need to track icmp error message? */
-       if (icmph->type != ICMP_DEST_UNREACH &&
-           icmph->type != ICMP_SOURCE_QUENCH &&
-           icmph->type != ICMP_TIME_EXCEEDED &&
-           icmph->type != ICMP_PARAMETERPROB &&
-           icmph->type != ICMP_REDIRECT)
-               return NF_ACCEPT;
-
-       return icmp_error_message(net, tmpl, skb, hooknum);
-}
-
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
-
-#include <linux/netfilter/nfnetlink.h>
-#include <linux/netfilter/nfnetlink_conntrack.h>
-
-static int icmp_tuple_to_nlattr(struct sk_buff *skb,
-                               const struct nf_conntrack_tuple *t)
-{
-       if (nla_put_be16(skb, CTA_PROTO_ICMP_ID, t->src.u.icmp.id) ||
-           nla_put_u8(skb, CTA_PROTO_ICMP_TYPE, t->dst.u.icmp.type) ||
-           nla_put_u8(skb, CTA_PROTO_ICMP_CODE, t->dst.u.icmp.code))
-               goto nla_put_failure;
-       return 0;
-
-nla_put_failure:
-       return -1;
-}
-
-static const struct nla_policy icmp_nla_policy[CTA_PROTO_MAX+1] = {
-       [CTA_PROTO_ICMP_TYPE]   = { .type = NLA_U8 },
-       [CTA_PROTO_ICMP_CODE]   = { .type = NLA_U8 },
-       [CTA_PROTO_ICMP_ID]     = { .type = NLA_U16 },
-};
-
-static int icmp_nlattr_to_tuple(struct nlattr *tb[],
-                               struct nf_conntrack_tuple *tuple)
-{
-       if (!tb[CTA_PROTO_ICMP_TYPE] ||
-           !tb[CTA_PROTO_ICMP_CODE] ||
-           !tb[CTA_PROTO_ICMP_ID])
-               return -EINVAL;
-
-       tuple->dst.u.icmp.type = nla_get_u8(tb[CTA_PROTO_ICMP_TYPE]);
-       tuple->dst.u.icmp.code = nla_get_u8(tb[CTA_PROTO_ICMP_CODE]);
-       tuple->src.u.icmp.id = nla_get_be16(tb[CTA_PROTO_ICMP_ID]);
-
-       if (tuple->dst.u.icmp.type >= sizeof(invmap) ||
-           !invmap[tuple->dst.u.icmp.type])
-               return -EINVAL;
-
-       return 0;
-}
-
-static unsigned int icmp_nlattr_tuple_size(void)
-{
-       static unsigned int size __read_mostly;
-
-       if (!size)
-               size = nla_policy_len(icmp_nla_policy, CTA_PROTO_MAX + 1);
-
-       return size;
-}
-#endif
-
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
-
-#include <linux/netfilter/nfnetlink.h>
-#include <linux/netfilter/nfnetlink_cttimeout.h>
-
-static int icmp_timeout_nlattr_to_obj(struct nlattr *tb[],
-                                     struct net *net, void *data)
-{
-       unsigned int *timeout = data;
-       struct nf_icmp_net *in = icmp_pernet(net);
-
-       if (tb[CTA_TIMEOUT_ICMP_TIMEOUT]) {
-               *timeout =
-                       ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMP_TIMEOUT])) * HZ;
-       } else {
-               /* Set default ICMP timeout. */
-               *timeout = in->timeout;
-       }
-       return 0;
-}
-
-static int
-icmp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
-{
-       const unsigned int *timeout = data;
-
-       if (nla_put_be32(skb, CTA_TIMEOUT_ICMP_TIMEOUT, htonl(*timeout / HZ)))
-               goto nla_put_failure;
-       return 0;
-
-nla_put_failure:
-       return -ENOSPC;
-}
-
-static const struct nla_policy
-icmp_timeout_nla_policy[CTA_TIMEOUT_ICMP_MAX+1] = {
-       [CTA_TIMEOUT_ICMP_TIMEOUT]      = { .type = NLA_U32 },
-};
-#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
-
-#ifdef CONFIG_SYSCTL
-static struct ctl_table icmp_sysctl_table[] = {
-       {
-               .procname       = "nf_conntrack_icmp_timeout",
-               .maxlen         = sizeof(unsigned int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_jiffies,
-       },
-       { }
-};
-#endif /* CONFIG_SYSCTL */
-
-static int icmp_kmemdup_sysctl_table(struct nf_proto_net *pn,
-                                    struct nf_icmp_net *in)
-{
-#ifdef CONFIG_SYSCTL
-       pn->ctl_table = kmemdup(icmp_sysctl_table,
-                               sizeof(icmp_sysctl_table),
-                               GFP_KERNEL);
-       if (!pn->ctl_table)
-               return -ENOMEM;
-
-       pn->ctl_table[0].data = &in->timeout;
-#endif
-       return 0;
-}
-
-static int icmp_init_net(struct net *net, u_int16_t proto)
-{
-       struct nf_icmp_net *in = icmp_pernet(net);
-       struct nf_proto_net *pn = &in->pn;
-
-       in->timeout = nf_ct_icmp_timeout;
-
-       return icmp_kmemdup_sysctl_table(pn, in);
-}
-
-static struct nf_proto_net *icmp_get_net_proto(struct net *net)
-{
-       return &net->ct.nf_ct_proto.icmp.pn;
-}
-
-const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp =
-{
-       .l3proto                = PF_INET,
-       .l4proto                = IPPROTO_ICMP,
-       .pkt_to_tuple           = icmp_pkt_to_tuple,
-       .invert_tuple           = icmp_invert_tuple,
-       .packet                 = icmp_packet,
-       .get_timeouts           = icmp_get_timeouts,
-       .new                    = icmp_new,
-       .error                  = icmp_error,
-       .destroy                = NULL,
-       .me                     = NULL,
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
-       .tuple_to_nlattr        = icmp_tuple_to_nlattr,
-       .nlattr_tuple_size      = icmp_nlattr_tuple_size,
-       .nlattr_to_tuple        = icmp_nlattr_to_tuple,
-       .nla_policy             = icmp_nla_policy,
-#endif
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
-       .ctnl_timeout           = {
-               .nlattr_to_obj  = icmp_timeout_nlattr_to_obj,
-               .obj_to_nlattr  = icmp_timeout_obj_to_nlattr,
-               .nlattr_max     = CTA_TIMEOUT_ICMP_MAX,
-               .obj_size       = sizeof(unsigned int),
-               .nla_policy     = icmp_timeout_nla_policy,
-       },
-#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
-       .init_net               = icmp_init_net,
-       .get_net_proto          = icmp_get_net_proto,
-};
index 4388de0e5380c6423fbdfe7438727900fc297d7c..1e6f28c97d3a23bc4b17944a66ae92a93b214f69 100644 (file)
@@ -35,7 +35,7 @@ static const struct nf_loginfo default_loginfo = {
 };
 
 /* One level of recursion won't kill us */
-static void dump_ipv4_packet(struct nf_log_buf *m,
+static void dump_ipv4_packet(struct net *net, struct nf_log_buf *m,
                             const struct nf_loginfo *info,
                             const struct sk_buff *skb, unsigned int iphoff)
 {
@@ -183,7 +183,7 @@ static void dump_ipv4_packet(struct nf_log_buf *m,
                        /* Max length: 3+maxlen */
                        if (!iphoff) { /* Only recurse once. */
                                nf_log_buf_add(m, "[");
-                               dump_ipv4_packet(m, info, skb,
+                               dump_ipv4_packet(net, m, info, skb,
                                            iphoff + ih->ihl*4+sizeof(_icmph));
                                nf_log_buf_add(m, "] ");
                        }
@@ -251,7 +251,7 @@ static void dump_ipv4_packet(struct nf_log_buf *m,
 
        /* Max length: 15 "UID=4294967295 " */
        if ((logflags & NF_LOG_UID) && !iphoff)
-               nf_log_dump_sk_uid_gid(m, skb->sk);
+               nf_log_dump_sk_uid_gid(net, m, skb->sk);
 
        /* Max length: 16 "MARK=0xFFFFFFFF " */
        if (!iphoff && skb->mark)
@@ -333,7 +333,7 @@ static void nf_log_ip_packet(struct net *net, u_int8_t pf,
        if (in != NULL)
                dump_ipv4_mac_header(m, loginfo, skb);
 
-       dump_ipv4_packet(m, loginfo, skb, 0);
+       dump_ipv4_packet(net, m, loginfo, skb, 0);
 
        nf_log_buf_close(m);
 }
index 2ed64bca54e351e1ab51f7604b65ac72cbfb8e59..8d7aaf118a30106030d26780dd1c6e3ae5b90ba9 100644 (file)
@@ -320,8 +320,7 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk,
                if (addr->sin_addr.s_addr == htonl(INADDR_ANY))
                        chk_addr_ret = RTN_LOCAL;
 
-               if ((net->ipv4.sysctl_ip_nonlocal_bind == 0 &&
-                   isk->freebind == 0 && isk->transparent == 0 &&
+               if ((!inet_can_nonlocal_bind(net, isk) &&
                     chk_addr_ret != RTN_LOCAL) ||
                    chk_addr_ret == RTN_MULTICAST ||
                    chk_addr_ret == RTN_BROADCAST)
@@ -361,8 +360,7 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk,
                                                    scoped);
                rcu_read_unlock();
 
-               if (!(net->ipv6.sysctl.ip_nonlocal_bind ||
-                     isk->freebind || isk->transparent || has_addr ||
+               if (!(ipv6_can_nonlocal_bind(net, isk) || has_addr ||
                      addr_type == IPV6_ADDR_ANY))
                        return -EADDRNOTAVAIL;
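ping_check_bind_addr now folds the three-way "may this socket bind to a non-local address" test into inet_can_nonlocal_bind() and an IPv6 twin. Reconstructed from exactly the conditions they replace, the helpers plausibly read:

    /* Presumed helper, mirroring the replaced v4 check; the IPv6
     * variant would test net->ipv6.sysctl.ip_nonlocal_bind instead.
     */
    static inline bool inet_can_nonlocal_bind(struct net *net,
                                              struct inet_sock *inet)
    {
            return net->ipv4.sysctl_ip_nonlocal_bind ||
                   inet->freebind || inet->transparent;
    }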
 
@@ -739,13 +737,7 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
                /* no remote port */
        }
 
-       ipc.sockc.tsflags = sk->sk_tsflags;
-       ipc.addr = inet->inet_saddr;
-       ipc.opt = NULL;
-       ipc.oif = sk->sk_bound_dev_if;
-       ipc.tx_flags = 0;
-       ipc.ttl = 0;
-       ipc.tos = -1;
+       ipcm_init_sk(&ipc, inet);
 
        if (msg->msg_controllen) {
                err = ip_cmsg_send(sk, msg, &ipc, false);
@@ -769,8 +761,6 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
                rcu_read_unlock();
        }
 
-       sock_tx_timestamp(sk, ipc.sockc.tsflags, &ipc.tx_flags);
-
        saddr = ipc.addr;
        ipc.addr = faddr = daddr;
 
index 77350c1256ce9bda462f5c81c91af3834e41b04a..b46e4cf9a55a1aa58e1fa344443e184053e05ffd 100644 (file)
@@ -287,6 +287,8 @@ static const struct snmp_mib snmp4_net_list[] = {
        SNMP_MIB_ITEM("TCPDelivered", LINUX_MIB_TCPDELIVERED),
        SNMP_MIB_ITEM("TCPDeliveredCE", LINUX_MIB_TCPDELIVEREDCE),
        SNMP_MIB_ITEM("TCPAckCompressed", LINUX_MIB_TCPACKCOMPRESSED),
+       SNMP_MIB_ITEM("TCPZeroWindowDrop", LINUX_MIB_TCPZEROWINDOWDROP),
+       SNMP_MIB_ITEM("TCPRcvQDrop", LINUX_MIB_TCPRCVQDROP),
        SNMP_MIB_SENTINEL
 };
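Both new counters surface alongside the other LINUX_MIB entries in /proc/net/netstat (readable with nstat); as the names suggest, they count receive-side drops caused by a closed (zero) receive window and by a full receive queue, respectively.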
 
index abb3c9490c551781822f0fb40ca2bafe960c1339..33df4d76db2d948d620ffc809574b364ae24ad4b 100644 (file)
@@ -381,6 +381,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
 
        skb->priority = sk->sk_priority;
        skb->mark = sk->sk_mark;
+       skb->tstamp = sockc->transmit_time;
        skb_dst_set(skb, &rt->dst);
        *rtp = NULL;
 
@@ -561,13 +562,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
                daddr = inet->inet_daddr;
        }
 
-       ipc.sockc.tsflags = sk->sk_tsflags;
-       ipc.addr = inet->inet_saddr;
-       ipc.opt = NULL;
-       ipc.tx_flags = 0;
-       ipc.ttl = 0;
-       ipc.tos = -1;
-       ipc.oif = sk->sk_bound_dev_if;
+       ipcm_init_sk(&ipc, inet);
 
        if (msg->msg_controllen) {
                err = ip_cmsg_send(sk, msg, &ipc, false);
@@ -670,8 +665,6 @@ back_from_confirm:
                                      &rt, msg->msg_flags, &ipc.sockc);
 
         else {
-               sock_tx_timestamp(sk, ipc.sockc.tsflags, &ipc.tx_flags);
-
                if (!ipc.addr)
                        ipc.addr = fl4.daddr;
                lock_sock(sk);
index 1df6e97106d79eef9dfde27472c5f9c20cae3943..b678466da451c994b7baec77f96c482afe62da4b 100644 (file)
@@ -1996,8 +1996,11 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
                goto no_route;
        }
 
-       if (res->type == RTN_BROADCAST)
+       if (res->type == RTN_BROADCAST) {
+               if (IN_DEV_BFORWARD(in_dev))
+                       goto make_route;
                goto brd_input;
+       }
 
        if (res->type == RTN_LOCAL) {
                err = fib_validate_source(skb, saddr, daddr, tos,
@@ -2014,6 +2017,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
        if (res->type != RTN_UNICAST)
                goto martian_destination;
 
+make_route:
        err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos, flkeys);
 out:   return err;
 
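IN_DEV_BFORWARD presumably reflects a new per-device directed-broadcast-forwarding control (the bc_forwarding sysctl, if this merge matches mainline): when it is set, packets routed to a broadcast address skip local delivery via brd_input and are instead routed like unicast through the new make_route label.
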
index 5fa335fd385254def583b9a5100fbe7b9ce94cd6..b92f422f2fa805cd5cca8264fe9ae5aa6d6a65b8 100644 (file)
@@ -201,6 +201,23 @@ static int ipv4_ping_group_range(struct ctl_table *table, int write,
        return ret;
 }
 
+static int ipv4_fwd_update_priority(struct ctl_table *table, int write,
+                                   void __user *buffer,
+                                   size_t *lenp, loff_t *ppos)
+{
+       struct net *net;
+       int ret;
+
+       net = container_of(table->data, struct net,
+                          ipv4.sysctl_ip_fwd_update_priority);
+       ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+       if (write && ret == 0)
+               call_netevent_notifiers(NETEVENT_IPV4_FWD_UPDATE_PRIORITY_UPDATE,
+                                       net);
+
+       return ret;
+}
+
 static int proc_tcp_congestion_control(struct ctl_table *ctl, int write,
                                       void __user *buffer, size_t *lenp, loff_t *ppos)
 {
@@ -663,6 +680,15 @@ static struct ctl_table ipv4_net_table[] = {
                .mode           = 0644,
                .proc_handler   = proc_dointvec,
        },
+       {
+               .procname       = "ip_forward_update_priority",
+               .data           = &init_net.ipv4.sysctl_ip_fwd_update_priority,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = ipv4_fwd_update_priority,
+               .extra1         = &zero,
+               .extra2         = &one,
+       },
        {
                .procname       = "ip_nonlocal_bind",
                .data           = &init_net.ipv4.sysctl_ip_nonlocal_bind,
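
The handler clamps the value to 0/1 via extra1/extra2 and raises a netevent after a successful write so that listeners (for example switch-offload drivers mirroring forwarding behaviour) can resynchronize. From userspace the knob is an ordinary proc file; a minimal sketch, with the path inferred from the procname above:

#include <fcntl.h>
#include <unistd.h>

/* Stop the stack from rewriting skb->priority from the TOS field
 * while forwarding IPv4 packets; writing "1" restores the historical
 * rewrite behaviour.
 */
int main(void)
{
	int fd = open("/proc/sys/net/ipv4/ip_forward_update_priority",
		      O_WRONLY);

	if (fd < 0)
		return 1;
	if (write(fd, "0", 1) != 1)
		return 1;
	return close(fd);
}
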
index 4491faf83f4f93cf4384f7b192ffe3022567cc0a..b8af2fec5ad59a0ddd2590499e2c1e977646485c 100644 (file)
@@ -507,7 +507,7 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
        const struct tcp_sock *tp = tcp_sk(sk);
        int state;
 
-       sock_poll_wait(file, sk_sleep(sk), wait);
+       sock_poll_wait(file, wait);
 
        state = inet_sk_state_load(sk);
        if (state == TCP_LISTEN)
@@ -817,8 +817,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
                                 * This occurs when the user tries to read
                                 * from a never-connected socket.
                                 */
-                               if (!sock_flag(sk, SOCK_DONE))
-                                       ret = -ENOTCONN;
+                               ret = -ENOTCONN;
                                break;
                        }
                        if (!timeo) {
@@ -1241,7 +1240,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
                /* 'common' sending to sendq */
        }
 
-       sockc.tsflags = sk->sk_tsflags;
+       sockcm_init(&sockc, sk);
        if (msg->msg_controllen) {
                err = sock_cmsg_send(sk, msg, &sockc);
                if (unlikely(err)) {
@@ -1275,9 +1274,6 @@ restart:
                        int linear;
 
 new_segment:
-                       /* Allocate new segment. If the interface is SG,
-                        * allocate skb fitting to single page.
-                        */
                        if (!sk_stream_memory_free(sk))
                                goto wait_for_sndbuf;
 
@@ -2042,13 +2038,10 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
                                break;
 
                        if (sk->sk_state == TCP_CLOSE) {
-                               if (!sock_flag(sk, SOCK_DONE)) {
-                                       /* This occurs when user tries to read
-                                        * from never connected socket.
-                                        */
-                                       copied = -ENOTCONN;
-                                       break;
-                               }
+                               /* This occurs when the user tries to read
+                                * from a never-connected socket.
+                                */
+                               copied = -ENOTCONN;
                                break;
                        }
 
@@ -2538,7 +2531,6 @@ int tcp_disconnect(struct sock *sk, int flags)
        struct inet_sock *inet = inet_sk(sk);
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct tcp_sock *tp = tcp_sk(sk);
-       int err = 0;
        int old_state = sk->sk_state;
 
        if (old_state != TCP_CLOSE)
@@ -2576,6 +2568,7 @@ int tcp_disconnect(struct sock *sk, int flags)
        sk->sk_shutdown = 0;
        sock_reset_flag(sk, SOCK_DONE);
        tp->srtt_us = 0;
+       tp->rcv_rtt_last_tsecr = 0;
        tp->write_seq += tp->max_window + 2;
        if (tp->write_seq == 0)
                tp->write_seq = 1;
@@ -2600,6 +2593,10 @@ int tcp_disconnect(struct sock *sk, int flags)
        sk->sk_rx_dst = NULL;
        tcp_saved_syn_free(tp);
        tp->compressed_ack = 0;
+       tp->bytes_sent = 0;
+       tp->bytes_retrans = 0;
+       tp->dsack_dups = 0;
+       tp->reord_seen = 0;
 
        /* Clean up fastopen related fields */
        tcp_free_fastopen_req(tp);
@@ -2614,7 +2611,7 @@ int tcp_disconnect(struct sock *sk, int flags)
        }
 
        sk->sk_error_report(sk);
-       return err;
+       return 0;
 }
 EXPORT_SYMBOL(tcp_disconnect);
 
@@ -2995,7 +2992,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
                if (val < 0)
                        err = -EINVAL;
                else
-                       icsk->icsk_user_timeout = msecs_to_jiffies(val);
+                       icsk->icsk_user_timeout = val;
                break;
 
        case TCP_FASTOPEN:
@@ -3207,10 +3204,41 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
                info->tcpi_delivery_rate = rate64;
        info->tcpi_delivered = tp->delivered;
        info->tcpi_delivered_ce = tp->delivered_ce;
+       info->tcpi_bytes_sent = tp->bytes_sent;
+       info->tcpi_bytes_retrans = tp->bytes_retrans;
+       info->tcpi_dsack_dups = tp->dsack_dups;
+       info->tcpi_reord_seen = tp->reord_seen;
        unlock_sock_fast(sk, slow);
 }
 EXPORT_SYMBOL_GPL(tcp_get_info);
 
+static size_t tcp_opt_stats_get_size(void)
+{
+       return
+               nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_BUSY */
+               nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_RWND_LIMITED */
+               nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_SNDBUF_LIMITED */
+               nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_DATA_SEGS_OUT */
+               nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_TOTAL_RETRANS */
+               nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_PACING_RATE */
+               nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_DELIVERY_RATE */
+               nla_total_size(sizeof(u32)) + /* TCP_NLA_SND_CWND */
+               nla_total_size(sizeof(u32)) + /* TCP_NLA_REORDERING */
+               nla_total_size(sizeof(u32)) + /* TCP_NLA_MIN_RTT */
+               nla_total_size(sizeof(u8)) + /* TCP_NLA_RECUR_RETRANS */
+               nla_total_size(sizeof(u8)) + /* TCP_NLA_DELIVERY_RATE_APP_LMT */
+               nla_total_size(sizeof(u32)) + /* TCP_NLA_SNDQ_SIZE */
+               nla_total_size(sizeof(u8)) + /* TCP_NLA_CA_STATE */
+               nla_total_size(sizeof(u32)) + /* TCP_NLA_SND_SSTHRESH */
+               nla_total_size(sizeof(u32)) + /* TCP_NLA_DELIVERED */
+               nla_total_size(sizeof(u32)) + /* TCP_NLA_DELIVERED_CE */
+               nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_BYTES_SENT */
+               nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_BYTES_RETRANS */
+               nla_total_size(sizeof(u32)) + /* TCP_NLA_DSACK_DUPS */
+               nla_total_size(sizeof(u32)) + /* TCP_NLA_REORD_SEEN */
+               0;
+}
+
 struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
 {
        const struct tcp_sock *tp = tcp_sk(sk);
@@ -3219,9 +3247,7 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
        u64 rate64;
        u32 rate;
 
-       stats = alloc_skb(7 * nla_total_size_64bit(sizeof(u64)) +
-                         7 * nla_total_size(sizeof(u32)) +
-                         3 * nla_total_size(sizeof(u8)), GFP_ATOMIC);
+       stats = alloc_skb(tcp_opt_stats_get_size(), GFP_ATOMIC);
        if (!stats)
                return NULL;
 
@@ -3257,6 +3283,13 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
        nla_put_u32(stats, TCP_NLA_SNDQ_SIZE, tp->write_seq - tp->snd_una);
        nla_put_u8(stats, TCP_NLA_CA_STATE, inet_csk(sk)->icsk_ca_state);
 
+       nla_put_u64_64bit(stats, TCP_NLA_BYTES_SENT, tp->bytes_sent,
+                         TCP_NLA_PAD);
+       nla_put_u64_64bit(stats, TCP_NLA_BYTES_RETRANS, tp->bytes_retrans,
+                         TCP_NLA_PAD);
+       nla_put_u32(stats, TCP_NLA_DSACK_DUPS, tp->dsack_dups);
+       nla_put_u32(stats, TCP_NLA_REORD_SEEN, tp->reord_seen);
+
        return stats;
 }
 
@@ -3451,7 +3484,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
                break;
 
        case TCP_USER_TIMEOUT:
-               val = jiffies_to_msecs(icsk->icsk_user_timeout);
+               val = icsk->icsk_user_timeout;
                break;
 
        case TCP_FASTOPEN:
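
With the msecs_to_jiffies()/jiffies_to_msecs() conversions dropped from the socket-option paths, icsk_user_timeout is now kept in milliseconds end to end, which lets tcp_timer.c (further down) compare it directly against tcp_time_stamp() deltas. The userspace contract is unchanged, since the option has always been specified in milliseconds:

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>

/* Abort the connection if transmitted data stays unacknowledged for
 * ten seconds; TCP_USER_TIMEOUT takes milliseconds (RFC 5482 style).
 */
static int set_user_timeout(int fd)
{
	unsigned int ms = 10 * 1000;

	return setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT,
			  &ms, sizeof(ms));
}
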
index 4bfff3c87e8e2de2c9af77ae1d5bb157a28f2207..13d34427ca3dd5bee810395ba4a1ab9759863182 100644 (file)
@@ -205,7 +205,11 @@ static u32 bbr_bw(const struct sock *sk)
  */
 static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain)
 {
-       rate *= tcp_mss_to_mtu(sk, tcp_sk(sk)->mss_cache);
+       unsigned int mss = tcp_sk(sk)->mss_cache;
+
+       if (!tcp_needs_internal_pacing(sk))
+               mss = tcp_mss_to_mtu(sk, mss);
+       rate *= mss;
        rate *= gain;
        rate >>= BBR_SCALE;
        rate *= USEC_PER_SEC;
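
When the stack paces internally (tcp_needs_internal_pacing(), whose static definition is removed from tcp_output.c below, presumably promoted to a shared header so BBR can use it), the pacing timer meters MSS-sized payload chunks, so scaling the rate up to MTU-sized wire frames would overshoot; the conversion is kept only when an external pacer such as sch_fq accounts for full frames. The error being avoided is small but systematic:

#include <stdio.h>

/* Illustration only: typical Ethernet MSS with TCP timestamps vs MTU. */
int main(void)
{
	unsigned int mss = 1448, mtu = 1500;

	printf("overshoot if internal pacing used MTU: %.1f%%\n",
	       100.0 * (mtu - mss) / mss);
	return 0;
}
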
index f9dcb29be12da9c637db0c2382eaf357c052d804..715d541b52ddc4f4876dae2bf003863dfbdfe23e 100644 (file)
@@ -78,6 +78,7 @@
 #include <linux/errqueue.h>
 #include <trace/events/tcp.h>
 #include <linux/static_key.h>
+#include <net/busy_poll.h>
 
 int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
 
@@ -590,9 +591,12 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
 {
        struct tcp_sock *tp = tcp_sk(sk);
 
-       if (tp->rx_opt.rcv_tsecr &&
-           (TCP_SKB_CB(skb)->end_seq -
-            TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss)) {
+       if (tp->rx_opt.rcv_tsecr == tp->rcv_rtt_last_tsecr)
+               return;
+       tp->rcv_rtt_last_tsecr = tp->rx_opt.rcv_tsecr;
+
+       if (TCP_SKB_CB(skb)->end_seq -
+           TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss) {
                u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
                u32 delta_us;
 
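Caching the last echoed timestamp means at most one receive-RTT sample per distinct TSecr value, instead of one per qualifying segment; the pure-ACK fast path and tcp_disconnect() elsewhere in this diff update and reset tp->rcv_rtt_last_tsecr to keep the cache coherent.
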
@@ -877,6 +881,7 @@ static void tcp_dsack_seen(struct tcp_sock *tp)
 {
        tp->rx_opt.sack_ok |= TCP_DSACK_SEEN;
        tp->rack.dsack_seen = 1;
+       tp->dsack_dups++;
 }
 
 /* It's reordering when higher sequence was delivered (i.e. sacked) before
@@ -908,8 +913,8 @@ static void tcp_check_sack_reordering(struct sock *sk, const u32 low_seq,
                                       sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
        }
 
-       tp->rack.reord = 1;
        /* This exciting event is worth remembering. 8) */
+       tp->reord_seen++;
        NET_INC_STATS(sock_net(sk),
                      ts ? LINUX_MIB_TCPTSREORDER : LINUX_MIB_TCPSACKREORDER);
 }
@@ -1873,6 +1878,7 @@ static void tcp_check_reno_reordering(struct sock *sk, const int addend)
 
        tp->reordering = min_t(u32, tp->packets_out + addend,
                               sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
+       tp->reord_seen++;
        NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRENOREORDER);
 }
 
@@ -3466,7 +3472,7 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
 static void tcp_store_ts_recent(struct tcp_sock *tp)
 {
        tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval;
-       tp->rx_opt.ts_recent_stamp = get_seconds();
+       tp->rx_opt.ts_recent_stamp = ktime_get_seconds();
 }
 
 static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
@@ -4347,6 +4353,11 @@ static bool tcp_try_coalesce(struct sock *sk,
        if (TCP_SKB_CB(from)->seq != TCP_SKB_CB(to)->end_seq)
                return false;
 
+#ifdef CONFIG_TLS_DEVICE
+       if (from->decrypted != to->decrypted)
+               return false;
+#endif
+
        if (!skb_try_coalesce(to, from, fragstolen, &delta))
                return false;
 
@@ -4642,8 +4653,10 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
        skb->data_len = data_len;
        skb->len = size;
 
-       if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
+       if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) {
+               NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP);
                goto err_free;
+       }
 
        err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
        if (err)
@@ -4699,18 +4712,21 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
         *  Out of sequence packets to the out_of_order_queue.
         */
        if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
-               if (tcp_receive_window(tp) == 0)
+               if (tcp_receive_window(tp) == 0) {
+                       NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPZEROWINDOWDROP);
                        goto out_of_window;
+               }
 
                /* Ok. In sequence. In window. */
 queue_and_out:
                if (skb_queue_len(&sk->sk_receive_queue) == 0)
                        sk_forced_mem_schedule(sk, skb->truesize);
-               else if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
+               else if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) {
+                       NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP);
                        goto drop;
+               }
 
                eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
-               tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
                if (skb->len)
                        tcp_event_data_recv(sk, skb);
                if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
@@ -4766,8 +4782,10 @@ drop:
                /* If window is closed, drop tail of packet. But after
                 * remembering D-SACK for its head made in previous line.
                 */
-               if (!tcp_receive_window(tp))
+               if (!tcp_receive_window(tp)) {
+                       NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPZEROWINDOWDROP);
                        goto out_of_window;
+               }
                goto queue_and_out;
        }
 
@@ -4885,6 +4903,9 @@ restart:
                        break;
 
                memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
+#ifdef CONFIG_TLS_DEVICE
+               nskb->decrypted = skb->decrypted;
+#endif
                TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
                if (list)
                        __skb_queue_before(list, skb, nskb);
@@ -4912,6 +4933,10 @@ restart:
                                    skb == tail ||
                                    (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
                                        goto end;
+#ifdef CONFIG_TLS_DEVICE
+                               if (skb->decrypted != nskb->decrypted)
+                                       goto end;
+#endif
                        }
                }
        }
@@ -5530,6 +5555,11 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
                                tcp_ack(sk, skb, 0);
                                __kfree_skb(skb);
                                tcp_data_snd_check(sk);
+                               /* When receiving a pure ack in the fast path,
+                                * update the last ts ecr directly instead of
+                                * calling tcp_rcv_rtt_measure_ts().
+                                */
+                               tp->rcv_rtt_last_tsecr = tp->rx_opt.rcv_tsecr;
                                return;
                        } else { /* Header too small */
                                TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
@@ -5631,6 +5661,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
        if (skb) {
                icsk->icsk_af_ops->sk_rx_dst_set(sk, skb);
                security_inet_conn_established(sk, skb);
+               sk_mark_napi_id(sk, skb);
        }
 
        tcp_init_transfer(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB);
@@ -6459,6 +6490,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
        tcp_rsk(req)->snt_isn = isn;
        tcp_rsk(req)->txhash = net_tx_rndhash();
        tcp_openreq_init_rwin(req, sk, dst);
+       sk_rx_queue_set(req_to_sk(req), skb);
        if (!want_cookie) {
                tcp_reqsk_record_syn(sk, req, skb);
                fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst);
index 3b2711e33e4c7c06ed8caec20cf0241f36068f54..9e041fa5c545367961f03fa8a9124aebbc1b6c69 100644 (file)
@@ -155,7 +155,8 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
           and use initial timestamp retrieved from peer table.
         */
        if (tcptw->tw_ts_recent_stamp &&
-           (!twp || (reuse && get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
+           (!twp || (reuse && time_after32(ktime_get_seconds(),
+                                           tcptw->tw_ts_recent_stamp)))) {
                /* In case of repair and re-using TIME-WAIT sockets we still
                 * want to be sure that it is safe as above but honor the
                 * sequence numbers and time stamps set as part of the repair
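
Two things change in this comparison: get_seconds() (wall clock, y2038-unsafe on 32-bit) gives way to the monotonic ktime_get_seconds(), and the test goes through time_after32(), which stays correct when the stored stamp is truncated to 32 bits. The macro presumably uses the same wrap-safe trick as the classic jiffies helpers:

#include <stdbool.h>
#include <stdint.h>

/* Sketch of time_after32(a, b) semantics: true when a is later than b,
 * even across a 32-bit wraparound.
 */
static inline bool after32(uint32_t a, uint32_t b)
{
	return (int32_t)(b - a) < 0;
}
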
index 1dda1341a223937580b4efdbedb21ae50b221ff7..75ef332a7caf44de619acf030977eba01565c70a 100644 (file)
@@ -144,7 +144,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
                tw->tw_substate   = TCP_TIME_WAIT;
                tcptw->tw_rcv_nxt = TCP_SKB_CB(skb)->end_seq;
                if (tmp_opt.saw_tstamp) {
-                       tcptw->tw_ts_recent_stamp = get_seconds();
+                       tcptw->tw_ts_recent_stamp = ktime_get_seconds();
                        tcptw->tw_ts_recent       = tmp_opt.rcv_tsval;
                }
 
@@ -189,7 +189,7 @@ kill:
 
                if (tmp_opt.saw_tstamp) {
                        tcptw->tw_ts_recent       = tmp_opt.rcv_tsval;
-                       tcptw->tw_ts_recent_stamp = get_seconds();
+                       tcptw->tw_ts_recent_stamp = ktime_get_seconds();
                }
 
                inet_twsk_put(tw);
@@ -449,119 +449,122 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
                                      struct sk_buff *skb)
 {
        struct sock *newsk = inet_csk_clone_lock(sk, req, GFP_ATOMIC);
+       const struct inet_request_sock *ireq = inet_rsk(req);
+       struct tcp_request_sock *treq = tcp_rsk(req);
+       struct inet_connection_sock *newicsk;
+       struct tcp_sock *oldtp, *newtp;
 
-       if (newsk) {
-               const struct inet_request_sock *ireq = inet_rsk(req);
-               struct tcp_request_sock *treq = tcp_rsk(req);
-               struct inet_connection_sock *newicsk = inet_csk(newsk);
-               struct tcp_sock *newtp = tcp_sk(newsk);
-               struct tcp_sock *oldtp = tcp_sk(sk);
-
-               smc_check_reset_syn_req(oldtp, req, newtp);
-
-               /* Now setup tcp_sock */
-               newtp->pred_flags = 0;
-
-               newtp->rcv_wup = newtp->copied_seq =
-               newtp->rcv_nxt = treq->rcv_isn + 1;
-               newtp->segs_in = 1;
-
-               newtp->snd_sml = newtp->snd_una =
-               newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1;
-
-               INIT_LIST_HEAD(&newtp->tsq_node);
-               INIT_LIST_HEAD(&newtp->tsorted_sent_queue);
-
-               tcp_init_wl(newtp, treq->rcv_isn);
-
-               newtp->srtt_us = 0;
-               newtp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
-               minmax_reset(&newtp->rtt_min, tcp_jiffies32, ~0U);
-               newicsk->icsk_rto = TCP_TIMEOUT_INIT;
-               newicsk->icsk_ack.lrcvtime = tcp_jiffies32;
-
-               newtp->packets_out = 0;
-               newtp->retrans_out = 0;
-               newtp->sacked_out = 0;
-               newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
-               newtp->tlp_high_seq = 0;
-               newtp->lsndtime = tcp_jiffies32;
-               newsk->sk_txhash = treq->txhash;
-               newtp->last_oow_ack_time = 0;
-               newtp->total_retrans = req->num_retrans;
-
-               /* So many TCP implementations out there (incorrectly) count the
-                * initial SYN frame in their delayed-ACK and congestion control
-                * algorithms that we must have the following bandaid to talk
-                * efficiently to them.  -DaveM
-                */
-               newtp->snd_cwnd = TCP_INIT_CWND;
-               newtp->snd_cwnd_cnt = 0;
-
-               /* There's a bubble in the pipe until at least the first ACK. */
-               newtp->app_limited = ~0U;
-
-               tcp_init_xmit_timers(newsk);
-               newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1;
-
-               newtp->rx_opt.saw_tstamp = 0;
-
-               newtp->rx_opt.dsack = 0;
-               newtp->rx_opt.num_sacks = 0;
-
-               newtp->urg_data = 0;
-
-               if (sock_flag(newsk, SOCK_KEEPOPEN))
-                       inet_csk_reset_keepalive_timer(newsk,
-                                                      keepalive_time_when(newtp));
-
-               newtp->rx_opt.tstamp_ok = ireq->tstamp_ok;
-               newtp->rx_opt.sack_ok = ireq->sack_ok;
-               newtp->window_clamp = req->rsk_window_clamp;
-               newtp->rcv_ssthresh = req->rsk_rcv_wnd;
-               newtp->rcv_wnd = req->rsk_rcv_wnd;
-               newtp->rx_opt.wscale_ok = ireq->wscale_ok;
-               if (newtp->rx_opt.wscale_ok) {
-                       newtp->rx_opt.snd_wscale = ireq->snd_wscale;
-                       newtp->rx_opt.rcv_wscale = ireq->rcv_wscale;
-               } else {
-                       newtp->rx_opt.snd_wscale = newtp->rx_opt.rcv_wscale = 0;
-                       newtp->window_clamp = min(newtp->window_clamp, 65535U);
-               }
-               newtp->snd_wnd = (ntohs(tcp_hdr(skb)->window) <<
-                                 newtp->rx_opt.snd_wscale);
-               newtp->max_window = newtp->snd_wnd;
-
-               if (newtp->rx_opt.tstamp_ok) {
-                       newtp->rx_opt.ts_recent = req->ts_recent;
-                       newtp->rx_opt.ts_recent_stamp = get_seconds();
-                       newtp->tcp_header_len = sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
-               } else {
-                       newtp->rx_opt.ts_recent_stamp = 0;
-                       newtp->tcp_header_len = sizeof(struct tcphdr);
-               }
-               newtp->tsoffset = treq->ts_off;
+       if (!newsk)
+               return NULL;
+
+       newicsk = inet_csk(newsk);
+       newtp = tcp_sk(newsk);
+       oldtp = tcp_sk(sk);
+
+       smc_check_reset_syn_req(oldtp, req, newtp);
+
+       /* Now setup tcp_sock */
+       newtp->pred_flags = 0;
+
+       newtp->rcv_wup = newtp->copied_seq =
+       newtp->rcv_nxt = treq->rcv_isn + 1;
+       newtp->segs_in = 1;
+
+       newtp->snd_sml = newtp->snd_una =
+       newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1;
+
+       INIT_LIST_HEAD(&newtp->tsq_node);
+       INIT_LIST_HEAD(&newtp->tsorted_sent_queue);
+
+       tcp_init_wl(newtp, treq->rcv_isn);
+
+       newtp->srtt_us = 0;
+       newtp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
+       minmax_reset(&newtp->rtt_min, tcp_jiffies32, ~0U);
+       newicsk->icsk_rto = TCP_TIMEOUT_INIT;
+       newicsk->icsk_ack.lrcvtime = tcp_jiffies32;
+
+       newtp->packets_out = 0;
+       newtp->retrans_out = 0;
+       newtp->sacked_out = 0;
+       newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
+       newtp->tlp_high_seq = 0;
+       newtp->lsndtime = tcp_jiffies32;
+       newsk->sk_txhash = treq->txhash;
+       newtp->last_oow_ack_time = 0;
+       newtp->total_retrans = req->num_retrans;
+
+       /* So many TCP implementations out there (incorrectly) count the
+        * initial SYN frame in their delayed-ACK and congestion control
+        * algorithms that we must have the following bandaid to talk
+        * efficiently to them.  -DaveM
+        */
+       newtp->snd_cwnd = TCP_INIT_CWND;
+       newtp->snd_cwnd_cnt = 0;
+
+       /* There's a bubble in the pipe until at least the first ACK. */
+       newtp->app_limited = ~0U;
+
+       tcp_init_xmit_timers(newsk);
+       newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1;
+
+       newtp->rx_opt.saw_tstamp = 0;
+
+       newtp->rx_opt.dsack = 0;
+       newtp->rx_opt.num_sacks = 0;
+
+       newtp->urg_data = 0;
+
+       if (sock_flag(newsk, SOCK_KEEPOPEN))
+               inet_csk_reset_keepalive_timer(newsk,
+                                              keepalive_time_when(newtp));
+
+       newtp->rx_opt.tstamp_ok = ireq->tstamp_ok;
+       newtp->rx_opt.sack_ok = ireq->sack_ok;
+       newtp->window_clamp = req->rsk_window_clamp;
+       newtp->rcv_ssthresh = req->rsk_rcv_wnd;
+       newtp->rcv_wnd = req->rsk_rcv_wnd;
+       newtp->rx_opt.wscale_ok = ireq->wscale_ok;
+       if (newtp->rx_opt.wscale_ok) {
+               newtp->rx_opt.snd_wscale = ireq->snd_wscale;
+               newtp->rx_opt.rcv_wscale = ireq->rcv_wscale;
+       } else {
+               newtp->rx_opt.snd_wscale = newtp->rx_opt.rcv_wscale = 0;
+               newtp->window_clamp = min(newtp->window_clamp, 65535U);
+       }
+       newtp->snd_wnd = ntohs(tcp_hdr(skb)->window) << newtp->rx_opt.snd_wscale;
+       newtp->max_window = newtp->snd_wnd;
+
+       if (newtp->rx_opt.tstamp_ok) {
+               newtp->rx_opt.ts_recent = req->ts_recent;
+               newtp->rx_opt.ts_recent_stamp = ktime_get_seconds();
+               newtp->tcp_header_len = sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
+       } else {
+               newtp->rx_opt.ts_recent_stamp = 0;
+               newtp->tcp_header_len = sizeof(struct tcphdr);
+       }
+       newtp->tsoffset = treq->ts_off;
 #ifdef CONFIG_TCP_MD5SIG
-               newtp->md5sig_info = NULL;      /*XXX*/
-               if (newtp->af_specific->md5_lookup(sk, newsk))
-                       newtp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
+       newtp->md5sig_info = NULL;      /*XXX*/
+       if (newtp->af_specific->md5_lookup(sk, newsk))
+               newtp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
 #endif
-               if (skb->len >= TCP_MSS_DEFAULT + newtp->tcp_header_len)
-                       newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len;
-               newtp->rx_opt.mss_clamp = req->mss;
-               tcp_ecn_openreq_child(newtp, req);
-               newtp->fastopen_req = NULL;
-               newtp->fastopen_rsk = NULL;
-               newtp->syn_data_acked = 0;
-               newtp->rack.mstamp = 0;
-               newtp->rack.advanced = 0;
-               newtp->rack.reo_wnd_steps = 1;
-               newtp->rack.last_delivered = 0;
-               newtp->rack.reo_wnd_persist = 0;
-               newtp->rack.dsack_seen = 0;
-
-               __TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS);
-       }
+       if (skb->len >= TCP_MSS_DEFAULT + newtp->tcp_header_len)
+               newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len;
+       newtp->rx_opt.mss_clamp = req->mss;
+       tcp_ecn_openreq_child(newtp, req);
+       newtp->fastopen_req = NULL;
+       newtp->fastopen_rsk = NULL;
+       newtp->syn_data_acked = 0;
+       newtp->rack.mstamp = 0;
+       newtp->rack.advanced = 0;
+       newtp->rack.reo_wnd_steps = 1;
+       newtp->rack.last_delivered = 0;
+       newtp->rack.reo_wnd_persist = 0;
+       newtp->rack.dsack_seen = 0;
+
+       __TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS);
+
        return newsk;
 }
 EXPORT_SYMBOL(tcp_create_openreq_child);
@@ -600,7 +603,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
                         * it can be estimated (approximately)
                         * from other data.
                         */
-                       tmp_opt.ts_recent_stamp = get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<<req->num_timeout);
+                       tmp_opt.ts_recent_stamp = ktime_get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<<req->num_timeout);
                        paws_reject = tcp_paws_reject(&tmp_opt, th->rst);
                }
        }
index 8cc7c348733052a8ef4bc06d09149171d8277006..870b0a3350616a87580882cbc06382f5e415aef5 100644 (file)
@@ -180,9 +180,9 @@ out:
        return segs;
 }
 
-struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
+struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb)
 {
-       struct sk_buff **pp = NULL;
+       struct sk_buff *pp = NULL;
        struct sk_buff *p;
        struct tcphdr *th;
        struct tcphdr *th2;
@@ -220,7 +220,7 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
        len = skb_gro_len(skb);
        flags = tcp_flag_word(th);
 
-       for (; (p = *head); head = &p->next) {
+       list_for_each_entry(p, head, list) {
                if (!NAPI_GRO_CB(p)->same_flow)
                        continue;
 
@@ -233,7 +233,7 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
 
                goto found;
        }
-
+       p = NULL;
        goto out_check_final;
 
 found:
@@ -262,8 +262,11 @@ found:
 
        flush |= (len - 1) >= mss;
        flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq);
+#ifdef CONFIG_TLS_DEVICE
+       flush |= p->decrypted ^ skb->decrypted;
+#endif
 
-       if (flush || skb_gro_receive(head, skb)) {
+       if (flush || skb_gro_receive(p, skb)) {
                mss = 1;
                goto out_check_final;
        }
@@ -277,7 +280,7 @@ out_check_final:
                                        TCP_FLAG_FIN));
 
        if (p && (!NAPI_GRO_CB(skb)->same_flow || flush))
-               pp = head;
+               pp = p;
 
 out:
        NAPI_GRO_CB(skb)->flush |= (flush != 0);
@@ -302,7 +305,7 @@ int tcp_gro_complete(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(tcp_gro_complete);
 
-static struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
+static struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb)
 {
        /* Don't bother verifying checksum if we're going to flush anyway. */
        if (!NAPI_GRO_CB(skb)->flush &&
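
These signature changes, here and in the udp/esp6 hunks below, are part of the tree-wide switch of GRO from hand-rolled singly linked skb chains (struct sk_buff **) to ordinary list_head lists. Callbacks now walk the held packets with list_for_each_entry() and return the skb to flush directly, instead of a pointer into the chain; a skeleton of the new shape, with the merge details elided:

static struct sk_buff *proto_gro_receive(struct list_head *head,
					 struct sk_buff *skb)
{
	struct sk_buff *pp = NULL;	/* packet the core should flush */
	struct sk_buff *p;
	int flush = 1;

	list_for_each_entry(p, head, list) {
		if (!NAPI_GRO_CB(p)->same_flow)
			continue;

		/* flow matched: merge skb into p, flushing p on failure */
		flush = 0;
		if (skb_gro_receive(p, skb))
			pp = p;
		break;
	}

	NAPI_GRO_CB(skb)->flush |= (flush != 0);
	return pp;
}
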
index c4172c1fb198d4bcd1fcaace00308b3f86b0a843..597dbd749f05dc72e53962a5821861fc218774d6 100644 (file)
@@ -977,17 +977,6 @@ enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer)
        return HRTIMER_NORESTART;
 }
 
-/* BBR congestion control needs pacing.
- * Same remark for SO_MAX_PACING_RATE.
- * sch_fq packet scheduler is efficiently handling pacing,
- * but is not always installed/used.
- * Return true if TCP stack should pace packets itself.
- */
-static bool tcp_needs_internal_pacing(const struct sock *sk)
-{
-       return smp_load_acquire(&sk->sk_pacing_status) == SK_PACING_NEEDED;
-}
-
 static void tcp_internal_pacing(struct sock *sk, const struct sk_buff *skb)
 {
        u64 len_ns;
@@ -999,9 +988,6 @@ static void tcp_internal_pacing(struct sock *sk, const struct sk_buff *skb)
        if (!rate || rate == ~0U)
                return;
 
-       /* Should account for header sizes as sch_fq does,
-        * but lets make things simple.
-        */
        len_ns = (u64)skb->len * NSEC_PER_SEC;
        do_div(len_ns, rate);
        hrtimer_start(&tcp_sk(sk)->pacing_timer,
@@ -1150,6 +1136,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
        if (skb->len != tcp_header_size) {
                tcp_event_data_sent(tp, sk);
                tp->data_segs_out += tcp_skb_pcount(skb);
+               tp->bytes_sent += skb->len - tcp_header_size;
                tcp_internal_pacing(sk, skb);
        }
 
@@ -2711,9 +2698,8 @@ static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
 {
        struct tcp_sock *tp = tcp_sk(sk);
        struct sk_buff *next_skb = skb_rb_next(skb);
-       int skb_size, next_skb_size;
+       int next_skb_size;
 
-       skb_size = skb->len;
        next_skb_size = next_skb->len;
 
        BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1);
@@ -2884,6 +2870,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
        if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
                __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
        tp->total_retrans += segs;
+       tp->bytes_retrans += skb->len;
 
        /* make sure skb->data is aligned on arches that require it
         * and check if ack-trimming & collapsing extended the headroom
index c61240e43923d6dd6a5d6215074e2da2c2bc71f4..4dff40dad4dc5ccc372f5108b0d6ba38497ab81f 100644 (file)
@@ -146,6 +146,10 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
                                    rs->prior_mstamp); /* ack phase */
        rs->interval_us = max(snd_us, ack_us);
 
+       /* Record both segment send and ack receive intervals */
+       rs->snd_interval_us = snd_us;
+       rs->rcv_interval_us = ack_us;
+
        /* Normally we expect interval_us >= min-rtt.
         * Note that rate may still be over-estimated when a spuriously
         * retransmitted skb was first (s)acked because "interval_us"
index 71593e4400abe344969ed2a6d0f3461e6f8e9bb2..c81aadff769b2c3eee02e6de3a5545c27e8cbc38 100644 (file)
@@ -25,7 +25,7 @@ static u32 tcp_rack_reo_wnd(const struct sock *sk)
 {
        struct tcp_sock *tp = tcp_sk(sk);
 
-       if (!tp->rack.reord) {
+       if (!tp->reord_seen) {
                /* If reordering has not been observed, be aggressive during
                 * the recovery or starting the recovery by DUPACK threshold.
                 */
index 3b3611729928f77934e0298bb248e55c7a7c5def..7fdf222a0bdfe9775970082f6b5dcdcc82b2ae1a 100644 (file)
 #include <linux/gfp.h>
 #include <net/tcp.h>
 
+static u32 tcp_retransmit_stamp(const struct sock *sk)
+{
+       u32 start_ts = tcp_sk(sk)->retrans_stamp;
+
+       if (unlikely(!start_ts)) {
+               struct sk_buff *head = tcp_rtx_queue_head(sk);
+
+               if (!head)
+                       return 0;
+               start_ts = tcp_skb_timestamp(head);
+       }
+       return start_ts;
+}
+
+static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk)
+{
+       struct inet_connection_sock *icsk = inet_csk(sk);
+       u32 elapsed, start_ts;
+
+       start_ts = tcp_retransmit_stamp(sk);
+       if (!icsk->icsk_user_timeout || !start_ts)
+               return icsk->icsk_rto;
+       elapsed = tcp_time_stamp(tcp_sk(sk)) - start_ts;
+       if (elapsed >= icsk->icsk_user_timeout)
+               return 1; /* user timeout has passed; fire ASAP */
+       else
+               return min_t(u32, icsk->icsk_rto, msecs_to_jiffies(icsk->icsk_user_timeout - elapsed));
+}
+
 /**
  *  tcp_write_err() - close socket and save error info
  *  @sk:  The socket the error has appeared on.
@@ -166,14 +195,9 @@ static bool retransmits_timed_out(struct sock *sk,
        if (!inet_csk(sk)->icsk_retransmits)
                return false;
 
-       start_ts = tcp_sk(sk)->retrans_stamp;
-       if (unlikely(!start_ts)) {
-               struct sk_buff *head = tcp_rtx_queue_head(sk);
-
-               if (!head)
-                       return false;
-               start_ts = tcp_skb_timestamp(head);
-       }
+       start_ts = tcp_retransmit_stamp(sk);
+       if (!start_ts)
+               return false;
 
        if (likely(timeout == 0)) {
                linear_backoff_thresh = ilog2(TCP_RTO_MAX/rto_base);
@@ -183,8 +207,9 @@ static bool retransmits_timed_out(struct sock *sk,
                else
                        timeout = ((2 << linear_backoff_thresh) - 1) * rto_base +
                                (boundary - linear_backoff_thresh) * TCP_RTO_MAX;
+               timeout = jiffies_to_msecs(timeout);
        }
-       return (tcp_time_stamp(tcp_sk(sk)) - start_ts) >= jiffies_to_msecs(timeout);
+       return (tcp_time_stamp(tcp_sk(sk)) - start_ts) >= timeout;
 }
 
 /* A write timeout has occurred. Process the after effects. */
@@ -337,8 +362,7 @@ static void tcp_probe_timer(struct sock *sk)
        if (!start_ts)
                skb->skb_mstamp = tp->tcp_mstamp;
        else if (icsk->icsk_user_timeout &&
-                (s32)(tcp_time_stamp(tp) - start_ts) >
-                jiffies_to_msecs(icsk->icsk_user_timeout))
+                (s32)(tcp_time_stamp(tp) - start_ts) > icsk->icsk_user_timeout)
                goto abort;
 
        max_probes = sock_net(sk)->ipv4.sysctl_tcp_retries2;
@@ -535,7 +559,8 @@ out_reset_timer:
                /* Use normal (exponential) backoff */
                icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
        }
-       inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX);
+       inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+                                 tcp_clamp_rto_to_user_timeout(sk), TCP_RTO_MAX);
        if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1 + 1, 0))
                __sk_dst_reset(sk);
 
@@ -672,7 +697,7 @@ static void tcp_keepalive_timer (struct timer_list *t)
                 * to determine when to timeout instead.
                 */
                if ((icsk->icsk_user_timeout != 0 &&
-                   elapsed >= icsk->icsk_user_timeout &&
+                   elapsed >= msecs_to_jiffies(icsk->icsk_user_timeout) &&
                    icsk->icsk_probes_out > 0) ||
                    (icsk->icsk_user_timeout == 0 &&
                    icsk->icsk_probes_out >= keepalive_probes(tp))) {
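
tcp_clamp_rto_to_user_timeout() keeps the retransmit timer from sailing past the user timeout: with a 5000 ms TCP_USER_TIMEOUT, 4800 ms already elapsed since the first retransmission stamp, and a backed-off RTO of 800 ms, the timer is re-armed for min(800, 5000 - 4800) = 200 ms, so the timeout fires on schedule rather than one full RTO late. Note the keepalive path converts in the opposite direction, wrapping the now-milliseconds icsk_user_timeout in msecs_to_jiffies() because elapsed there is measured in jiffies.
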
index 24e116ddae79ce0696e3f63290385ae15e28ac18..060e841dde400fd1e1c02992fa448dc60305cec3 100644 (file)
@@ -926,11 +926,6 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
        if (msg->msg_flags & MSG_OOB) /* Mirror BSD error message compatibility */
                return -EOPNOTSUPP;
 
-       ipc.opt = NULL;
-       ipc.tx_flags = 0;
-       ipc.ttl = 0;
-       ipc.tos = -1;
-
        getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
 
        fl4 = &inet->cork.fl.u.ip4;
@@ -977,9 +972,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
                connected = 1;
        }
 
-       ipc.sockc.tsflags = sk->sk_tsflags;
-       ipc.addr = inet->inet_saddr;
-       ipc.oif = sk->sk_bound_dev_if;
+       ipcm_init_sk(&ipc, inet);
        ipc.gso_size = up->gso_size;
 
        if (msg->msg_controllen) {
@@ -1027,8 +1020,6 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
        saddr = ipc.addr;
        ipc.addr = faddr = daddr;
 
-       sock_tx_timestamp(sk, ipc.sockc.tsflags, &ipc.tx_flags);
-
        if (ipc.opt && ipc.opt->opt.srr) {
                if (!daddr) {
                        err = -EINVAL;
index 69c54540d5b4f2664b78b56468b09e3c1f6ac888..0c0522b79b43f09785ce8fd5f0dc9461a93f0e98 100644 (file)
@@ -343,10 +343,11 @@ out:
        return segs;
 }
 
-struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb,
-                                struct udphdr *uh, udp_lookup_t lookup)
+struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
+                               struct udphdr *uh, udp_lookup_t lookup)
 {
-       struct sk_buff *p, **pp = NULL;
+       struct sk_buff *pp = NULL;
+       struct sk_buff *p;
        struct udphdr *uh2;
        unsigned int off = skb_gro_offset(skb);
        int flush = 1;
@@ -371,7 +372,7 @@ struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb,
 unflush:
        flush = 0;
 
-       for (p = *head; p; p = p->next) {
+       list_for_each_entry(p, head, list) {
                if (!NAPI_GRO_CB(p)->same_flow)
                        continue;
 
@@ -399,8 +400,8 @@ out:
 }
 EXPORT_SYMBOL(udp_gro_receive);
 
-static struct sk_buff **udp4_gro_receive(struct sk_buff **head,
-                                        struct sk_buff *skb)
+static struct sk_buff *udp4_gro_receive(struct list_head *head,
+                                       struct sk_buff *skb)
 {
        struct udphdr *uh = udp_gro_udphdr(skb);
 
index b3885ca22d6fb7aa6165c2773ae02d9885099d8f..613282c65a10750cccc5985be0ffc65a68138ae1 100644 (file)
@@ -15,7 +15,7 @@ menuconfig IPV6
          Documentation/networking/ipv6.txt and read the HOWTO at
          <http://www.tldp.org/HOWTO/Linux+IPv6-HOWTO/>
 
-         To compile this protocol support as a module, choose M here: the 
+         To compile this protocol support as a module, choose M here: the
          module will be called ipv6.
 
 if IPV6
index f66a1cae3366fe7b176c176027c2c7b9b39ec278..2fac4ad748672cd62de6653d3fdedebe743c6ad0 100644 (file)
@@ -385,8 +385,6 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
 
        if (ndev->cnf.stable_secret.initialized)
                ndev->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_STABLE_PRIVACY;
-       else
-               ndev->cnf.addr_gen_mode = ipv6_devconf_dflt.addr_gen_mode;
 
        ndev->cnf.mtu6 = dev->mtu;
        ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl);
@@ -5211,7 +5209,9 @@ static inline size_t inet6_ifla6_size(void)
             + nla_total_size(DEVCONF_MAX * 4) /* IFLA_INET6_CONF */
             + nla_total_size(IPSTATS_MIB_MAX * 8) /* IFLA_INET6_STATS */
             + nla_total_size(ICMP6_MIB_MAX * 8) /* IFLA_INET6_ICMP6STATS */
-            + nla_total_size(sizeof(struct in6_addr)); /* IFLA_INET6_TOKEN */
+            + nla_total_size(sizeof(struct in6_addr)) /* IFLA_INET6_TOKEN */
+            + nla_total_size(1) /* IFLA_INET6_ADDR_GEN_MODE */
+            + 0;
 }
 
 static inline size_t inet6_if_nlmsg_size(void)
@@ -5893,32 +5893,31 @@ static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write,
                                         loff_t *ppos)
 {
        int ret = 0;
-       int new_val;
+       u32 new_val;
        struct inet6_dev *idev = (struct inet6_dev *)ctl->extra1;
        struct net *net = (struct net *)ctl->extra2;
+       struct ctl_table tmp = {
+               .data = &new_val,
+               .maxlen = sizeof(new_val),
+               .mode = ctl->mode,
+       };
 
        if (!rtnl_trylock())
                return restart_syscall();
 
-       ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
+       new_val = *((u32 *)ctl->data);
 
-       if (write) {
-               new_val = *((int *)ctl->data);
+       ret = proc_douintvec(&tmp, write, buffer, lenp, ppos);
+       if (ret != 0)
+               goto out;
 
+       if (write) {
                if (check_addr_gen_mode(new_val) < 0) {
                        ret = -EINVAL;
                        goto out;
                }
 
-               /* request for default */
-               if (&net->ipv6.devconf_dflt->addr_gen_mode == ctl->data) {
-                       ipv6_devconf_dflt.addr_gen_mode = new_val;
-
-               /* request for individual net device */
-               } else {
-                       if (!idev)
-                               goto out;
-
+               if (idev) {
                        if (check_stable_privacy(idev, net, new_val) < 0) {
                                ret = -EINVAL;
                                goto out;
@@ -5928,7 +5927,21 @@ static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write,
                                idev->cnf.addr_gen_mode = new_val;
                                addrconf_dev_config(idev->dev);
                        }
+               } else if (&net->ipv6.devconf_all->addr_gen_mode == ctl->data) {
+                       struct net_device *dev;
+
+                       net->ipv6.devconf_dflt->addr_gen_mode = new_val;
+                       for_each_netdev(net, dev) {
+                               idev = __in6_dev_get(dev);
+                               if (idev &&
+                                   idev->cnf.addr_gen_mode != new_val) {
+                                       idev->cnf.addr_gen_mode = new_val;
+                                       addrconf_dev_config(idev->dev);
+                               }
+                       }
                }
+
+               *((u32 *)ctl->data) = new_val;
        }
 
 out:
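
The net effect is that writing addr_gen_mode on conf/all now fans out to every existing device (reconfiguring each one whose mode actually changes) and updates the default for future devices, where previously only the default or a single device could be targeted. The value is also staged in a local u32 through a temporary ctl_table, so a write rejected by check_addr_gen_mode() or check_stable_privacy() never touches the live setting.
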
index 9ed0eae91758f8506b4f6ca0fe3a9c2dc3fe1323..020f6e14a7afe130e6b016fa8b55d230e6a04559 100644 (file)
@@ -322,8 +322,7 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
                /* Reproduce AF_INET checks to make the bindings consistent */
                v4addr = addr->sin6_addr.s6_addr32[3];
                chk_addr_ret = inet_addr_type(net, v4addr);
-               if (!net->ipv4.sysctl_ip_nonlocal_bind &&
-                   !(inet->freebind || inet->transparent) &&
+               if (!inet_can_nonlocal_bind(net, inet) &&
                    v4addr != htonl(INADDR_ANY) &&
                    chk_addr_ret != RTN_LOCAL &&
                    chk_addr_ret != RTN_MULTICAST &&
@@ -362,8 +361,7 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
                         */
                        v4addr = LOOPBACK4_IPV6;
                        if (!(addr_type & IPV6_ADDR_MULTICAST)) {
-                               if (!net->ipv6.sysctl.ip_nonlocal_bind &&
-                                   !(inet->freebind || inet->transparent) &&
+                               if (!ipv6_can_nonlocal_bind(net, inet) &&
                                    !ipv6_chk_addr(net, &addr->sin6_addr,
                                                   dev, 0)) {
                                        err = -EADDRNOTAVAIL;
@@ -764,6 +762,7 @@ EXPORT_SYMBOL_GPL(ipv6_opt_accepted);
 static struct packet_type ipv6_packet_type __read_mostly = {
        .type = cpu_to_be16(ETH_P_IPV6),
        .func = ipv6_rcv,
+       .list_func = ipv6_list_rcv,
 };
 
 static int __init ipv6_packet_init(void)
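
The new .list_func hook ties IPv6 into the list-based receive path added alongside this series: the core can hand ipv6_list_rcv() a whole batch of skbs in one call instead of invoking ipv6_rcv() once per packet, amortizing per-packet dispatch overhead.
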
index 1a1f876f8e282d636a13ae1f48c3f90a9f754bbc..1ede7a16a0bec897a8e09b79915f16dbcd46cd2d 100644 (file)
@@ -739,7 +739,7 @@ EXPORT_SYMBOL_GPL(ip6_datagram_recv_ctl);
 
 int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
                          struct msghdr *msg, struct flowi6 *fl6,
-                         struct ipcm6_cookie *ipc6, struct sockcm_cookie *sockc)
+                         struct ipcm6_cookie *ipc6)
 {
        struct in6_pktinfo *src_info;
        struct cmsghdr *cmsg;
@@ -758,7 +758,7 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
                }
 
                if (cmsg->cmsg_level == SOL_SOCKET) {
-                       err = __sock_cmsg_send(sk, msg, cmsg, sockc);
+                       err = __sock_cmsg_send(sk, msg, cmsg, &ipc6->sockc);
                        if (err)
                                return err;
                        continue;
@@ -803,7 +803,7 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
 
                        if (addr_type != IPV6_ADDR_ANY) {
                                int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL;
-                               if (!(inet_sk(sk)->freebind || inet_sk(sk)->transparent) &&
+                               if (!ipv6_can_nonlocal_bind(net, inet_sk(sk)) &&
                                    !ipv6_chk_addr_and_flags(net, &src_info->ipi6_addr,
                                                             dev, !strict, 0,
                                                             IFA_F_TENTATIVE) &&
index 27f59b61f70f59f6b4a4502727a161b5f1b91ef1..6177e217117189d1b9d422958618443c3e4a0f7f 100644 (file)
@@ -49,8 +49,8 @@ static __u16 esp6_nexthdr_esp_offset(struct ipv6hdr *ipv6_hdr, int nhlen)
        return 0;
 }
 
-static struct sk_buff **esp6_gro_receive(struct sk_buff **head,
-                                        struct sk_buff *skb)
+static struct sk_buff *esp6_gro_receive(struct list_head *head,
+                                       struct sk_buff *skb)
 {
        int offset = skb_gro_offset(skb);
        struct xfrm_offload *xo;
@@ -162,8 +162,7 @@ static struct sk_buff *esp6_gso_segment(struct sk_buff *skb,
 
        skb->encap_hdr_csum = 1;
 
-       if (!(features & NETIF_F_HW_ESP) || !x->xso.offload_handle ||
-           (x->xso.dev != skb->dev))
+       if (!(features & NETIF_F_HW_ESP) || x->xso.dev != skb->dev)
                esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK);
        else if (!(features & NETIF_F_HW_ESP_TX_CSUM))
                esp_features = features & ~NETIF_F_CSUM_MASK;
@@ -207,8 +206,7 @@ static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb,  netdev_features
        if (!xo)
                return -EINVAL;
 
-       if (!(features & NETIF_F_HW_ESP) || !x->xso.offload_handle ||
-           (x->xso.dev != skb->dev)) {
+       if (!(features & NETIF_F_HW_ESP) || x->xso.dev != skb->dev) {
                xo->flags |= CRYPTO_FALLBACK;
                hw_offload = false;
        }
index ef2505aefc159d9a5a3fc544179bc5d086377dd2..00d159d431dc86367d8cf1d9ff8a0874704bf579 100644 (file)
@@ -431,7 +431,6 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
        struct icmp6hdr tmp_hdr;
        struct flowi6 fl6;
        struct icmpv6_msg msg;
-       struct sockcm_cookie sockc_unused = {0};
        struct ipcm6_cookie ipc6;
        int iif = 0;
        int addr_type = 0;
@@ -546,7 +545,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
        else if (!fl6.flowi6_oif)
                fl6.flowi6_oif = np->ucast_oif;
 
-       ipc6.tclass = np->tclass;
+       ipcm6_init_sk(&ipc6, np);
        fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
 
        dst = icmpv6_route_lookup(net, skb, sk, &fl6);
@@ -554,8 +553,6 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
                goto out;
 
        ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
-       ipc6.dontfrag = np->dontfrag;
-       ipc6.opt = NULL;
 
        msg.skb = skb;
        msg.offset = skb_network_offset(skb);
@@ -576,7 +573,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
                            len + sizeof(struct icmp6hdr),
                            sizeof(struct icmp6hdr),
                            &ipc6, &fl6, (struct rt6_info *)dst,
-                           MSG_DONTWAIT, &sockc_unused)) {
+                           MSG_DONTWAIT)) {
                ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
                ip6_flush_pending_frames(sk);
        } else {
@@ -680,7 +677,6 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
        struct dst_entry *dst;
        struct ipcm6_cookie ipc6;
        u32 mark = IP6_REPLY_MARK(net, skb->mark);
-       struct sockcm_cookie sockc_unused = {0};
 
        saddr = &ipv6_hdr(skb)->daddr;
 
@@ -727,16 +723,14 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
        msg.offset = 0;
        msg.type = ICMPV6_ECHO_REPLY;
 
+       ipcm6_init_sk(&ipc6, np);
        ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
        ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
-       ipc6.dontfrag = np->dontfrag;
-       ipc6.opt = NULL;
 
        if (ip6_append_data(sk, icmpv6_getfrag, &msg,
                            skb->len + sizeof(struct icmp6hdr),
                            sizeof(struct icmp6hdr), &ipc6, &fl6,
-                           (struct rt6_info *)dst, MSG_DONTWAIT,
-                           &sockc_unused)) {
+                           (struct rt6_info *)dst, MSG_DONTWAIT)) {
                __ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
                ip6_flush_pending_frames(sk);
        } else {
index 4b32e5921e5ced56de7101c816c3d0c40b3df2cb..b7739aba6e684bc68bc76f38087e449ee10383ba 100644 (file)
@@ -4,4 +4,4 @@
 
 obj-$(CONFIG_IPV6_ILA) += ila.o
 
-ila-objs := ila_common.o ila_lwt.o ila_xlat.o
+ila-objs := ila_main.o ila_common.o ila_lwt.o ila_xlat.o
index 3c7a11b62334da87b9e7e7f1b06cab47e0b5f044..1f747bcbec295303c8e08307a87c6c55221194d1 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/skbuff.h>
 #include <linux/types.h>
 #include <net/checksum.h>
+#include <net/genetlink.h>
 #include <net/ip.h>
 #include <net/protocol.h>
 #include <uapi/linux/ila.h>
@@ -104,9 +105,31 @@ void ila_update_ipv6_locator(struct sk_buff *skb, struct ila_params *p,
 
 void ila_init_saved_csum(struct ila_params *p);
 
+struct ila_net {
+       struct {
+               struct rhashtable rhash_table;
+               spinlock_t *locks; /* Bucket locks for entry manipulation */
+               unsigned int locks_mask;
+               bool hooks_registered;
+       } xlat;
+};
+
 int ila_lwt_init(void);
 void ila_lwt_fini(void);
-int ila_xlat_init(void);
-void ila_xlat_fini(void);
+
+int ila_xlat_init_net(struct net *net);
+void ila_xlat_exit_net(struct net *net);
+
+int ila_xlat_nl_cmd_add_mapping(struct sk_buff *skb, struct genl_info *info);
+int ila_xlat_nl_cmd_del_mapping(struct sk_buff *skb, struct genl_info *info);
+int ila_xlat_nl_cmd_get_mapping(struct sk_buff *skb, struct genl_info *info);
+int ila_xlat_nl_cmd_flush(struct sk_buff *skb, struct genl_info *info);
+int ila_xlat_nl_dump_start(struct netlink_callback *cb);
+int ila_xlat_nl_dump_done(struct netlink_callback *cb);
+int ila_xlat_nl_dump(struct sk_buff *skb, struct netlink_callback *cb);
+
+extern unsigned int ila_net_id;
+
+extern struct genl_family ila_nl_family;
 
 #endif /* __ILA_H */
index 8c88ecf29b93529acd8014dabdd618362a140922..95e9146918cc6f8ddacf7f1c79540f1d157c5ce5 100644 (file)
@@ -153,34 +153,3 @@ void ila_update_ipv6_locator(struct sk_buff *skb, struct ila_params *p,
        /* Now change destination address */
        iaddr->loc = p->locator;
 }
-
-static int __init ila_init(void)
-{
-       int ret;
-
-       ret = ila_lwt_init();
-
-       if (ret)
-               goto fail_lwt;
-
-       ret = ila_xlat_init();
-       if (ret)
-               goto fail_xlat;
-
-       return 0;
-fail_xlat:
-       ila_lwt_fini();
-fail_lwt:
-       return ret;
-}
-
-static void __exit ila_fini(void)
-{
-       ila_xlat_fini();
-       ila_lwt_fini();
-}
-
-module_init(ila_init);
-module_exit(ila_fini);
-MODULE_AUTHOR("Tom Herbert <tom@herbertland.com>");
-MODULE_LICENSE("GPL");
diff --git a/net/ipv6/ila/ila_main.c b/net/ipv6/ila/ila_main.c
new file mode 100644 (file)
index 0000000..18fac76
--- /dev/null
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <net/genetlink.h>
+#include <net/ila.h>
+#include <net/netns/generic.h>
+#include <uapi/linux/genetlink.h>
+#include "ila.h"
+
+static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = {
+       [ILA_ATTR_LOCATOR] = { .type = NLA_U64, },
+       [ILA_ATTR_LOCATOR_MATCH] = { .type = NLA_U64, },
+       [ILA_ATTR_IFINDEX] = { .type = NLA_U32, },
+       [ILA_ATTR_CSUM_MODE] = { .type = NLA_U8, },
+       [ILA_ATTR_IDENT_TYPE] = { .type = NLA_U8, },
+};
+
+static const struct genl_ops ila_nl_ops[] = {
+       {
+               .cmd = ILA_CMD_ADD,
+               .doit = ila_xlat_nl_cmd_add_mapping,
+               .policy = ila_nl_policy,
+               .flags = GENL_ADMIN_PERM,
+       },
+       {
+               .cmd = ILA_CMD_DEL,
+               .doit = ila_xlat_nl_cmd_del_mapping,
+               .policy = ila_nl_policy,
+               .flags = GENL_ADMIN_PERM,
+       },
+       {
+               .cmd = ILA_CMD_FLUSH,
+               .doit = ila_xlat_nl_cmd_flush,
+               .policy = ila_nl_policy,
+               .flags = GENL_ADMIN_PERM,
+       },
+       {
+               .cmd = ILA_CMD_GET,
+               .doit = ila_xlat_nl_cmd_get_mapping,
+               .start = ila_xlat_nl_dump_start,
+               .dumpit = ila_xlat_nl_dump,
+               .done = ila_xlat_nl_dump_done,
+               .policy = ila_nl_policy,
+       },
+};
+
+unsigned int ila_net_id;
+
+struct genl_family ila_nl_family __ro_after_init = {
+       .hdrsize        = 0,
+       .name           = ILA_GENL_NAME,
+       .version        = ILA_GENL_VERSION,
+       .maxattr        = ILA_ATTR_MAX,
+       .netnsok        = true,
+       .parallel_ops   = true,
+       .module         = THIS_MODULE,
+       .ops            = ila_nl_ops,
+       .n_ops          = ARRAY_SIZE(ila_nl_ops),
+};
+
+static __net_init int ila_init_net(struct net *net)
+{
+       int err;
+
+       err = ila_xlat_init_net(net);
+       if (err)
+               goto ila_xlat_init_fail;
+
+       return 0;
+
+ila_xlat_init_fail:
+       return err;
+}
+
+static __net_exit void ila_exit_net(struct net *net)
+{
+       ila_xlat_exit_net(net);
+}
+
+static struct pernet_operations ila_net_ops = {
+       .init = ila_init_net,
+       .exit = ila_exit_net,
+       .id   = &ila_net_id,
+       .size = sizeof(struct ila_net),
+};
+
+static int __init ila_init(void)
+{
+       int ret;
+
+       ret = register_pernet_device(&ila_net_ops);
+       if (ret)
+               goto register_device_fail;
+
+       ret = genl_register_family(&ila_nl_family);
+       if (ret)
+               goto register_family_fail;
+
+       ret = ila_lwt_init();
+       if (ret)
+               goto fail_lwt;
+
+       return 0;
+
+fail_lwt:
+       genl_unregister_family(&ila_nl_family);
+register_family_fail:
+       unregister_pernet_device(&ila_net_ops);
+register_device_fail:
+       return ret;
+}
+
+static void __exit ila_fini(void)
+{
+       ila_lwt_fini();
+       genl_unregister_family(&ila_nl_family);
+       unregister_pernet_device(&ila_net_ops);
+}
+
+module_init(ila_init);
+module_exit(ila_fini);
+MODULE_AUTHOR("Tom Herbert <tom@herbertland.com>");
+MODULE_LICENSE("GPL");
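The new ila_main.c owns module lifetime: per-net state is registered before the generic netlink family so the command handlers can always rely on it, the lightweight-tunnel ops come last, and failures unwind in reverse order. From userspace the family is resolved by name; a sketch using libnl-3 (error handling trimmed; not part of the patch):

#include <netlink/netlink.h>
#include <netlink/genl/genl.h>
#include <netlink/genl/ctrl.h>

int main(void)
{
        struct nl_sock *sk = nl_socket_alloc();
        int family;

        if (!sk || genl_connect(sk) < 0)
                return 1;

        /* ILA_GENL_NAME is "ila" in uapi/linux/ila.h */
        family = genl_ctrl_resolve(sk, "ila");

        nl_socket_free(sk);
        return family < 0;      /* < 0: family not registered */
}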
index 10ae13560b407e28643fe2ed772de868ef41804f..17c455ff69ffe4697fdc36be605e454b87d8e7df 100644 (file)
@@ -22,36 +22,14 @@ struct ila_map {
        struct rcu_head rcu;
 };
 
-static unsigned int ila_net_id;
-
-struct ila_net {
-       struct rhashtable rhash_table;
-       spinlock_t *locks; /* Bucket locks for entry manipulation */
-       unsigned int locks_mask;
-       bool hooks_registered;
-};
-
+#define MAX_LOCKS 1024
 #define        LOCKS_PER_CPU 10
 
 static int alloc_ila_locks(struct ila_net *ilan)
 {
-       unsigned int i, size;
-       unsigned int nr_pcpus = num_possible_cpus();
-
-       nr_pcpus = min_t(unsigned int, nr_pcpus, 32UL);
-       size = roundup_pow_of_two(nr_pcpus * LOCKS_PER_CPU);
-
-       if (sizeof(spinlock_t) != 0) {
-               ilan->locks = kvmalloc_array(size, sizeof(spinlock_t),
-                                            GFP_KERNEL);
-               if (!ilan->locks)
-                       return -ENOMEM;
-               for (i = 0; i < size; i++)
-                       spin_lock_init(&ilan->locks[i]);
-       }
-       ilan->locks_mask = size - 1;
-
-       return 0;
+       return alloc_bucket_spinlocks(&ilan->xlat.locks, &ilan->xlat.locks_mask,
+                                     MAX_LOCKS, LOCKS_PER_CPU,
+                                     GFP_KERNEL);
 }
 
 static u32 hashrnd __read_mostly;
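alloc_bucket_spinlocks() replaces the open-coded allocation deleted above. As I read lib/bucket_locks.c, the contract is: scale the lock count by cpu_mult per possible CPU, round up to a power of two, cap it at max_size, initialize every lock, and hand back the mask for hash-to-bucket selection. The declarations, with signatures assumed from <linux/spinlock.h> (verify against the tree):

int alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *lock_mask,
                           size_t max_size, unsigned int cpu_mult,
                           gfp_t gfp);
void free_bucket_spinlocks(spinlock_t *locks);

/* Effective sizing for the call above:
 *   size = min(roundup_pow_of_two(num_possible_cpus() * LOCKS_PER_CPU),
 *              MAX_LOCKS);
 *   *lock_mask = size - 1;
 */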
@@ -71,7 +49,7 @@ static inline u32 ila_locator_hash(struct ila_locator loc)
 static inline spinlock_t *ila_get_lock(struct ila_net *ilan,
                                       struct ila_locator loc)
 {
-       return &ilan->locks[ila_locator_hash(loc) & ilan->locks_mask];
+       return &ilan->xlat.locks[ila_locator_hash(loc) & ilan->xlat.locks_mask];
 }
 
 static inline int ila_cmp_wildcards(struct ila_map *ila,
@@ -115,16 +93,6 @@ static const struct rhashtable_params rht_params = {
        .obj_cmpfn = ila_cmpfn,
 };
 
-static struct genl_family ila_nl_family;
-
-static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = {
-       [ILA_ATTR_LOCATOR] = { .type = NLA_U64, },
-       [ILA_ATTR_LOCATOR_MATCH] = { .type = NLA_U64, },
-       [ILA_ATTR_IFINDEX] = { .type = NLA_U32, },
-       [ILA_ATTR_CSUM_MODE] = { .type = NLA_U8, },
-       [ILA_ATTR_IDENT_TYPE] = { .type = NLA_U8, },
-};
-
 static int parse_nl_config(struct genl_info *info,
                           struct ila_xlat_params *xp)
 {
@@ -162,7 +130,7 @@ static inline struct ila_map *ila_lookup_wildcards(struct ila_addr *iaddr,
 {
        struct ila_map *ila;
 
-       ila = rhashtable_lookup_fast(&ilan->rhash_table, &iaddr->loc,
+       ila = rhashtable_lookup_fast(&ilan->xlat.rhash_table, &iaddr->loc,
                                     rht_params);
        while (ila) {
                if (!ila_cmp_wildcards(ila, iaddr, ifindex))
@@ -179,7 +147,7 @@ static inline struct ila_map *ila_lookup_by_params(struct ila_xlat_params *xp,
 {
        struct ila_map *ila;
 
-       ila = rhashtable_lookup_fast(&ilan->rhash_table,
+       ila = rhashtable_lookup_fast(&ilan->xlat.rhash_table,
                                     &xp->ip.locator_match,
                                     rht_params);
        while (ila) {
@@ -196,9 +164,9 @@ static inline void ila_release(struct ila_map *ila)
        kfree_rcu(ila, rcu);
 }
 
-static void ila_free_cb(void *ptr, void *arg)
+static void ila_free_node(struct ila_map *ila)
 {
-       struct ila_map *ila = (struct ila_map *)ptr, *next;
+       struct ila_map *next;
 
        /* Assume rcu_read_lock() held */
        while (ila) {
@@ -208,6 +176,11 @@ static void ila_free_cb(void *ptr, void *arg)
        }
 }
 
+static void ila_free_cb(void *ptr, void *arg)
+{
+       ila_free_node((struct ila_map *)ptr);
+}
+
 static int ila_xlat_addr(struct sk_buff *skb, bool sir2ila);
 
 static unsigned int
@@ -235,7 +208,7 @@ static int ila_add_mapping(struct net *net, struct ila_xlat_params *xp)
        spinlock_t *lock = ila_get_lock(ilan, xp->ip.locator_match);
        int err = 0, order;
 
-       if (!ilan->hooks_registered) {
+       if (!ilan->xlat.hooks_registered) {
                /* We defer registering net hooks in the namespace until the
                 * first mapping is added.
                 */
@@ -244,7 +217,7 @@ static int ila_add_mapping(struct net *net, struct ila_xlat_params *xp)
                if (err)
                        return err;
 
-               ilan->hooks_registered = true;
+               ilan->xlat.hooks_registered = true;
        }
 
        ila = kzalloc(sizeof(*ila), GFP_KERNEL);
@@ -259,12 +232,12 @@ static int ila_add_mapping(struct net *net, struct ila_xlat_params *xp)
 
        spin_lock(lock);
 
-       head = rhashtable_lookup_fast(&ilan->rhash_table,
+       head = rhashtable_lookup_fast(&ilan->xlat.rhash_table,
                                      &xp->ip.locator_match,
                                      rht_params);
        if (!head) {
                /* New entry for the rhash_table */
-               err = rhashtable_lookup_insert_fast(&ilan->rhash_table,
+               err = rhashtable_lookup_insert_fast(&ilan->xlat.rhash_table,
                                                    &ila->node, rht_params);
        } else {
                struct ila_map *tila = head, *prev = NULL;
@@ -290,7 +263,7 @@ static int ila_add_mapping(struct net *net, struct ila_xlat_params *xp)
                } else {
                        /* Make this ila new head */
                        RCU_INIT_POINTER(ila->next, head);
-                       err = rhashtable_replace_fast(&ilan->rhash_table,
+                       err = rhashtable_replace_fast(&ilan->xlat.rhash_table,
                                                      &head->node,
                                                      &ila->node, rht_params);
                        if (err)
@@ -316,7 +289,7 @@ static int ila_del_mapping(struct net *net, struct ila_xlat_params *xp)
 
        spin_lock(lock);
 
-       head = rhashtable_lookup_fast(&ilan->rhash_table,
+       head = rhashtable_lookup_fast(&ilan->xlat.rhash_table,
                                      &xp->ip.locator_match, rht_params);
        ila = head;
 
@@ -346,15 +319,15 @@ static int ila_del_mapping(struct net *net, struct ila_xlat_params *xp)
                                 * table
                                 */
                                err = rhashtable_replace_fast(
-                                       &ilan->rhash_table, &ila->node,
+                                       &ilan->xlat.rhash_table, &ila->node,
                                        &head->node, rht_params);
                                if (err)
                                        goto out;
                        } else {
                                /* Entry no longer used */
-                               err = rhashtable_remove_fast(&ilan->rhash_table,
-                                                            &ila->node,
-                                                            rht_params);
+                               err = rhashtable_remove_fast(
+                                               &ilan->xlat.rhash_table,
+                                               &ila->node, rht_params);
                        }
                }
 
@@ -369,7 +342,7 @@ out:
        return err;
 }
 
-static int ila_nl_cmd_add_mapping(struct sk_buff *skb, struct genl_info *info)
+int ila_xlat_nl_cmd_add_mapping(struct sk_buff *skb, struct genl_info *info)
 {
        struct net *net = genl_info_net(info);
        struct ila_xlat_params p;
@@ -382,7 +355,7 @@ static int ila_nl_cmd_add_mapping(struct sk_buff *skb, struct genl_info *info)
        return ila_add_mapping(net, &p);
 }
 
-static int ila_nl_cmd_del_mapping(struct sk_buff *skb, struct genl_info *info)
+int ila_xlat_nl_cmd_del_mapping(struct sk_buff *skb, struct genl_info *info)
 {
        struct net *net = genl_info_net(info);
        struct ila_xlat_params xp;
@@ -397,6 +370,59 @@ static int ila_nl_cmd_del_mapping(struct sk_buff *skb, struct genl_info *info)
        return 0;
 }
 
+static inline spinlock_t *lock_from_ila_map(struct ila_net *ilan,
+                                           struct ila_map *ila)
+{
+       return ila_get_lock(ilan, ila->xp.ip.locator_match);
+}
+
+int ila_xlat_nl_cmd_flush(struct sk_buff *skb, struct genl_info *info)
+{
+       struct net *net = genl_info_net(info);
+       struct ila_net *ilan = net_generic(net, ila_net_id);
+       struct rhashtable_iter iter;
+       struct ila_map *ila;
+       spinlock_t *lock;
+       int ret;
+
+       ret = rhashtable_walk_init(&ilan->xlat.rhash_table, &iter, GFP_KERNEL);
+       if (ret)
+               goto done;
+
+       rhashtable_walk_start(&iter);
+
+       for (;;) {
+               ila = rhashtable_walk_next(&iter);
+
+               if (IS_ERR(ila)) {
+                       if (PTR_ERR(ila) == -EAGAIN)
+                               continue;
+                       ret = PTR_ERR(ila);
+                       goto done;
+               } else if (!ila) {
+                       break;
+               }
+
+               lock = lock_from_ila_map(ilan, ila);
+
+               spin_lock(lock);
+
+               ret = rhashtable_remove_fast(&ilan->xlat.rhash_table,
+                                            &ila->node, rht_params);
+               if (!ret)
+                       ila_free_node(ila);
+
+               spin_unlock(lock);
+
+               if (ret)
+                       break;
+       }
+
+done:
+       rhashtable_walk_stop(&iter);
+       return ret;
+}
+
 static int ila_fill_info(struct ila_map *ila, struct sk_buff *msg)
 {
        if (nla_put_u64_64bit(msg, ILA_ATTR_LOCATOR,
@@ -434,7 +460,7 @@ nla_put_failure:
        return -EMSGSIZE;
 }
 
-static int ila_nl_cmd_get_mapping(struct sk_buff *skb, struct genl_info *info)
+int ila_xlat_nl_cmd_get_mapping(struct sk_buff *skb, struct genl_info *info)
 {
        struct net *net = genl_info_net(info);
        struct ila_net *ilan = net_generic(net, ila_net_id);
@@ -475,27 +501,34 @@ out_free:
 
 struct ila_dump_iter {
        struct rhashtable_iter rhiter;
+       int skip;
 };
 
-static int ila_nl_dump_start(struct netlink_callback *cb)
+int ila_xlat_nl_dump_start(struct netlink_callback *cb)
 {
        struct net *net = sock_net(cb->skb->sk);
        struct ila_net *ilan = net_generic(net, ila_net_id);
-       struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args[0];
+       struct ila_dump_iter *iter;
+       int ret;
 
-       if (!iter) {
-               iter = kmalloc(sizeof(*iter), GFP_KERNEL);
-               if (!iter)
-                       return -ENOMEM;
+       iter = kmalloc(sizeof(*iter), GFP_KERNEL);
+       if (!iter)
+               return -ENOMEM;
 
-               cb->args[0] = (long)iter;
+       ret = rhashtable_walk_init(&ilan->xlat.rhash_table, &iter->rhiter,
+                                  GFP_KERNEL);
+       if (ret) {
+               kfree(iter);
+               return ret;
        }
 
-       return rhashtable_walk_init(&ilan->rhash_table, &iter->rhiter,
-                                   GFP_KERNEL);
+       iter->skip = 0;
+       cb->args[0] = (long)iter;
+
+       return ret;
 }
 
-static int ila_nl_dump_done(struct netlink_callback *cb)
+int ila_xlat_nl_dump_done(struct netlink_callback *cb)
 {
        struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args[0];
 
@@ -506,24 +539,49 @@ static int ila_nl_dump_done(struct netlink_callback *cb)
        return 0;
 }
 
-static int ila_nl_dump(struct sk_buff *skb, struct netlink_callback *cb)
+int ila_xlat_nl_dump(struct sk_buff *skb, struct netlink_callback *cb)
 {
        struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args[0];
        struct rhashtable_iter *rhiter = &iter->rhiter;
+       int skip = iter->skip;
        struct ila_map *ila;
        int ret;
 
        rhashtable_walk_start(rhiter);
 
-       for (;;) {
-               ila = rhashtable_walk_next(rhiter);
+       /* Get first entry */
+       ila = rhashtable_walk_peek(rhiter);
+
+       if (ila && !IS_ERR(ila) && skip) {
+               /* Skip over visited entries */
+
+               while (ila && skip) {
+                       /* Skip over any ila entries in this list that we
+                        * have already dumped.
+                        */
+                       ila = rcu_access_pointer(ila->next);
+                       skip--;
+               }
+       }
 
+       skip = 0;
+
+       for (;;) {
                if (IS_ERR(ila)) {
-                       if (PTR_ERR(ila) == -EAGAIN)
-                               continue;
                        ret = PTR_ERR(ila);
-                       goto done;
+                       if (ret == -EAGAIN) {
+                               /* Table has changed and iter has reset. Return
+                                * -EAGAIN to the application even if we have
+                                * written data to the skb. The application
+                                * needs to deal with this.
+                                */
+
+                               goto out_ret;
+                       } else {
+                               break;
+                       }
                } else if (!ila) {
+                       ret = 0;
                        break;
                }
 
@@ -532,90 +590,54 @@ static int ila_nl_dump(struct sk_buff *skb, struct netlink_callback *cb)
                                             cb->nlh->nlmsg_seq, NLM_F_MULTI,
                                             skb, ILA_CMD_GET);
                        if (ret)
-                               goto done;
+                               goto out;
 
+                       skip++;
                        ila = rcu_access_pointer(ila->next);
                }
+
+               skip = 0;
+               ila = rhashtable_walk_next(rhiter);
        }
 
-       ret = skb->len;
+out:
+       iter->skip = skip;
+       ret = (skb->len ? : ret);
 
-done:
+out_ret:
        rhashtable_walk_stop(rhiter);
        return ret;
 }
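Both the new flush command and this rewritten dump lean on the same rhashtable walker idiom: rhashtable_walk_next() may return ERR_PTR(-EAGAIN) when the table is resized mid-walk, meaning the iterator has been reset. Flush simply retries; the dump instead propagates -EAGAIN to userspace, tracking its position within a locator chain via iter->skip, because a silent retry could emit duplicate entries. A generic sketch of the idiom with a hypothetical callback:

static int walk_all(struct rhashtable *ht, int (*fn)(void *obj))
{
        struct rhashtable_iter iter;
        void *obj;
        int err = 0;

        rhashtable_walk_enter(ht, &iter);
        rhashtable_walk_start(&iter);

        while ((obj = rhashtable_walk_next(&iter)) != NULL) {
                if (IS_ERR(obj)) {
                        if (PTR_ERR(obj) == -EAGAIN)
                                continue;       /* table resized; iterator reset */
                        err = PTR_ERR(obj);
                        break;
                }
                err = fn(obj);
                if (err)
                        break;
        }

        rhashtable_walk_stop(&iter);
        rhashtable_walk_exit(&iter);
        return err;
}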
 
-static const struct genl_ops ila_nl_ops[] = {
-       {
-               .cmd = ILA_CMD_ADD,
-               .doit = ila_nl_cmd_add_mapping,
-               .policy = ila_nl_policy,
-               .flags = GENL_ADMIN_PERM,
-       },
-       {
-               .cmd = ILA_CMD_DEL,
-               .doit = ila_nl_cmd_del_mapping,
-               .policy = ila_nl_policy,
-               .flags = GENL_ADMIN_PERM,
-       },
-       {
-               .cmd = ILA_CMD_GET,
-               .doit = ila_nl_cmd_get_mapping,
-               .start = ila_nl_dump_start,
-               .dumpit = ila_nl_dump,
-               .done = ila_nl_dump_done,
-               .policy = ila_nl_policy,
-       },
-};
-
-static struct genl_family ila_nl_family __ro_after_init = {
-       .hdrsize        = 0,
-       .name           = ILA_GENL_NAME,
-       .version        = ILA_GENL_VERSION,
-       .maxattr        = ILA_ATTR_MAX,
-       .netnsok        = true,
-       .parallel_ops   = true,
-       .module         = THIS_MODULE,
-       .ops            = ila_nl_ops,
-       .n_ops          = ARRAY_SIZE(ila_nl_ops),
-};
-
 #define ILA_HASH_TABLE_SIZE 1024
 
-static __net_init int ila_init_net(struct net *net)
+int ila_xlat_init_net(struct net *net)
 {
-       int err;
        struct ila_net *ilan = net_generic(net, ila_net_id);
+       int err;
 
        err = alloc_ila_locks(ilan);
        if (err)
                return err;
 
-       rhashtable_init(&ilan->rhash_table, &rht_params);
+       rhashtable_init(&ilan->xlat.rhash_table, &rht_params);
 
        return 0;
 }
 
-static __net_exit void ila_exit_net(struct net *net)
+void ila_xlat_exit_net(struct net *net)
 {
        struct ila_net *ilan = net_generic(net, ila_net_id);
 
-       rhashtable_free_and_destroy(&ilan->rhash_table, ila_free_cb, NULL);
+       rhashtable_free_and_destroy(&ilan->xlat.rhash_table, ila_free_cb, NULL);
 
-       kvfree(ilan->locks);
+       free_bucket_spinlocks(ilan->xlat.locks);
 
-       if (ilan->hooks_registered)
+       if (ilan->xlat.hooks_registered)
                nf_unregister_net_hooks(net, ila_nf_hook_ops,
                                        ARRAY_SIZE(ila_nf_hook_ops));
 }
 
-static struct pernet_operations ila_net_ops = {
-       .init = ila_init_net,
-       .exit = ila_exit_net,
-       .id   = &ila_net_id,
-       .size = sizeof(struct ila_net),
-};
-
 static int ila_xlat_addr(struct sk_buff *skb, bool sir2ila)
 {
        struct ila_map *ila;
@@ -641,29 +663,3 @@ static int ila_xlat_addr(struct sk_buff *skb, bool sir2ila)
 
        return 0;
 }
-
-int __init ila_xlat_init(void)
-{
-       int ret;
-
-       ret = register_pernet_device(&ila_net_ops);
-       if (ret)
-               goto exit;
-
-       ret = genl_register_family(&ila_nl_family);
-       if (ret < 0)
-               goto unregister;
-
-       return 0;
-
-unregister:
-       unregister_pernet_device(&ila_net_ops);
-exit:
-       return ret;
-}
-
-void ila_xlat_fini(void)
-{
-       genl_unregister_family(&ila_nl_family);
-       unregister_pernet_device(&ila_net_ops);
-}
index 3eee7637bdfe6cb24addab46b63073c04bdf88bb..cb54a8a3c2735221ec0ee1feaa63c28d3383b5cf 100644 (file)
@@ -373,7 +373,6 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
        if (olen > 0) {
                struct msghdr msg;
                struct flowi6 flowi6;
-               struct sockcm_cookie sockc_junk;
                struct ipcm6_cookie ipc6;
 
                err = -ENOMEM;
@@ -392,7 +391,7 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
                memset(&flowi6, 0, sizeof(flowi6));
 
                ipc6.opt = fl->opt;
-               err = ip6_datagram_send_ctl(net, sk, &msg, &flowi6, &ipc6, &sockc_junk);
+               err = ip6_datagram_send_ctl(net, sk, &msg, &flowi6, &ipc6);
                if (err)
                        goto done;
                err = -EINVAL;
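This hunk (and the matching one in ipv6_sockglue.c below) tracks an API change merged alongside: the sockcm_cookie is now embedded in struct ipcm6_cookie, so ip6_datagram_send_ctl() fills it in place rather than taking a throwaway out-parameter. The resulting call pattern, assuming the ipcm6_init() helper introduced by that series:

struct ipcm6_cookie ipc6;

ipcm6_init(&ipc6);              /* assumed helper: zeroes the cookie and
                                 * sets hlimit/tclass/dontfrag to -1 */
ipc6.opt = fl->opt;

err = ip6_datagram_send_ctl(net, sk, &msg, &flowi6, &ipc6);
if (!err) {
        /* ipc6.sockc now carries mark, tsflags and transmit_time
         * parsed from the control messages */
}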
index cd2cfb04e5d82010a5eb1800a53fc8007479c6f9..fc7dd3a043607e9ba069a64a712b1ab853ef6018 100644 (file)
@@ -989,6 +989,8 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
                fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
 
                dsfield = key->tos;
+               if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT))
+                       goto tx_err;
                md = ip_tunnel_info_opts(tun_info);
                if (!md)
                        goto tx_err;
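The two added lines fix a metadata-mode (collect_md) case: a tunnel key can arrive without ERSPAN options attached, and reading them anyway would use option space that was never written. The caller-side shape of the safe pattern, using the standard accessors (sketch, not patch code):

struct ip_tunnel_info *tun_info = skb_tunnel_info(skb);
struct erspan_metadata *md;

if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT))
        goto tx_err;            /* no ERSPAN options were supplied */

md = ip_tunnel_info_opts(tun_info);
if (!md)
        goto tx_err;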
index f08d34491eceeea4192bdb0482a61394da057476..6242682be876fd1193c8c3dac2b156265830076a 100644 (file)
 #include <net/inet_ecn.h>
 #include <net/dst_metadata.h>
 
-int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
+static void ip6_rcv_finish_core(struct net *net, struct sock *sk,
+                               struct sk_buff *skb)
 {
        void (*edemux)(struct sk_buff *skb);
 
-       /* if ingress device is enslaved to an L3 master device pass the
-        * skb to its handler for processing
-        */
-       skb = l3mdev_ip6_rcv(skb);
-       if (!skb)
-               return NET_RX_SUCCESS;
-
        if (net->ipv4.sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) {
                const struct inet6_protocol *ipprot;
 
@@ -67,20 +61,73 @@ int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
        }
        if (!skb_valid_dst(skb))
                ip6_route_input(skb);
+}
+
+int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
+{
+       /* if ingress device is enslaved to an L3 master device pass the
+        * skb to its handler for processing
+        */
+       skb = l3mdev_ip6_rcv(skb);
+       if (!skb)
+               return NET_RX_SUCCESS;
+       ip6_rcv_finish_core(net, sk, skb);
 
        return dst_input(skb);
 }
 
-int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
+static void ip6_sublist_rcv_finish(struct list_head *head)
+{
+       struct sk_buff *skb, *next;
+
+       list_for_each_entry_safe(skb, next, head, list)
+               dst_input(skb);
+}
+
+static void ip6_list_rcv_finish(struct net *net, struct sock *sk,
+                               struct list_head *head)
+{
+       struct dst_entry *curr_dst = NULL;
+       struct sk_buff *skb, *next;
+       struct list_head sublist;
+
+       INIT_LIST_HEAD(&sublist);
+       list_for_each_entry_safe(skb, next, head, list) {
+               struct dst_entry *dst;
+
+               list_del(&skb->list);
+               /* if ingress device is enslaved to an L3 master device pass the
+                * skb to its handler for processing
+                */
+               skb = l3mdev_ip6_rcv(skb);
+               if (!skb)
+                       continue;
+               ip6_rcv_finish_core(net, sk, skb);
+               dst = skb_dst(skb);
+               if (curr_dst != dst) {
+                       /* dispatch old sublist */
+                       if (!list_empty(&sublist))
+                               ip6_sublist_rcv_finish(&sublist);
+                       /* start new sublist */
+                       INIT_LIST_HEAD(&sublist);
+                       curr_dst = dst;
+               }
+               list_add_tail(&skb->list, &sublist);
+       }
+       /* dispatch final sublist */
+       ip6_sublist_rcv_finish(&sublist);
+}
+
+static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev,
+                                   struct net *net)
 {
        const struct ipv6hdr *hdr;
        u32 pkt_len;
        struct inet6_dev *idev;
-       struct net *net = dev_net(skb->dev);
 
        if (skb->pkt_type == PACKET_OTHERHOST) {
                kfree_skb(skb);
-               return NET_RX_DROP;
+               return NULL;
        }
 
        rcu_read_lock();
@@ -196,7 +243,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
                if (ipv6_parse_hopopts(skb) < 0) {
                        __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
                        rcu_read_unlock();
-                       return NET_RX_DROP;
+                       return NULL;
                }
        }
 
@@ -205,15 +252,67 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
        /* Must drop socket now because of tproxy. */
        skb_orphan(skb);
 
-       return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING,
-                      net, NULL, skb, dev, NULL,
-                      ip6_rcv_finish);
+       return skb;
 err:
        __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
 drop:
        rcu_read_unlock();
        kfree_skb(skb);
-       return NET_RX_DROP;
+       return NULL;
+}
+
+int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
+{
+       struct net *net = dev_net(skb->dev);
+
+       skb = ip6_rcv_core(skb, dev, net);
+       if (skb == NULL)
+               return NET_RX_DROP;
+       return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING,
+                      net, NULL, skb, dev, NULL,
+                      ip6_rcv_finish);
+}
+
+static void ip6_sublist_rcv(struct list_head *head, struct net_device *dev,
+                           struct net *net)
+{
+       NF_HOOK_LIST(NFPROTO_IPV6, NF_INET_PRE_ROUTING, net, NULL,
+                    head, dev, NULL, ip6_rcv_finish);
+       ip6_list_rcv_finish(net, NULL, head);
+}
+
+/* Receive a list of IPv6 packets */
+void ipv6_list_rcv(struct list_head *head, struct packet_type *pt,
+                  struct net_device *orig_dev)
+{
+       struct net_device *curr_dev = NULL;
+       struct net *curr_net = NULL;
+       struct sk_buff *skb, *next;
+       struct list_head sublist;
+
+       INIT_LIST_HEAD(&sublist);
+       list_for_each_entry_safe(skb, next, head, list) {
+               struct net_device *dev = skb->dev;
+               struct net *net = dev_net(dev);
+
+               list_del(&skb->list);
+               skb = ip6_rcv_core(skb, dev, net);
+               if (skb == NULL)
+                       continue;
+
+               if (curr_dev != dev || curr_net != net) {
+                       /* dispatch old sublist */
+                       if (!list_empty(&sublist))
+                               ip6_sublist_rcv(&sublist, curr_dev, curr_net);
+                       /* start new sublist */
+                       INIT_LIST_HEAD(&sublist);
+                       curr_dev = dev;
+                       curr_net = net;
+               }
+               list_add_tail(&skb->list, &sublist);
+       }
+       /* dispatch final sublist */
+       ip6_sublist_rcv(&sublist, curr_dev, curr_net);
 }
 
 /*
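ipv6_list_rcv() and ip6_list_rcv_finish() apply the same batching idiom twice: peel each skb off the input list, group consecutive packets sharing a key (device/netns on entry, dst after routing), and hand every group to a list-aware hook so per-packet indirect calls are amortized across the batch. A generic sketch of the idiom (hypothetical helper, not in the patch):

static void dispatch_grouped(struct list_head *head,
                             void *(*key)(struct sk_buff *skb),
                             void (*deliver)(struct list_head *sublist))
{
        struct sk_buff *skb, *next;
        void *curr_key = NULL;
        LIST_HEAD(sublist);

        list_for_each_entry_safe(skb, next, head, list) {
                void *k = key(skb);

                list_del(&skb->list);
                if (!list_empty(&sublist) && k != curr_key) {
                        deliver(&sublist);      /* flush previous group */
                        INIT_LIST_HEAD(&sublist);
                }
                curr_key = k;
                list_add_tail(&skb->list, &sublist);
        }
        if (!list_empty(&sublist))
                deliver(&sublist);              /* final group */
}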
index 5b3f2f89ef41c3276ef4b478683bd9ab04a1d3da..37ff4805b20c73fd3c404a0904985bae68f21f23 100644 (file)
@@ -163,11 +163,11 @@ static int ipv6_exthdrs_len(struct ipv6hdr *iph,
        return len;
 }
 
-static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
-                                        struct sk_buff *skb)
+static struct sk_buff *ipv6_gro_receive(struct list_head *head,
+                                       struct sk_buff *skb)
 {
        const struct net_offload *ops;
-       struct sk_buff **pp = NULL;
+       struct sk_buff *pp = NULL;
        struct sk_buff *p;
        struct ipv6hdr *iph;
        unsigned int nlen;
@@ -214,7 +214,7 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
        flush--;
        nlen = skb_network_header_len(skb);
 
-       for (p = *head; p; p = p->next) {
+       list_for_each_entry(p, head, list) {
                const struct ipv6hdr *iph2;
                __be32 first_word; /* <Version:4><Traffic_Class:8><Flow_Label:20> */
 
@@ -263,8 +263,8 @@ out:
        return pp;
 }
 
-static struct sk_buff **sit_ip6ip6_gro_receive(struct sk_buff **head,
-                                              struct sk_buff *skb)
+static struct sk_buff *sit_ip6ip6_gro_receive(struct list_head *head,
+                                             struct sk_buff *skb)
 {
        /* Common GRO receive for SIT and IP6IP6 */
 
@@ -278,8 +278,8 @@ static struct sk_buff **sit_ip6ip6_gro_receive(struct sk_buff **head,
        return ipv6_gro_receive(head, skb);
 }
 
-static struct sk_buff **ip4ip6_gro_receive(struct sk_buff **head,
-                                          struct sk_buff *skb)
+static struct sk_buff *ip4ip6_gro_receive(struct list_head *head,
+                                         struct sk_buff *skb)
 {
        /* Same GRO receive logic as for SIT and IP6IP6 */
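The offload conversion swaps the old struct sk_buff ** chain for the per-bucket list_head that GRO now keeps: handlers match held flows with list_for_each_entry() and return either an skb to flush or NULL. Skeleton of a receive callback under the new signature (sketch only):

static struct sk_buff *example_gro_receive(struct list_head *head,
                                           struct sk_buff *skb)
{
        struct sk_buff *p;

        list_for_each_entry(p, head, list) {
                if (!NAPI_GRO_CB(p)->same_flow)
                        continue;
                /* compare headers of p and skb here; on mismatch:
                 *      NAPI_GRO_CB(p)->same_flow = 0;
                 */
        }

        /* return a held skb to flush it, or NULL to keep aggregating */
        return NULL;
}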
 
index 3168847c30d1d4a0021b7effc8653befce1d4d22..16f200f06500758c4cae84ea16229d5dbce912cb 100644 (file)
@@ -1221,13 +1221,16 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
        if (mtu < IPV6_MIN_MTU)
                return -EINVAL;
        cork->base.fragsize = mtu;
-       cork->base.gso_size = sk->sk_type == SOCK_DGRAM &&
-                             sk->sk_protocol == IPPROTO_UDP ? ipc6->gso_size : 0;
+       cork->base.gso_size = ipc6->gso_size;
+       cork->base.tx_flags = 0;
+       sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);
 
        if (dst_allfrag(xfrm_dst_path(&rt->dst)))
                cork->base.flags |= IPCORK_ALLFRAG;
        cork->base.length = 0;
 
+       cork->base.transmit_time = ipc6->sockc.transmit_time;
+
        return 0;
 }
 
@@ -1240,8 +1243,7 @@ static int __ip6_append_data(struct sock *sk,
                             int getfrag(void *from, char *to, int offset,
                                         int len, int odd, struct sk_buff *skb),
                             void *from, int length, int transhdrlen,
-                            unsigned int flags, struct ipcm6_cookie *ipc6,
-                            const struct sockcm_cookie *sockc)
+                            unsigned int flags, struct ipcm6_cookie *ipc6)
 {
        struct sk_buff *skb, *skb_prev = NULL;
        unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
@@ -1251,7 +1253,6 @@ static int __ip6_append_data(struct sock *sk,
        int copy;
        int err;
        int offset = 0;
-       __u8 tx_flags = 0;
        u32 tskey = 0;
        struct rt6_info *rt = (struct rt6_info *)cork->dst;
        struct ipv6_txoptions *opt = v6_cork->opt;
@@ -1270,6 +1271,10 @@ static int __ip6_append_data(struct sock *sk,
        mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
        orig_mtu = mtu;
 
+       if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
+           sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
+               tskey = sk->sk_tskey++;
+
        hh_len = LL_RESERVED_SPACE(rt->dst.dev);
 
        fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
@@ -1319,13 +1324,6 @@ emsgsize:
            rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
                csummode = CHECKSUM_PARTIAL;
 
-       if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
-               sock_tx_timestamp(sk, sockc->tsflags, &tx_flags);
-               if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
-                   sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
-                       tskey = sk->sk_tskey++;
-       }
-
        /*
         * Let's try using as much space as possible.
         * Use MTU if total length of the message fits into the MTU.
@@ -1444,8 +1442,8 @@ alloc_new_skb:
                                    dst_exthdrlen);
 
                        /* Only the initial fragment is time stamped */
-                       skb_shinfo(skb)->tx_flags = tx_flags;
-                       tx_flags = 0;
+                       skb_shinfo(skb)->tx_flags = cork->tx_flags;
+                       cork->tx_flags = 0;
                        skb_shinfo(skb)->tskey = tskey;
                        tskey = 0;
 
@@ -1562,8 +1560,7 @@ int ip6_append_data(struct sock *sk,
                                int odd, struct sk_buff *skb),
                    void *from, int length, int transhdrlen,
                    struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
-                   struct rt6_info *rt, unsigned int flags,
-                   const struct sockcm_cookie *sockc)
+                   struct rt6_info *rt, unsigned int flags)
 {
        struct inet_sock *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
@@ -1591,7 +1588,7 @@ int ip6_append_data(struct sock *sk,
 
        return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
                                 &np->cork, sk_page_frag(sk), getfrag,
-                                from, length, transhdrlen, flags, ipc6, sockc);
+                                from, length, transhdrlen, flags, ipc6);
 }
 EXPORT_SYMBOL_GPL(ip6_append_data);
 
@@ -1675,6 +1672,8 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
        skb->priority = sk->sk_priority;
        skb->mark = sk->sk_mark;
 
+       skb->tstamp = cork->base.transmit_time;
+
        skb_dst_set(skb, dst_clone(&rt->dst));
        IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
        if (proto == IPPROTO_ICMPV6) {
@@ -1749,8 +1748,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
                             void *from, int length, int transhdrlen,
                             struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
                             struct rt6_info *rt, unsigned int flags,
-                            struct inet_cork_full *cork,
-                            const struct sockcm_cookie *sockc)
+                            struct inet_cork_full *cork)
 {
        struct inet6_cork v6_cork;
        struct sk_buff_head queue;
@@ -1778,7 +1776,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
        err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
                                &current->task_frag, getfrag, from,
                                length + exthdrlen, transhdrlen + exthdrlen,
-                               flags, ipc6, sockc);
+                               flags, ipc6);
        if (err) {
                __ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
                return ERR_PTR(err);
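With tx_flags and transmit_time captured once per cork in ip6_setup_cork(), only the first fragment of a message is timestamped and SOF_TIMESTAMPING_OPT_ID still draws its key from sk->sk_tskey. For reference, the userspace side that feeds these flags (sketch):

#include <linux/net_tstamp.h>
#include <sys/socket.h>

static int enable_tx_timestamps(int fd)
{
        int val = SOF_TIMESTAMPING_TX_SOFTWARE |
                  SOF_TIMESTAMPING_SOFTWARE |
                  SOF_TIMESTAMPING_OPT_ID;

        /* each send is tagged with an incrementing ID so completions
         * read from the error queue can be matched to their payload */
        return setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING,
                          &val, sizeof(val));
}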
index 0d0f0053bb1151db200c2fdf403009fea84f4ee3..d0b7e0249c133619fbb081881e054cab393ebb49 100644 (file)
@@ -32,6 +32,7 @@
 #include <linux/seq_file.h>
 #include <linux/init.h>
 #include <linux/compat.h>
+#include <linux/rhashtable.h>
 #include <net/protocol.h>
 #include <linux/skbuff.h>
 #include <net/raw.h>
index 568ca4187cd101e745988ee262f79431ef8d28cc..c0cac9cc3a28190beee2f0313477048f84a0bf16 100644 (file)
@@ -500,7 +500,6 @@ sticky_done:
                struct ipv6_txoptions *opt = NULL;
                struct msghdr msg;
                struct flowi6 fl6;
-               struct sockcm_cookie sockc_junk;
                struct ipcm6_cookie ipc6;
 
                memset(&fl6, 0, sizeof(fl6));
@@ -533,7 +532,7 @@ sticky_done:
                msg.msg_control = (void *)(opt+1);
                ipc6.opt = opt;
 
-               retv = ip6_datagram_send_ctl(net, sk, &msg, &fl6, &ipc6, &sockc_junk);
+               retv = ip6_datagram_send_ctl(net, sk, &msg, &fl6, &ipc6);
                if (retv)
                        goto done;
 update:
index f60f310785fd6989ac37dfd05a35c60e58b7986a..4ae54aaca3736d168cceb0cefd254727486f8048 100644 (file)
@@ -660,7 +660,7 @@ bool inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr,
        return rv;
 }
 
-static void igmp6_group_added(struct ifmcaddr6 *mc, unsigned int mode)
+static void igmp6_group_added(struct ifmcaddr6 *mc)
 {
        struct net_device *dev = mc->idev->dev;
        char buf[MAX_ADDR_LEN];
@@ -690,7 +690,7 @@ static void igmp6_group_added(struct ifmcaddr6 *mc, unsigned int mode)
         * should not send filter-mode change record as the mode
         * should be from IN() to IN(A).
         */
-       if (mode == MCAST_EXCLUDE)
+       if (mc->mca_sfmode == MCAST_EXCLUDE)
                mc->mca_crcount = mc->idev->mc_qrv;
 
        mld_ifc_event(mc->idev);
@@ -931,7 +931,7 @@ static int __ipv6_dev_mc_inc(struct net_device *dev,
        write_unlock_bh(&idev->lock);
 
        mld_del_delrec(idev, mc);
-       igmp6_group_added(mc, mode);
+       igmp6_group_added(mc);
        ma_put(mc);
        return 0;
 }
@@ -2571,7 +2571,7 @@ void ipv6_mc_up(struct inet6_dev *idev)
        ipv6_mc_reset(idev);
        for (i = idev->mc_list; i; i = i->next) {
                mld_del_delrec(idev, i);
-               igmp6_group_added(i, i->mca_sfmode);
+               igmp6_group_added(i);
        }
        read_unlock_bh(&idev->lock);
 }
index 531d6957af36c4af48176f9360e9d95f78a45d55..5ae8e1c51079cb2cb36324af300762c5463134ba 100644 (file)
@@ -15,7 +15,6 @@
 #include <net/ipv6.h>
 #include <net/ip6_route.h>
 #include <net/xfrm.h>
-#include <net/ip6_checksum.h>
 #include <net/netfilter/nf_queue.h>
 
 int ip6_route_me_harder(struct net *net, struct sk_buff *skb)
@@ -106,71 +105,10 @@ static int nf_ip6_route(struct net *net, struct dst_entry **dst,
        return err;
 }
 
-__sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook,
-                            unsigned int dataoff, u_int8_t protocol)
-{
-       const struct ipv6hdr *ip6h = ipv6_hdr(skb);
-       __sum16 csum = 0;
-
-       switch (skb->ip_summed) {
-       case CHECKSUM_COMPLETE:
-               if (hook != NF_INET_PRE_ROUTING && hook != NF_INET_LOCAL_IN)
-                       break;
-               if (!csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
-                                    skb->len - dataoff, protocol,
-                                    csum_sub(skb->csum,
-                                             skb_checksum(skb, 0,
-                                                          dataoff, 0)))) {
-                       skb->ip_summed = CHECKSUM_UNNECESSARY;
-                       break;
-               }
-               /* fall through */
-       case CHECKSUM_NONE:
-               skb->csum = ~csum_unfold(
-                               csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
-                                            skb->len - dataoff,
-                                            protocol,
-                                            csum_sub(0,
-                                                     skb_checksum(skb, 0,
-                                                                  dataoff, 0))));
-               csum = __skb_checksum_complete(skb);
-       }
-       return csum;
-}
-EXPORT_SYMBOL(nf_ip6_checksum);
-
-static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook,
-                                      unsigned int dataoff, unsigned int len,
-                                      u_int8_t protocol)
-{
-       const struct ipv6hdr *ip6h = ipv6_hdr(skb);
-       __wsum hsum;
-       __sum16 csum = 0;
-
-       switch (skb->ip_summed) {
-       case CHECKSUM_COMPLETE:
-               if (len == skb->len - dataoff)
-                       return nf_ip6_checksum(skb, hook, dataoff, protocol);
-               /* fall through */
-       case CHECKSUM_NONE:
-               hsum = skb_checksum(skb, 0, dataoff, 0);
-               skb->csum = ~csum_unfold(csum_ipv6_magic(&ip6h->saddr,
-                                                        &ip6h->daddr,
-                                                        skb->len - dataoff,
-                                                        protocol,
-                                                        csum_sub(0, hsum)));
-               skb->ip_summed = CHECKSUM_NONE;
-               return __skb_checksum_complete_head(skb, dataoff + len);
-       }
-       return csum;
-};
-
 static const struct nf_ipv6_ops ipv6ops = {
        .chk_addr               = ipv6_chk_addr,
        .route_input            = ip6_route_input,
        .fragment               = ip6_fragment,
-       .checksum               = nf_ip6_checksum,
-       .checksum_partial       = nf_ip6_checksum_partial,
        .route                  = nf_ip6_route,
        .reroute                = nf_ip6_reroute,
 };
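Dropping .checksum/.checksum_partial from the IPv6 ops matches their move into protocol-independent netfilter code elsewhere in this merge; callers presumably reach them through a family-dispatching helper of roughly this shape (reconstructed from memory of net/netfilter/utils.c; verify before relying on it):

__sum16 nf_checksum(struct sk_buff *skb, unsigned int hook,
                    unsigned int dataoff, u_int8_t protocol,
                    unsigned short family)
{
        __sum16 csum = 0;

        switch (family) {
        case AF_INET:
                csum = nf_ip_checksum(skb, hook, dataoff, protocol);
                break;
        case AF_INET6:
                csum = nf_ip6_checksum(skb, hook, dataoff, protocol);
                break;
        }

        return csum;
}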
index 37b14dc9d86388a488f2fb39f79e0aedf03b79bd..339d0762b02757d0034cd049908058fc7ea7f2fb 100644 (file)
@@ -5,26 +5,6 @@
 menu "IPv6: Netfilter Configuration"
        depends on INET && IPV6 && NETFILTER
 
-config NF_DEFRAG_IPV6
-       tristate
-       default n
-
-config NF_CONNTRACK_IPV6
-       tristate "IPv6 connection tracking support"
-       depends on INET && IPV6 && NF_CONNTRACK
-       default m if NETFILTER_ADVANCED=n
-       select NF_DEFRAG_IPV6
-       ---help---
-         Connection tracking keeps a record of what packets have passed
-         through your machine, in order to figure out how they are related
-         into connections.
-
-         This is IPv6 support on Layer 3 independent connection tracking.
-         Layer 3 independent connection tracking is experimental scheme
-         which generalize ip_conntrack to support other layer 3 protocols.
-
-         To compile it as a module, choose M here.  If unsure, say N.
-
 config NF_SOCKET_IPV6
        tristate "IPv6 socket lookup support"
        help
@@ -128,7 +108,7 @@ config NF_LOG_IPV6
 
 config NF_NAT_IPV6
        tristate "IPv6 NAT"
-       depends on NF_CONNTRACK_IPV6
+       depends on NF_CONNTRACK
        depends on NETFILTER_ADVANCED
        select NF_NAT
        help
@@ -328,7 +308,7 @@ config IP6_NF_SECURITY
 
 config IP6_NF_NAT
        tristate "ip6tables NAT support"
-       depends on NF_CONNTRACK_IPV6
+       depends on NF_CONNTRACK
        depends on NETFILTER_ADVANCED
        select NF_NAT
        select NF_NAT_IPV6
@@ -365,6 +345,7 @@ config IP6_NF_TARGET_NPT
 endif # IP6_NF_NAT
 
 endif # IP6_NF_IPTABLES
-
 endmenu
 
+config NF_DEFRAG_IPV6
+       tristate
index 10a5a1c873208ae8aeae9bb2015d8395763a3b76..200c0c2355650116eb78b4b2d0673c57848cd85a 100644 (file)
@@ -11,12 +11,6 @@ obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o
 obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o
 obj-$(CONFIG_IP6_NF_NAT) += ip6table_nat.o
 
-# objects for l3 independent conntrack
-nf_conntrack_ipv6-y  :=  nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o
-
-# l3 independent conntrack
-obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o
-
 nf_nat_ipv6-y          := nf_nat_l3proto_ipv6.o nf_nat_proto_icmpv6.o
 nf_nat_ipv6-$(CONFIG_NF_NAT_MASQUERADE_IPV6) += nf_nat_masquerade_ipv6.o
 obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
deleted file mode 100644 (file)
index 663827e..0000000
+++ /dev/null
@@ -1,460 +0,0 @@
-/*
- * Copyright (C)2004 USAGI/WIDE Project
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Author:
- *     Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- */
-
-#include <linux/types.h>
-#include <linux/ipv6.h>
-#include <linux/in6.h>
-#include <linux/netfilter.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/icmp.h>
-#include <net/ipv6.h>
-#include <net/inet_frag.h>
-
-#include <linux/netfilter_bridge.h>
-#include <linux/netfilter_ipv6.h>
-#include <linux/netfilter_ipv6/ip6_tables.h>
-#include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_conntrack_helper.h>
-#include <net/netfilter/nf_conntrack_l4proto.h>
-#include <net/netfilter/nf_conntrack_l3proto.h>
-#include <net/netfilter/nf_conntrack_core.h>
-#include <net/netfilter/nf_conntrack_zones.h>
-#include <net/netfilter/nf_conntrack_seqadj.h>
-#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
-#include <net/netfilter/nf_nat_helper.h>
-#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
-#include <net/netfilter/nf_log.h>
-
-static int conntrack6_net_id;
-static DEFINE_MUTEX(register_ipv6_hooks);
-
-struct conntrack6_net {
-       unsigned int users;
-};
-
-static bool ipv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
-                             struct nf_conntrack_tuple *tuple)
-{
-       const u_int32_t *ap;
-       u_int32_t _addrs[8];
-
-       ap = skb_header_pointer(skb, nhoff + offsetof(struct ipv6hdr, saddr),
-                               sizeof(_addrs), _addrs);
-       if (ap == NULL)
-               return false;
-
-       memcpy(tuple->src.u3.ip6, ap, sizeof(tuple->src.u3.ip6));
-       memcpy(tuple->dst.u3.ip6, ap + 4, sizeof(tuple->dst.u3.ip6));
-
-       return true;
-}
-
-static bool ipv6_invert_tuple(struct nf_conntrack_tuple *tuple,
-                             const struct nf_conntrack_tuple *orig)
-{
-       memcpy(tuple->src.u3.ip6, orig->dst.u3.ip6, sizeof(tuple->src.u3.ip6));
-       memcpy(tuple->dst.u3.ip6, orig->src.u3.ip6, sizeof(tuple->dst.u3.ip6));
-
-       return true;
-}
-
-static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
-                           unsigned int *dataoff, u_int8_t *protonum)
-{
-       unsigned int extoff = nhoff + sizeof(struct ipv6hdr);
-       __be16 frag_off;
-       int protoff;
-       u8 nexthdr;
-
-       if (skb_copy_bits(skb, nhoff + offsetof(struct ipv6hdr, nexthdr),
-                         &nexthdr, sizeof(nexthdr)) != 0) {
-               pr_debug("ip6_conntrack_core: can't get nexthdr\n");
-               return -NF_ACCEPT;
-       }
-       protoff = ipv6_skip_exthdr(skb, extoff, &nexthdr, &frag_off);
-       /*
-        * (protoff == skb->len) means the packet has not data, just
-        * IPv6 and possibly extensions headers, but it is tracked anyway
-        */
-       if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
-               pr_debug("ip6_conntrack_core: can't find proto in pkt\n");
-               return -NF_ACCEPT;
-       }
-
-       *dataoff = protoff;
-       *protonum = nexthdr;
-       return NF_ACCEPT;
-}
-
-static unsigned int ipv6_helper(void *priv,
-                               struct sk_buff *skb,
-                               const struct nf_hook_state *state)
-{
-       struct nf_conn *ct;
-       const struct nf_conn_help *help;
-       const struct nf_conntrack_helper *helper;
-       enum ip_conntrack_info ctinfo;
-       __be16 frag_off;
-       int protoff;
-       u8 nexthdr;
-
-       /* This is where we call the helper: as the packet goes out. */
-       ct = nf_ct_get(skb, &ctinfo);
-       if (!ct || ctinfo == IP_CT_RELATED_REPLY)
-               return NF_ACCEPT;
-
-       help = nfct_help(ct);
-       if (!help)
-               return NF_ACCEPT;
-       /* rcu_read_lock()ed by nf_hook_thresh */
-       helper = rcu_dereference(help->helper);
-       if (!helper)
-               return NF_ACCEPT;
-
-       nexthdr = ipv6_hdr(skb)->nexthdr;
-       protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
-                                  &frag_off);
-       if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
-               pr_debug("proto header not found\n");
-               return NF_ACCEPT;
-       }
-
-       return helper->help(skb, protoff, ct, ctinfo);
-}
-
-static unsigned int ipv6_confirm(void *priv,
-                                struct sk_buff *skb,
-                                const struct nf_hook_state *state)
-{
-       struct nf_conn *ct;
-       enum ip_conntrack_info ctinfo;
-       unsigned char pnum = ipv6_hdr(skb)->nexthdr;
-       int protoff;
-       __be16 frag_off;
-
-       ct = nf_ct_get(skb, &ctinfo);
-       if (!ct || ctinfo == IP_CT_RELATED_REPLY)
-               goto out;
-
-       protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum,
-                                  &frag_off);
-       if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
-               pr_debug("proto header not found\n");
-               goto out;
-       }
-
-       /* adjust seqs for loopback traffic only in outgoing direction */
-       if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
-           !nf_is_loopback_packet(skb)) {
-               if (!nf_ct_seq_adjust(skb, ct, ctinfo, protoff)) {
-                       NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
-                       return NF_DROP;
-               }
-       }
-out:
-       /* We've seen it coming out the other side: confirm it */
-       return nf_conntrack_confirm(skb);
-}
-
-static unsigned int ipv6_conntrack_in(void *priv,
-                                     struct sk_buff *skb,
-                                     const struct nf_hook_state *state)
-{
-       return nf_conntrack_in(state->net, PF_INET6, state->hook, skb);
-}
-
-static unsigned int ipv6_conntrack_local(void *priv,
-                                        struct sk_buff *skb,
-                                        const struct nf_hook_state *state)
-{
-       return nf_conntrack_in(state->net, PF_INET6, state->hook, skb);
-}
-
-static const struct nf_hook_ops ipv6_conntrack_ops[] = {
-       {
-               .hook           = ipv6_conntrack_in,
-               .pf             = NFPROTO_IPV6,
-               .hooknum        = NF_INET_PRE_ROUTING,
-               .priority       = NF_IP6_PRI_CONNTRACK,
-       },
-       {
-               .hook           = ipv6_conntrack_local,
-               .pf             = NFPROTO_IPV6,
-               .hooknum        = NF_INET_LOCAL_OUT,
-               .priority       = NF_IP6_PRI_CONNTRACK,
-       },
-       {
-               .hook           = ipv6_helper,
-               .pf             = NFPROTO_IPV6,
-               .hooknum        = NF_INET_POST_ROUTING,
-               .priority       = NF_IP6_PRI_CONNTRACK_HELPER,
-       },
-       {
-               .hook           = ipv6_confirm,
-               .pf             = NFPROTO_IPV6,
-               .hooknum        = NF_INET_POST_ROUTING,
-               .priority       = NF_IP6_PRI_LAST,
-       },
-       {
-               .hook           = ipv6_helper,
-               .pf             = NFPROTO_IPV6,
-               .hooknum        = NF_INET_LOCAL_IN,
-               .priority       = NF_IP6_PRI_CONNTRACK_HELPER,
-       },
-       {
-               .hook           = ipv6_confirm,
-               .pf             = NFPROTO_IPV6,
-               .hooknum        = NF_INET_LOCAL_IN,
-               .priority       = NF_IP6_PRI_LAST-1,
-       },
-};
-
-static int
-ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len)
-{
-       struct nf_conntrack_tuple tuple = { .src.l3num = NFPROTO_IPV6 };
-       const struct ipv6_pinfo *inet6 = inet6_sk(sk);
-       const struct inet_sock *inet = inet_sk(sk);
-       const struct nf_conntrack_tuple_hash *h;
-       struct sockaddr_in6 sin6;
-       struct nf_conn *ct;
-       __be32 flow_label;
-       int bound_dev_if;
-
-       lock_sock(sk);
-       tuple.src.u3.in6 = sk->sk_v6_rcv_saddr;
-       tuple.src.u.tcp.port = inet->inet_sport;
-       tuple.dst.u3.in6 = sk->sk_v6_daddr;
-       tuple.dst.u.tcp.port = inet->inet_dport;
-       tuple.dst.protonum = sk->sk_protocol;
-       bound_dev_if = sk->sk_bound_dev_if;
-       flow_label = inet6->flow_label;
-       release_sock(sk);
-
-       if (tuple.dst.protonum != IPPROTO_TCP &&
-           tuple.dst.protonum != IPPROTO_SCTP)
-               return -ENOPROTOOPT;
-
-       if (*len < 0 || (unsigned int) *len < sizeof(sin6))
-               return -EINVAL;
-
-       h = nf_conntrack_find_get(sock_net(sk), &nf_ct_zone_dflt, &tuple);
-       if (!h) {
-               pr_debug("IP6T_SO_ORIGINAL_DST: Can't find %pI6c/%u-%pI6c/%u.\n",
-                        &tuple.src.u3.ip6, ntohs(tuple.src.u.tcp.port),
-                        &tuple.dst.u3.ip6, ntohs(tuple.dst.u.tcp.port));
-               return -ENOENT;
-       }
-
-       ct = nf_ct_tuplehash_to_ctrack(h);
-
-       sin6.sin6_family = AF_INET6;
-       sin6.sin6_port = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port;
-       sin6.sin6_flowinfo = flow_label & IPV6_FLOWINFO_MASK;
-       memcpy(&sin6.sin6_addr,
-               &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6,
-                                       sizeof(sin6.sin6_addr));
-
-       nf_ct_put(ct);
-       sin6.sin6_scope_id = ipv6_iface_scope_id(&sin6.sin6_addr, bound_dev_if);
-       return copy_to_user(user, &sin6, sizeof(sin6)) ? -EFAULT : 0;
-}
-
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
-
-#include <linux/netfilter/nfnetlink.h>
-#include <linux/netfilter/nfnetlink_conntrack.h>
-
-static int ipv6_tuple_to_nlattr(struct sk_buff *skb,
-                               const struct nf_conntrack_tuple *tuple)
-{
-       if (nla_put_in6_addr(skb, CTA_IP_V6_SRC, &tuple->src.u3.in6) ||
-           nla_put_in6_addr(skb, CTA_IP_V6_DST, &tuple->dst.u3.in6))
-               goto nla_put_failure;
-       return 0;
-
-nla_put_failure:
-       return -1;
-}
-
-static const struct nla_policy ipv6_nla_policy[CTA_IP_MAX+1] = {
-       [CTA_IP_V6_SRC] = { .len = sizeof(u_int32_t)*4 },
-       [CTA_IP_V6_DST] = { .len = sizeof(u_int32_t)*4 },
-};
-
-static int ipv6_nlattr_to_tuple(struct nlattr *tb[],
-                               struct nf_conntrack_tuple *t)
-{
-       if (!tb[CTA_IP_V6_SRC] || !tb[CTA_IP_V6_DST])
-               return -EINVAL;
-
-       t->src.u3.in6 = nla_get_in6_addr(tb[CTA_IP_V6_SRC]);
-       t->dst.u3.in6 = nla_get_in6_addr(tb[CTA_IP_V6_DST]);
-
-       return 0;
-}
-#endif
-
-static int ipv6_hooks_register(struct net *net)
-{
-       struct conntrack6_net *cnet = net_generic(net, conntrack6_net_id);
-       int err = 0;
-
-       mutex_lock(&register_ipv6_hooks);
-       cnet->users++;
-       if (cnet->users > 1)
-               goto out_unlock;
-
-       err = nf_defrag_ipv6_enable(net);
-       if (err < 0) {
-               cnet->users = 0;
-               goto out_unlock;
-       }
-
-       err = nf_register_net_hooks(net, ipv6_conntrack_ops,
-                                   ARRAY_SIZE(ipv6_conntrack_ops));
-       if (err)
-               cnet->users = 0;
- out_unlock:
-       mutex_unlock(&register_ipv6_hooks);
-       return err;
-}
-
-static void ipv6_hooks_unregister(struct net *net)
-{
-       struct conntrack6_net *cnet = net_generic(net, conntrack6_net_id);
-
-       mutex_lock(&register_ipv6_hooks);
-       if (cnet->users && (--cnet->users == 0))
-               nf_unregister_net_hooks(net, ipv6_conntrack_ops,
-                                       ARRAY_SIZE(ipv6_conntrack_ops));
-       mutex_unlock(&register_ipv6_hooks);
-}
-
-const struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 = {
-       .l3proto                = PF_INET6,
-       .pkt_to_tuple           = ipv6_pkt_to_tuple,
-       .invert_tuple           = ipv6_invert_tuple,
-       .get_l4proto            = ipv6_get_l4proto,
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
-       .tuple_to_nlattr        = ipv6_tuple_to_nlattr,
-       .nlattr_to_tuple        = ipv6_nlattr_to_tuple,
-       .nla_policy             = ipv6_nla_policy,
-       .nla_size               = NLA_ALIGN(NLA_HDRLEN + sizeof(u32[4])) +
-                                 NLA_ALIGN(NLA_HDRLEN + sizeof(u32[4])),
-#endif
-       .net_ns_get             = ipv6_hooks_register,
-       .net_ns_put             = ipv6_hooks_unregister,
-       .me                     = THIS_MODULE,
-};
-
-MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET6));
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Yasuyuki KOZAKAI @USAGI <yasuyuki.kozakai@toshiba.co.jp>");
-
-static struct nf_sockopt_ops so_getorigdst6 = {
-       .pf             = NFPROTO_IPV6,
-       .get_optmin     = IP6T_SO_ORIGINAL_DST,
-       .get_optmax     = IP6T_SO_ORIGINAL_DST + 1,
-       .get            = ipv6_getorigdst,
-       .owner          = THIS_MODULE,
-};
-
-static const struct nf_conntrack_l4proto * const builtin_l4proto6[] = {
-       &nf_conntrack_l4proto_tcp6,
-       &nf_conntrack_l4proto_udp6,
-       &nf_conntrack_l4proto_icmpv6,
-#ifdef CONFIG_NF_CT_PROTO_DCCP
-       &nf_conntrack_l4proto_dccp6,
-#endif
-#ifdef CONFIG_NF_CT_PROTO_SCTP
-       &nf_conntrack_l4proto_sctp6,
-#endif
-#ifdef CONFIG_NF_CT_PROTO_UDPLITE
-       &nf_conntrack_l4proto_udplite6,
-#endif
-};
-
-static int ipv6_net_init(struct net *net)
-{
-       return nf_ct_l4proto_pernet_register(net, builtin_l4proto6,
-                                            ARRAY_SIZE(builtin_l4proto6));
-}
-
-static void ipv6_net_exit(struct net *net)
-{
-       nf_ct_l4proto_pernet_unregister(net, builtin_l4proto6,
-                                       ARRAY_SIZE(builtin_l4proto6));
-}
-
-static struct pernet_operations ipv6_net_ops = {
-       .init = ipv6_net_init,
-       .exit = ipv6_net_exit,
-       .id = &conntrack6_net_id,
-       .size = sizeof(struct conntrack6_net),
-};
-
-static int __init nf_conntrack_l3proto_ipv6_init(void)
-{
-       int ret = 0;
-
-       need_conntrack();
-
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
-       if (WARN_ON(nla_policy_len(ipv6_nla_policy, CTA_IP_MAX + 1) !=
-           nf_conntrack_l3proto_ipv6.nla_size))
-               return -EINVAL;
-#endif
-
-       ret = nf_register_sockopt(&so_getorigdst6);
-       if (ret < 0) {
-               pr_err("Unable to register netfilter socket option\n");
-               return ret;
-       }
-
-       ret = register_pernet_subsys(&ipv6_net_ops);
-       if (ret < 0)
-               goto cleanup_sockopt;
-
-       ret = nf_ct_l4proto_register(builtin_l4proto6,
-                                    ARRAY_SIZE(builtin_l4proto6));
-       if (ret < 0)
-               goto cleanup_pernet;
-
-       ret = nf_ct_l3proto_register(&nf_conntrack_l3proto_ipv6);
-       if (ret < 0) {
-               pr_err("nf_conntrack_ipv6: can't register ipv6 proto.\n");
-               goto cleanup_l4proto;
-       }
-       return ret;
-cleanup_l4proto:
-       nf_ct_l4proto_unregister(builtin_l4proto6,
-                                ARRAY_SIZE(builtin_l4proto6));
- cleanup_pernet:
-       unregister_pernet_subsys(&ipv6_net_ops);
- cleanup_sockopt:
-       nf_unregister_sockopt(&so_getorigdst6);
-       return ret;
-}
-
-static void __exit nf_conntrack_l3proto_ipv6_fini(void)
-{
-       synchronize_net();
-       nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv6);
-       nf_ct_l4proto_unregister(builtin_l4proto6,
-                                ARRAY_SIZE(builtin_l4proto6));
-       unregister_pernet_subsys(&ipv6_net_ops);
-       nf_unregister_sockopt(&so_getorigdst6);
-}
-
-module_init(nf_conntrack_l3proto_ipv6_init);
-module_exit(nf_conntrack_l3proto_ipv6_fini);
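
The ipv6_hooks_register()/ipv6_hooks_unregister() pair deleted above is a
common kernel pattern: the conntrack hooks are installed when the first user
appears in a network namespace and torn down when the last one goes away,
serialized by a mutex around a per-netns user count. A minimal sketch of the
pattern, with illustrative (non-kernel) helper names:

    #include <linux/mutex.h>

    static DEFINE_MUTEX(reg_lock);

    struct pernet_state { unsigned int users; };

    static int do_register(void);      /* hypothetical hook setup */
    static void do_unregister(void);   /* hypothetical hook teardown */

    static int hooks_get(struct pernet_state *st)
    {
            int err = 0;

            mutex_lock(&reg_lock);
            if (st->users++ == 0) {
                    err = do_register();
                    if (err)
                            st->users = 0;  /* roll back on failure */
            }
            mutex_unlock(&reg_lock);
            return err;
    }

    static void hooks_put(struct pernet_state *st)
    {
            mutex_lock(&reg_lock);
            if (st->users && --st->users == 0)
                    do_unregister();
            mutex_unlock(&reg_lock);
    }
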
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
deleted file mode 100644 (file)
index 2548e2c..0000000
+++ /dev/null
@@ -1,382 +0,0 @@
-/*
- * Copyright (C)2003,2004 USAGI/WIDE Project
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Author:
- *     Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- */
-
-#include <linux/types.h>
-#include <linux/timer.h>
-#include <linux/module.h>
-#include <linux/netfilter.h>
-#include <linux/in6.h>
-#include <linux/icmpv6.h>
-#include <linux/ipv6.h>
-#include <net/ipv6.h>
-#include <net/ip6_checksum.h>
-#include <linux/seq_file.h>
-#include <linux/netfilter_ipv6.h>
-#include <net/netfilter/nf_conntrack_tuple.h>
-#include <net/netfilter/nf_conntrack_l4proto.h>
-#include <net/netfilter/nf_conntrack_core.h>
-#include <net/netfilter/nf_conntrack_zones.h>
-#include <net/netfilter/ipv6/nf_conntrack_icmpv6.h>
-#include <net/netfilter/nf_log.h>
-
-static const unsigned int nf_ct_icmpv6_timeout = 30*HZ;
-
-static inline struct nf_icmp_net *icmpv6_pernet(struct net *net)
-{
-       return &net->ct.nf_ct_proto.icmpv6;
-}
-
-static bool icmpv6_pkt_to_tuple(const struct sk_buff *skb,
-                               unsigned int dataoff,
-                               struct net *net,
-                               struct nf_conntrack_tuple *tuple)
-{
-       const struct icmp6hdr *hp;
-       struct icmp6hdr _hdr;
-
-       hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
-       if (hp == NULL)
-               return false;
-       tuple->dst.u.icmp.type = hp->icmp6_type;
-       tuple->src.u.icmp.id = hp->icmp6_identifier;
-       tuple->dst.u.icmp.code = hp->icmp6_code;
-
-       return true;
-}
-
-/* Add 1; spaces filled with 0. */
-static const u_int8_t invmap[] = {
-       [ICMPV6_ECHO_REQUEST - 128]     = ICMPV6_ECHO_REPLY + 1,
-       [ICMPV6_ECHO_REPLY - 128]       = ICMPV6_ECHO_REQUEST + 1,
-       [ICMPV6_NI_QUERY - 128]         = ICMPV6_NI_REPLY + 1,
-       [ICMPV6_NI_REPLY - 128]         = ICMPV6_NI_QUERY + 1
-};
-
-static const u_int8_t noct_valid_new[] = {
-       [ICMPV6_MGM_QUERY - 130] = 1,
-       [ICMPV6_MGM_REPORT - 130] = 1,
-       [ICMPV6_MGM_REDUCTION - 130] = 1,
-       [NDISC_ROUTER_SOLICITATION - 130] = 1,
-       [NDISC_ROUTER_ADVERTISEMENT - 130] = 1,
-       [NDISC_NEIGHBOUR_SOLICITATION - 130] = 1,
-       [NDISC_NEIGHBOUR_ADVERTISEMENT - 130] = 1,
-       [ICMPV6_MLD2_REPORT - 130] = 1
-};
-
-static bool icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple,
-                               const struct nf_conntrack_tuple *orig)
-{
-       int type = orig->dst.u.icmp.type - 128;
-       if (type < 0 || type >= sizeof(invmap) || !invmap[type])
-               return false;
-
-       tuple->src.u.icmp.id   = orig->src.u.icmp.id;
-       tuple->dst.u.icmp.type = invmap[type] - 1;
-       tuple->dst.u.icmp.code = orig->dst.u.icmp.code;
-       return true;
-}
-
-static unsigned int *icmpv6_get_timeouts(struct net *net)
-{
-       return &icmpv6_pernet(net)->timeout;
-}
-
-/* Returns verdict for packet, or -1 for invalid. */
-static int icmpv6_packet(struct nf_conn *ct,
-                      const struct sk_buff *skb,
-                      unsigned int dataoff,
-                      enum ip_conntrack_info ctinfo,
-                      unsigned int *timeout)
-{
-       /* Do not immediately delete the connection after the first
-          successful reply, to avoid excessive conntrackd traffic
-          and to correctly handle ICMP echo reply duplicates. */
-       nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
-
-       return NF_ACCEPT;
-}
-
-/* Called when a new connection for this protocol is found. */
-static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb,
-                      unsigned int dataoff, unsigned int *timeouts)
-{
-       static const u_int8_t valid_new[] = {
-               [ICMPV6_ECHO_REQUEST - 128] = 1,
-               [ICMPV6_NI_QUERY - 128] = 1
-       };
-       int type = ct->tuplehash[0].tuple.dst.u.icmp.type - 128;
-
-       if (type < 0 || type >= sizeof(valid_new) || !valid_new[type]) {
-               /* Can't create a new ICMPv6 `conn' with this. */
-               pr_debug("icmpv6: can't create new conn with type %u\n",
-                        type + 128);
-               nf_ct_dump_tuple_ipv6(&ct->tuplehash[0].tuple);
-               return false;
-       }
-       return true;
-}
-
-static int
-icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
-                    struct sk_buff *skb,
-                    unsigned int icmp6off)
-{
-       struct nf_conntrack_tuple intuple, origtuple;
-       const struct nf_conntrack_tuple_hash *h;
-       const struct nf_conntrack_l4proto *inproto;
-       enum ip_conntrack_info ctinfo;
-       struct nf_conntrack_zone tmp;
-
-       WARN_ON(skb_nfct(skb));
-
-       /* Are they talking about one of our connections? */
-       if (!nf_ct_get_tuplepr(skb,
-                              skb_network_offset(skb)
-                               + sizeof(struct ipv6hdr)
-                               + sizeof(struct icmp6hdr),
-                              PF_INET6, net, &origtuple)) {
-               pr_debug("icmpv6_error: Can't get tuple\n");
-               return -NF_ACCEPT;
-       }
-
-       /* rcu_read_lock()ed by nf_hook_thresh */
-       inproto = __nf_ct_l4proto_find(PF_INET6, origtuple.dst.protonum);
-
-       /* Ordinarily, we'd expect the inverted tupleproto, but it's
-          been preserved inside the ICMP. */
-       if (!nf_ct_invert_tuple(&intuple, &origtuple,
-                               &nf_conntrack_l3proto_ipv6, inproto)) {
-               pr_debug("icmpv6_error: Can't invert tuple\n");
-               return -NF_ACCEPT;
-       }
-
-       ctinfo = IP_CT_RELATED;
-
-       h = nf_conntrack_find_get(net, nf_ct_zone_tmpl(tmpl, skb, &tmp),
-                                 &intuple);
-       if (!h) {
-               pr_debug("icmpv6_error: no match\n");
-               return -NF_ACCEPT;
-       } else {
-               if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY)
-                       ctinfo += IP_CT_IS_REPLY;
-       }
-
-       /* Update skb to refer to this connection */
-       nf_ct_set(skb, nf_ct_tuplehash_to_ctrack(h), ctinfo);
-       return NF_ACCEPT;
-}
-
-static void icmpv6_error_log(const struct sk_buff *skb, struct net *net,
-                            u8 pf, const char *msg)
-{
-       nf_l4proto_log_invalid(skb, net, pf, IPPROTO_ICMPV6, "%s", msg);
-}
-
-static int
-icmpv6_error(struct net *net, struct nf_conn *tmpl,
-            struct sk_buff *skb, unsigned int dataoff,
-            u8 pf, unsigned int hooknum)
-{
-       const struct icmp6hdr *icmp6h;
-       struct icmp6hdr _ih;
-       int type;
-
-       icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih);
-       if (icmp6h == NULL) {
-               icmpv6_error_log(skb, net, pf, "short packet");
-               return -NF_ACCEPT;
-       }
-
-       if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
-           nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) {
-               icmpv6_error_log(skb, net, pf, "ICMPv6 checksum failed");
-               return -NF_ACCEPT;
-       }
-
-       type = icmp6h->icmp6_type - 130;
-       if (type >= 0 && type < sizeof(noct_valid_new) &&
-           noct_valid_new[type]) {
-               nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
-               return NF_ACCEPT;
-       }
-
-       /* Not an error message? */
-       if (icmp6h->icmp6_type >= 128)
-               return NF_ACCEPT;
-
-       return icmpv6_error_message(net, tmpl, skb, dataoff);
-}
-
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
-
-#include <linux/netfilter/nfnetlink.h>
-#include <linux/netfilter/nfnetlink_conntrack.h>
-static int icmpv6_tuple_to_nlattr(struct sk_buff *skb,
-                                 const struct nf_conntrack_tuple *t)
-{
-       if (nla_put_be16(skb, CTA_PROTO_ICMPV6_ID, t->src.u.icmp.id) ||
-           nla_put_u8(skb, CTA_PROTO_ICMPV6_TYPE, t->dst.u.icmp.type) ||
-           nla_put_u8(skb, CTA_PROTO_ICMPV6_CODE, t->dst.u.icmp.code))
-               goto nla_put_failure;
-       return 0;
-
-nla_put_failure:
-       return -1;
-}
-
-static const struct nla_policy icmpv6_nla_policy[CTA_PROTO_MAX+1] = {
-       [CTA_PROTO_ICMPV6_TYPE] = { .type = NLA_U8 },
-       [CTA_PROTO_ICMPV6_CODE] = { .type = NLA_U8 },
-       [CTA_PROTO_ICMPV6_ID]   = { .type = NLA_U16 },
-};
-
-static int icmpv6_nlattr_to_tuple(struct nlattr *tb[],
-                               struct nf_conntrack_tuple *tuple)
-{
-       if (!tb[CTA_PROTO_ICMPV6_TYPE] ||
-           !tb[CTA_PROTO_ICMPV6_CODE] ||
-           !tb[CTA_PROTO_ICMPV6_ID])
-               return -EINVAL;
-
-       tuple->dst.u.icmp.type = nla_get_u8(tb[CTA_PROTO_ICMPV6_TYPE]);
-       tuple->dst.u.icmp.code = nla_get_u8(tb[CTA_PROTO_ICMPV6_CODE]);
-       tuple->src.u.icmp.id = nla_get_be16(tb[CTA_PROTO_ICMPV6_ID]);
-
-       if (tuple->dst.u.icmp.type < 128 ||
-           tuple->dst.u.icmp.type - 128 >= sizeof(invmap) ||
-           !invmap[tuple->dst.u.icmp.type - 128])
-               return -EINVAL;
-
-       return 0;
-}
-
-static unsigned int icmpv6_nlattr_tuple_size(void)
-{
-       static unsigned int size __read_mostly;
-
-       if (!size)
-               size = nla_policy_len(icmpv6_nla_policy, CTA_PROTO_MAX + 1);
-
-       return size;
-}
-#endif
-
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
-
-#include <linux/netfilter/nfnetlink.h>
-#include <linux/netfilter/nfnetlink_cttimeout.h>
-
-static int icmpv6_timeout_nlattr_to_obj(struct nlattr *tb[],
-                                       struct net *net, void *data)
-{
-       unsigned int *timeout = data;
-       struct nf_icmp_net *in = icmpv6_pernet(net);
-
-       if (tb[CTA_TIMEOUT_ICMPV6_TIMEOUT]) {
-               *timeout =
-                   ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMPV6_TIMEOUT])) * HZ;
-       } else {
-               /* Set default ICMPv6 timeout. */
-               *timeout = in->timeout;
-       }
-       return 0;
-}
-
-static int
-icmpv6_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
-{
-       const unsigned int *timeout = data;
-
-       if (nla_put_be32(skb, CTA_TIMEOUT_ICMPV6_TIMEOUT, htonl(*timeout / HZ)))
-               goto nla_put_failure;
-       return 0;
-
-nla_put_failure:
-       return -ENOSPC;
-}
-
-static const struct nla_policy
-icmpv6_timeout_nla_policy[CTA_TIMEOUT_ICMPV6_MAX+1] = {
-       [CTA_TIMEOUT_ICMPV6_TIMEOUT]    = { .type = NLA_U32 },
-};
-#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
-
-#ifdef CONFIG_SYSCTL
-static struct ctl_table icmpv6_sysctl_table[] = {
-       {
-               .procname       = "nf_conntrack_icmpv6_timeout",
-               .maxlen         = sizeof(unsigned int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_jiffies,
-       },
-       { }
-};
-#endif /* CONFIG_SYSCTL */
-
-static int icmpv6_kmemdup_sysctl_table(struct nf_proto_net *pn,
-                                      struct nf_icmp_net *in)
-{
-#ifdef CONFIG_SYSCTL
-       pn->ctl_table = kmemdup(icmpv6_sysctl_table,
-                               sizeof(icmpv6_sysctl_table),
-                               GFP_KERNEL);
-       if (!pn->ctl_table)
-               return -ENOMEM;
-
-       pn->ctl_table[0].data = &in->timeout;
-#endif
-       return 0;
-}
-
-static int icmpv6_init_net(struct net *net, u_int16_t proto)
-{
-       struct nf_icmp_net *in = icmpv6_pernet(net);
-       struct nf_proto_net *pn = &in->pn;
-
-       in->timeout = nf_ct_icmpv6_timeout;
-
-       return icmpv6_kmemdup_sysctl_table(pn, in);
-}
-
-static struct nf_proto_net *icmpv6_get_net_proto(struct net *net)
-{
-       return &net->ct.nf_ct_proto.icmpv6.pn;
-}
-
-const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 =
-{
-       .l3proto                = PF_INET6,
-       .l4proto                = IPPROTO_ICMPV6,
-       .pkt_to_tuple           = icmpv6_pkt_to_tuple,
-       .invert_tuple           = icmpv6_invert_tuple,
-       .packet                 = icmpv6_packet,
-       .get_timeouts           = icmpv6_get_timeouts,
-       .new                    = icmpv6_new,
-       .error                  = icmpv6_error,
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
-       .tuple_to_nlattr        = icmpv6_tuple_to_nlattr,
-       .nlattr_tuple_size      = icmpv6_nlattr_tuple_size,
-       .nlattr_to_tuple        = icmpv6_nlattr_to_tuple,
-       .nla_policy             = icmpv6_nla_policy,
-#endif
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
-       .ctnl_timeout           = {
-               .nlattr_to_obj  = icmpv6_timeout_nlattr_to_obj,
-               .obj_to_nlattr  = icmpv6_timeout_obj_to_nlattr,
-               .nlattr_max     = CTA_TIMEOUT_ICMP_MAX,
-               .obj_size       = sizeof(unsigned int),
-               .nla_policy     = icmpv6_timeout_nla_policy,
-       },
-#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
-       .init_net               = icmpv6_init_net,
-       .get_net_proto          = icmpv6_get_net_proto,
-};
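
A note on the invmap[] trick in the file deleted above: each entry stores the
inverse ICMPv6 type plus one, so a zero slot means "no inverse" and
icmpv6_invert_tuple() can reject non-invertible types with one bounds check
and one array lookup. Self-contained illustration (userspace C; the type
values 128/129 match include/uapi/linux/icmpv6.h):

    #include <stdio.h>

    #define ECHO_REQUEST 128
    #define ECHO_REPLY   129

    static const unsigned char invmap[] = {
            [ECHO_REQUEST - 128] = ECHO_REPLY + 1,   /* "+1" encoding */
            [ECHO_REPLY - 128]   = ECHO_REQUEST + 1,
    };

    static int invert_type(int type)
    {
            int idx = type - 128;

            if (idx < 0 || idx >= (int)sizeof(invmap) || !invmap[idx])
                    return -1;               /* not invertible */
            return invmap[idx] - 1;          /* undo the +1 */
    }

    int main(void)
    {
            printf("%d\n", invert_type(ECHO_REQUEST));  /* prints 129 */
            printf("%d\n", invert_type(130));           /* prints -1 */
            return 0;
    }
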
index e4d9e6976d3c295e68b13c0ceecd5fa76db4fbc1..0610bdab721cb9d6d69ac6cd7a5d21367efed23c 100644 (file)
@@ -33,9 +33,8 @@
 
 #include <net/sock.h>
 #include <net/snmp.h>
-#include <net/inet_frag.h>
+#include <net/ipv6_frag.h>
 
-#include <net/ipv6.h>
 #include <net/protocol.h>
 #include <net/transp_v6.h>
 #include <net/rawv6.h>
@@ -151,7 +150,7 @@ static void nf_ct_frag6_expire(struct timer_list *t)
        fq = container_of(frag, struct frag_queue, q);
        net = container_of(fq->q.net, struct net, nf_frag.frags);
 
-       ip6_expire_frag_queue(net, fq);
+       ip6frag_expire_frag_queue(net, fq);
 }
 
 /* Creation primitives. */
@@ -624,16 +623,24 @@ static struct pernet_operations nf_ct_net_ops = {
        .exit = nf_ct_net_exit,
 };
 
+static const struct rhashtable_params nfct_rhash_params = {
+       .head_offset            = offsetof(struct inet_frag_queue, node),
+       .hashfn                 = ip6frag_key_hashfn,
+       .obj_hashfn             = ip6frag_obj_hashfn,
+       .obj_cmpfn              = ip6frag_obj_cmpfn,
+       .automatic_shrinking    = true,
+};
+
 int nf_ct_frag6_init(void)
 {
        int ret = 0;
 
-       nf_frags.constructor = ip6_frag_init;
+       nf_frags.constructor = ip6frag_init;
        nf_frags.destructor = NULL;
        nf_frags.qsize = sizeof(struct frag_queue);
        nf_frags.frag_expire = nf_ct_frag6_expire;
        nf_frags.frags_cache_name = nf_frags_cache_name;
-       nf_frags.rhash_params = ip6_rhash_params;
+       nf_frags.rhash_params = nfct_rhash_params;
        ret = inet_frags_init(&nf_frags);
        if (ret)
                goto out;
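
nf_conntrack_reasm.c stops borrowing the exported ip6_rhash_params and builds
its own rhashtable parameters from shared ip6frag_* helpers, which this
series moves into <net/ipv6_frag.h>. Judging by the per-file versions deleted
from net/ipv6/reassembly.c later in this diff, the shared key-hash helper
presumably reads:

    /* Hedged reconstruction from the ip6_key_hashfn() removed below:
     * hash the v6 compare key as an array of u32 words. */
    static u32 ip6frag_key_hashfn(const void *data, u32 len, u32 seed)
    {
            return jhash2(data,
                          sizeof(struct frag_v6_compare_key) / sizeof(u32),
                          seed);
    }
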
index c87b48359e8f482d6e8deb91a19502b484f718ed..72dd3e20237523e37e88a6858af11ce4620aed4f 100644 (file)
@@ -14,8 +14,7 @@
 #include <linux/skbuff.h>
 #include <linux/icmp.h>
 #include <linux/sysctl.h>
-#include <net/ipv6.h>
-#include <net/inet_frag.h>
+#include <net/ipv6_frag.h>
 
 #include <linux/netfilter_ipv6.h>
 #include <linux/netfilter_bridge.h>
@@ -23,7 +22,6 @@
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
-#include <net/netfilter/nf_conntrack_l3proto.h>
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
 #endif
index b397a8fe88b9391e462146391901a360969547c0..c6bf580d0f331d8e22df15365c7084d9a90899ec 100644 (file)
@@ -36,7 +36,7 @@ static const struct nf_loginfo default_loginfo = {
 };
 
 /* One level of recursion won't kill us */
-static void dump_ipv6_packet(struct nf_log_buf *m,
+static void dump_ipv6_packet(struct net *net, struct nf_log_buf *m,
                             const struct nf_loginfo *info,
                             const struct sk_buff *skb, unsigned int ip6hoff,
                             int recurse)
@@ -258,7 +258,7 @@ static void dump_ipv6_packet(struct nf_log_buf *m,
                        /* Max length: 3+maxlen */
                        if (recurse) {
                                nf_log_buf_add(m, "[");
-                               dump_ipv6_packet(m, info, skb,
+                               dump_ipv6_packet(net, m, info, skb,
                                                 ptr + sizeof(_icmp6h), 0);
                                nf_log_buf_add(m, "] ");
                        }
@@ -278,7 +278,7 @@ static void dump_ipv6_packet(struct nf_log_buf *m,
 
        /* Max length: 15 "UID=4294967295 " */
        if ((logflags & NF_LOG_UID) && recurse)
-               nf_log_dump_sk_uid_gid(m, skb->sk);
+               nf_log_dump_sk_uid_gid(net, m, skb->sk);
 
        /* Max length: 16 "MARK=0xFFFFFFFF " */
        if (recurse && skb->mark)
@@ -365,7 +365,7 @@ static void nf_log_ip6_packet(struct net *net, u_int8_t pf,
        if (in != NULL)
                dump_ipv6_mac_header(m, loginfo, skb);
 
-       dump_ipv6_packet(m, loginfo, skb, skb_network_offset(skb), 1);
+       dump_ipv6_packet(net, m, loginfo, skb, skb_network_offset(skb), 1);
 
        nf_log_buf_close(m);
 }
index 96f56bf49a30533ae693f11de286ff310a9fe356..4c04bccc74171058760b528f30ae917326e3b301 100644 (file)
@@ -62,7 +62,6 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
        struct dst_entry *dst;
        struct rt6_info *rt;
        struct pingfakehdr pfh;
-       struct sockcm_cookie junk = {0};
        struct ipcm6_cookie ipc6;
 
        pr_debug("ping_v6_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num);
@@ -119,7 +118,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
        fl6.fl6_icmp_code = user_icmph.icmp6_code;
        security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
 
-       ipc6.tclass = np->tclass;
+       ipcm6_init_sk(&ipc6, np);
        fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
 
        dst = ip6_sk_dst_lookup_flow(sk, &fl6, daddr, false);
@@ -142,13 +141,11 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
        pfh.family = AF_INET6;
 
        ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
-       ipc6.dontfrag = np->dontfrag;
-       ipc6.opt = NULL;
 
        lock_sock(sk);
        err = ip6_append_data(sk, ping_getfrag, &pfh, len,
                              0, &ipc6, &fl6, rt,
-                             MSG_DONTWAIT, &junk);
+                             MSG_DONTWAIT);
 
        if (err) {
                ICMP6_INC_STATS(sock_net(sk), rt->rt6i_idev,
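
The hand-rolled ipcm6_cookie initialization removed here (and from the raw,
UDP and L2TP paths below) is replaced by ipcm6_init()/ipcm6_init_sk()
helpers. Reconstructed from the fields the old code set by hand; treat the
exact definitions as an assumption:

    static inline void ipcm6_init(struct ipcm6_cookie *ipc6)
    {
            *ipc6 = (struct ipcm6_cookie) {
                    .hlimit = -1,
                    .tclass = -1,
                    .dontfrag = -1,
            };      /* everything else, including .sockc, zeroed */
    }

    static inline void ipcm6_init_sk(struct ipcm6_cookie *ipc6,
                                     const struct ipv6_pinfo *np)
    {
            *ipc6 = (struct ipcm6_cookie) {
                    .hlimit = -1,
                    .tclass = np->tclass,
                    .dontfrag = np->dontfrag,
            };
    }
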
index afc307c89d1a977a00693999ec0f54b50005b7bd..413d98bf24f4c9f9644b79590369b9188713926e 100644 (file)
@@ -620,7 +620,7 @@ out:
 
 static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
                        struct flowi6 *fl6, struct dst_entry **dstp,
-                       unsigned int flags)
+                       unsigned int flags, const struct sockcm_cookie *sockc)
 {
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct net *net = sock_net(sk);
@@ -650,6 +650,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
        skb->protocol = htons(ETH_P_IPV6);
        skb->priority = sk->sk_priority;
        skb->mark = sk->sk_mark;
+       skb->tstamp = sockc->transmit_time;
        skb_dst_set(skb, &rt->dst);
        *dstp = NULL;
 
@@ -766,7 +767,6 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
        struct dst_entry *dst = NULL;
        struct raw6_frag_vec rfv;
        struct flowi6 fl6;
-       struct sockcm_cookie sockc;
        struct ipcm6_cookie ipc6;
        int addr_len = msg->msg_namelen;
        u16 proto;
@@ -790,10 +790,8 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
        fl6.flowi6_mark = sk->sk_mark;
        fl6.flowi6_uid = sk->sk_uid;
 
-       ipc6.hlimit = -1;
-       ipc6.tclass = -1;
-       ipc6.dontfrag = -1;
-       ipc6.opt = NULL;
+       ipcm6_init(&ipc6);
+       ipc6.sockc.tsflags = sk->sk_tsflags;
 
        if (sin6) {
                if (addr_len < SIN6_LEN_RFC2133)
@@ -847,14 +845,13 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
        if (fl6.flowi6_oif == 0)
                fl6.flowi6_oif = sk->sk_bound_dev_if;
 
-       sockc.tsflags = sk->sk_tsflags;
        if (msg->msg_controllen) {
                opt = &opt_space;
                memset(opt, 0, sizeof(struct ipv6_txoptions));
                opt->tot_len = sizeof(struct ipv6_txoptions);
                ipc6.opt = opt;
 
-               err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, &ipc6, &sockc);
+               err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, &ipc6);
                if (err < 0) {
                        fl6_sock_release(flowlabel);
                        return err;
@@ -921,13 +918,14 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 
 back_from_confirm:
        if (inet->hdrincl)
-               err = rawv6_send_hdrinc(sk, msg, len, &fl6, &dst, msg->msg_flags);
+               err = rawv6_send_hdrinc(sk, msg, len, &fl6, &dst,
+                                       msg->msg_flags, &ipc6.sockc);
        else {
                ipc6.opt = opt;
                lock_sock(sk);
                err = ip6_append_data(sk, raw6_getfrag, &rfv,
                        len, 0, &ipc6, &fl6, (struct rt6_info *)dst,
-                       msg->msg_flags, &sockc);
+                       msg->msg_flags);
 
                if (err)
                        ip6_flush_pending_frames(sk);
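
rawv6_send_hdrinc() now receives the sockcm_cookie (folded into ipcm6_cookie
as ipc6.sockc) and stamps skb->tstamp with the requested transmit time. In
sketch form; attributing transmit_time to this cycle's SO_TXTIME/SCM_TXTIME
work is an assumption:

    /* sendmsg cmsg --> ipc6.sockc.transmit_time --> skb->tstamp,
     * letting a TXTIME-aware qdisc hold the skb until launch time. */
    static void stamp_skb(struct sk_buff *skb,
                          const struct sockcm_cookie *sockc)
    {
            skb->tstamp = sockc->transmit_time;  /* 0 means "send now" */
    }
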
index b939b94e7e91ddae1552f0b6f6a54c42ab180615..6edd2ac8ae4bd0946b81e787aedc2486be02b990 100644 (file)
@@ -57,7 +57,7 @@
 #include <net/rawv6.h>
 #include <net/ndisc.h>
 #include <net/addrconf.h>
-#include <net/inet_frag.h>
+#include <net/ipv6_frag.h>
 #include <net/inet_ecn.h>
 
 static const char ip6_frag_cache_name[] = "ip6-frags";
@@ -72,61 +72,6 @@ static struct inet_frags ip6_frags;
 static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
                          struct net_device *dev);
 
-void ip6_frag_init(struct inet_frag_queue *q, const void *a)
-{
-       struct frag_queue *fq = container_of(q, struct frag_queue, q);
-       const struct frag_v6_compare_key *key = a;
-
-       q->key.v6 = *key;
-       fq->ecn = 0;
-}
-EXPORT_SYMBOL(ip6_frag_init);
-
-void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq)
-{
-       struct net_device *dev = NULL;
-       struct sk_buff *head;
-
-       rcu_read_lock();
-       spin_lock(&fq->q.lock);
-
-       if (fq->q.flags & INET_FRAG_COMPLETE)
-               goto out;
-
-       inet_frag_kill(&fq->q);
-
-       dev = dev_get_by_index_rcu(net, fq->iif);
-       if (!dev)
-               goto out;
-
-       __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
-       __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);
-
-       /* Don't send error if the first segment did not arrive. */
-       head = fq->q.fragments;
-       if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !head)
-               goto out;
-
-       /* But use the device on which the LAST segment
-        * arrived as the source. And do not use the fq->dev
-        * pointer directly; the device might have already disappeared.
-        */
-       head->dev = dev;
-       skb_get(head);
-       spin_unlock(&fq->q.lock);
-
-       icmpv6_send(head, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0);
-       kfree_skb(head);
-       goto out_rcu_unlock;
-
-out:
-       spin_unlock(&fq->q.lock);
-out_rcu_unlock:
-       rcu_read_unlock();
-       inet_frag_put(&fq->q);
-}
-EXPORT_SYMBOL(ip6_expire_frag_queue);
-
 static void ip6_frag_expire(struct timer_list *t)
 {
        struct inet_frag_queue *frag = from_timer(frag, t, timer);
@@ -136,7 +81,7 @@ static void ip6_frag_expire(struct timer_list *t)
        fq = container_of(frag, struct frag_queue, q);
        net = container_of(fq->q.net, struct net, ipv6.frags);
 
-       ip6_expire_frag_queue(net, fq);
+       ip6frag_expire_frag_queue(net, fq);
 }
 
 static struct frag_queue *
@@ -696,42 +641,19 @@ static struct pernet_operations ip6_frags_ops = {
        .exit = ipv6_frags_exit_net,
 };
 
-static u32 ip6_key_hashfn(const void *data, u32 len, u32 seed)
-{
-       return jhash2(data,
-                     sizeof(struct frag_v6_compare_key) / sizeof(u32), seed);
-}
-
-static u32 ip6_obj_hashfn(const void *data, u32 len, u32 seed)
-{
-       const struct inet_frag_queue *fq = data;
-
-       return jhash2((const u32 *)&fq->key.v6,
-                     sizeof(struct frag_v6_compare_key) / sizeof(u32), seed);
-}
-
-static int ip6_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr)
-{
-       const struct frag_v6_compare_key *key = arg->key;
-       const struct inet_frag_queue *fq = ptr;
-
-       return !!memcmp(&fq->key, key, sizeof(*key));
-}
-
-const struct rhashtable_params ip6_rhash_params = {
+static const struct rhashtable_params ip6_rhash_params = {
        .head_offset            = offsetof(struct inet_frag_queue, node),
-       .hashfn                 = ip6_key_hashfn,
-       .obj_hashfn             = ip6_obj_hashfn,
-       .obj_cmpfn              = ip6_obj_cmpfn,
+       .hashfn                 = ip6frag_key_hashfn,
+       .obj_hashfn             = ip6frag_obj_hashfn,
+       .obj_cmpfn              = ip6frag_obj_cmpfn,
        .automatic_shrinking    = true,
 };
-EXPORT_SYMBOL(ip6_rhash_params);
 
 int __init ipv6_frag_init(void)
 {
        int ret;
 
-       ip6_frags.constructor = ip6_frag_init;
+       ip6_frags.constructor = ip6frag_init;
        ip6_frags.destructor = NULL;
        ip6_frags.qsize = sizeof(struct frag_queue);
        ip6_frags.frag_expire = ip6_frag_expire;
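
Both fragment-expiry callbacks (the conntrack one earlier in this diff and
the core one here) now call a shared ip6frag_expire_frag_queue(). Presumably
the new inline in <net/ipv6_frag.h> carries over the ip6_expire_frag_queue()
body deleted above; condensed sketch:

    static inline void ip6frag_expire_frag_queue(struct net *net,
                                                 struct frag_queue *fq)
    {
            spin_lock(&fq->q.lock);
            if (fq->q.flags & INET_FRAG_COMPLETE)
                    goto out;
            inet_frag_kill(&fq->q);
            /* ... look up fq->iif, bump REASMFAILS/REASMTIMEOUT and, if
             * the first fragment arrived, send ICMPV6_TIME_EXCEED with
             * code ICMPV6_EXC_FRAGTIME (RCU locking elided) ... */
    out:
            spin_unlock(&fq->q.lock);
            inet_frag_put(&fq->q);
    }
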
index 0fdf2a55e746c9b66c018c2e357b473759afd83e..8d0ba757a46ce52bf85d5e88d43567b5bd96678d 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/net.h>
 #include <linux/in6.h>
 #include <linux/slab.h>
+#include <linux/rhashtable.h>
 
 #include <net/ipv6.h>
 #include <net/protocol.h>
index 558fe8cc6d43858ca828cbd8dc8ea65e63bc6602..8546f94f30d47a203f3d2f4d73f3ece462bb4e4e 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/icmpv6.h>
 #include <linux/mroute6.h>
 #include <linux/slab.h>
+#include <linux/rhashtable.h>
 
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv6.h>
index cd6e4cab63f6ca37e0bd7699ca1208ae94aad410..e1025b493a185ee8eaba0941c05bc293b025343f 100644 (file)
@@ -637,12 +637,10 @@ static int parse_nla_srh(struct nlattr **attrs, struct seg6_local_lwt *slwt)
        if (!seg6_validate_srh(srh, len))
                return -EINVAL;
 
-       slwt->srh = kmalloc(len, GFP_KERNEL);
+       slwt->srh = kmemdup(srh, len, GFP_KERNEL);
        if (!slwt->srh)
                return -ENOMEM;
 
-       memcpy(slwt->srh, srh, len);
-
        slwt->headroom += len;
 
        return 0;
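
The kmalloc()+memcpy() pair collapses into kmemdup(), a straight cleanup.
Semantically, kmemdup() is just the following (the real implementation uses
kmalloc_track_caller() for better slab accounting):

    void *kmemdup(const void *src, size_t len, gfp_t gfp)
    {
            void *p = kmalloc(len, gfp);

            if (p)
                    memcpy(p, src, len);
            return p;
    }
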
index 278e49cd67d4e2c7b0ab9138fabe84753d628b5a..e72947c99454e54fefee30efa8aeea9bc13908b5 100644 (file)
@@ -15,8 +15,8 @@
 #include <net/ip6_checksum.h>
 #include "ip6_offload.h"
 
-static struct sk_buff **tcp6_gro_receive(struct sk_buff **head,
-                                        struct sk_buff *skb)
+static struct sk_buff *tcp6_gro_receive(struct list_head *head,
+                                       struct sk_buff *skb)
 {
        /* Don't bother verifying checksum if we're going to flush anyway. */
        if (!NAPI_GRO_CB(skb)->flush &&
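
GRO receive handlers change type from a hand-rolled singly linked sk_buff
chain (struct sk_buff **) to a regular list_head, part of this cycle's GRO
list conversion. tcp6_gro_receive() itself just forwards to
tcp_gro_receive(); for handlers that walk the held packets, the shape of the
change is roughly (schematic fragments):

    struct sk_buff *p;

    /* before: hand-rolled singly linked chain */
    for (p = *head; p; p = p->next)
            ;  /* ... flow comparison ... */

    /* after: standard kernel list iteration */
    list_for_each_entry(p, head, list)
            ;  /* ... flow comparison ... */
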
index e6645cae403ed81004404049a1b95927921bfff6..f6b96956a8edf4b9160425c89b28348af0b1bda7 100644 (file)
@@ -1141,13 +1141,10 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
        int err;
        int is_udplite = IS_UDPLITE(sk);
        int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
-       struct sockcm_cookie sockc;
 
-       ipc6.hlimit = -1;
-       ipc6.tclass = -1;
-       ipc6.dontfrag = -1;
+       ipcm6_init(&ipc6);
        ipc6.gso_size = up->gso_size;
-       sockc.tsflags = sk->sk_tsflags;
+       ipc6.sockc.tsflags = sk->sk_tsflags;
 
        /* destination address check */
        if (sin6) {
@@ -1282,7 +1279,7 @@ do_udp_sendmsg:
                err = udp_cmsg_send(sk, msg, &ipc6.gso_size);
                if (err > 0)
                        err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6,
-                                                   &ipc6, &sockc);
+                                                   &ipc6);
                if (err < 0) {
                        fl6_sock_release(flowlabel);
                        return err;
@@ -1376,7 +1373,7 @@ back_from_confirm:
                skb = ip6_make_skb(sk, getfrag, msg, ulen,
                                   sizeof(struct udphdr), &ipc6,
                                   &fl6, (struct rt6_info *)dst,
-                                  msg->msg_flags, &cork, &sockc);
+                                  msg->msg_flags, &cork);
                err = PTR_ERR(skb);
                if (!IS_ERR_OR_NULL(skb))
                        err = udp_v6_send_skb(skb, &fl6, &cork.base);
@@ -1402,7 +1399,7 @@ do_append_data:
        up->len += ulen;
        err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr),
                              &ipc6, &fl6, (struct rt6_info *)dst,
-                             corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags, &sockc);
+                             corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
        if (err)
                udp_v6_flush_pending_frames(sk);
        else if (!corkreq)
index 03a2ff3fe1e697e752e2aa9f13703b6feaff0453..95dee9ca8d22186486b09ef7514ec69e0985ff3a 100644 (file)
@@ -114,8 +114,8 @@ out:
        return segs;
 }
 
-static struct sk_buff **udp6_gro_receive(struct sk_buff **head,
-                                        struct sk_buff *skb)
+static struct sk_buff *udp6_gro_receive(struct list_head *head,
+                                       struct sk_buff *skb)
 {
        struct udphdr *uh = udp_gro_udphdr(skb);
 
index 07d36573f50b9451e4c2bfee331ac2c023791a7a..da28e4407b8faa91eeb7517e87e9b644ae3f841a 100644 (file)
@@ -55,7 +55,7 @@ static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb)
        __skb_pull(skb, hdr_len);
        memmove(ipv6_hdr(skb), iph, hdr_len);
 
-       x->lastused = get_seconds();
+       x->lastused = ktime_get_real_seconds();
 
        return 0;
 }
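
Part of the y2038 cleanup: get_seconds() returns unsigned long, which is 32
bits on 32-bit architectures, while ktime_get_real_seconds() is 64-bit
everywhere.

    /* The two signatures, for contrast: */
    unsigned long get_seconds(void);        /* word-sized, can wrap */
    time64_t ktime_get_real_seconds(void);  /* time64_t on all arches */
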
index 893a022f962081416fa1b9e5f96416a8c2e92e5c..92ee91e343959036fb238c060af85937e4e16a7b 100644 (file)
@@ -1494,7 +1494,7 @@ __poll_t iucv_sock_poll(struct file *file, struct socket *sock,
        struct sock *sk = sock->sk;
        __poll_t mask = 0;
 
-       sock_poll_wait(file, sk_sleep(sk), wait);
+       sock_poll_wait(file, wait);
 
        if (sk->sk_state == IUCV_LISTEN)
                return iucv_accept_poll(sk);
@@ -2515,4 +2515,3 @@ MODULE_DESCRIPTION("IUCV Sockets ver " VERSION);
 MODULE_VERSION(VERSION);
 MODULE_LICENSE("GPL");
 MODULE_ALIAS_NETPROTO(PF_IUCV);
-
index 87fca36e6c47b98663a9e8ee1172057818324c1d..9ca83f2ade6ff66df1cf6d4476eb766cfb573740 100644 (file)
@@ -8,4 +8,3 @@ config AF_KCM
          KCM (Kernel Connection Multiplexor) sockets provide a method
           for multiplexing messages of a message-based application
           protocol over kernel connections (e.g. TCP connections).
-
index d3601d421571b9825ff0a6cea9b75cb52fd51dea..571d824e4e2499e1f591701ed0e25c4aff73b00a 100644 (file)
@@ -2104,4 +2104,3 @@ module_exit(kcm_exit);
 
 MODULE_LICENSE("GPL");
 MODULE_ALIAS_NETPROTO(PF_KCM);
-
index 5e1d2946ffbf2a2cf4e65db44658c7f374e72e25..9d61266526e767770d9a1ce184ac8cdd59de309a 100644 (file)
@@ -1383,7 +1383,7 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, const struct sadb_
        }
 
        if (!x)
-               x = xfrm_find_acq(net, &dummy_mark, mode, reqid, proto, xdaddr, xsaddr, 1, family);
+               x = xfrm_find_acq(net, &dummy_mark, mode, reqid, 0, proto, xdaddr, xsaddr, 1, family);
 
        if (x == NULL)
                return -ENOENT;
@@ -2414,7 +2414,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa
                        return err;
        }
 
-       xp = xfrm_policy_bysel_ctx(net, DUMMY_MARK, XFRM_POLICY_TYPE_MAIN,
+       xp = xfrm_policy_bysel_ctx(net, DUMMY_MARK, 0, XFRM_POLICY_TYPE_MAIN,
                                   pol->sadb_x_policy_dir - 1, &sel, pol_ctx,
                                   1, &err);
        security_xfrm_policy_free(pol_ctx);
@@ -2663,7 +2663,7 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, const struct sadb_
                return -EINVAL;
 
        delete = (hdr->sadb_msg_type == SADB_X_SPDDELETE2);
-       xp = xfrm_policy_byid(net, DUMMY_MARK, XFRM_POLICY_TYPE_MAIN,
+       xp = xfrm_policy_byid(net, DUMMY_MARK, 0, XFRM_POLICY_TYPE_MAIN,
                              dir, pol->sadb_x_policy_id, delete, &err);
        if (xp == NULL)
                return -ENOENT;
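
The extra 0 argument threaded through these PF_KEY lookups appears to be the
XFRM interface ID (if_id) added elsewhere in this merge; PF_KEY predates XFRM
interfaces, so it always passes 0, i.e. "not bound to any interface". Calling
convention as used above (the if_id interpretation is an assumption):

    x = xfrm_find_acq(net, &dummy_mark, mode, reqid, 0 /* if_id */,
                      proto, xdaddr, xsaddr, 1, family);
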
index 40261cb68e83686c73a3567062fbf28f4a3d3146..ac6a00bcec717237e8b6b5f6b46de0a0afffc57d 100644 (file)
@@ -322,8 +322,7 @@ int l2tp_session_register(struct l2tp_session *session,
 
        if (tunnel->version == L2TP_HDR_VER_3) {
                pn = l2tp_pernet(tunnel->l2tp_net);
-               g_head = l2tp_session_id_hash_2(l2tp_pernet(tunnel->l2tp_net),
-                                               session->session_id);
+               g_head = l2tp_session_id_hash_2(pn, session->session_id);
 
                spin_lock_bh(&pn->l2tp_session_hlist_lock);
 
@@ -620,7 +619,7 @@ discard:
  */
 void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
                      unsigned char *ptr, unsigned char *optr, u16 hdrflags,
-                     int length, int (*payload_hook)(struct sk_buff *skb))
+                     int length)
 {
        struct l2tp_tunnel *tunnel = session->tunnel;
        int offset;
@@ -741,13 +740,6 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
 
        __skb_pull(skb, offset);
 
-       /* If caller wants to process the payload before we queue the
-        * packet, do so now.
-        */
-       if (payload_hook)
-               if ((*payload_hook)(skb))
-                       goto discard;
-
        /* Prepare skb for adding to the session's reorder_q.  Hold
         * packets for max reorder_timeout or 1 second if not
         * reordering.
@@ -783,7 +775,7 @@ EXPORT_SYMBOL(l2tp_recv_common);
 
 /* Drop skbs from the session's reorder_q
  */
-int l2tp_session_queue_purge(struct l2tp_session *session)
+static int l2tp_session_queue_purge(struct l2tp_session *session)
 {
        struct sk_buff *skb = NULL;
        BUG_ON(!session);
@@ -794,7 +786,6 @@ int l2tp_session_queue_purge(struct l2tp_session *session)
        }
        return 0;
 }
-EXPORT_SYMBOL_GPL(l2tp_session_queue_purge);
 
 /* Internal UDP receive frame. Do the real work of receiving an L2TP data frame
  * here. The skb is not on a list when we get here.
@@ -802,8 +793,7 @@ EXPORT_SYMBOL_GPL(l2tp_session_queue_purge);
  * Returns 1 if the packet was not a good data packet and could not be
  * forwarded.  All such packets are passed up to userspace to deal with.
  */
-static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb,
-                             int (*payload_hook)(struct sk_buff *skb))
+static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb)
 {
        struct l2tp_session *session = NULL;
        unsigned char *ptr, *optr;
@@ -894,7 +884,7 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb,
                goto error;
        }
 
-       l2tp_recv_common(session, skb, ptr, optr, hdrflags, length, payload_hook);
+       l2tp_recv_common(session, skb, ptr, optr, hdrflags, length);
        l2tp_session_dec_refcount(session);
 
        return 0;
@@ -923,7 +913,7 @@ int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
        l2tp_dbg(tunnel, L2TP_MSG_DATA, "%s: received %d bytes\n",
                 tunnel->name, skb->len);
 
-       if (l2tp_udp_recv_core(tunnel, skb, tunnel->recv_payload_hook))
+       if (l2tp_udp_recv_core(tunnel, skb))
                goto pass_up;
 
        return 0;
@@ -1009,8 +999,8 @@ static int l2tp_build_l2tpv3_header(struct l2tp_session *session, void *buf)
        return bufp - optr;
 }
 
-static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb,
-                         struct flowi *fl, size_t data_len)
+static void l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb,
+                          struct flowi *fl, size_t data_len)
 {
        struct l2tp_tunnel *tunnel = session->tunnel;
        unsigned int len = skb->len;
@@ -1052,8 +1042,6 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb,
                atomic_long_inc(&tunnel->stats.tx_errors);
                atomic_long_inc(&session->stats.tx_errors);
        }
-
-       return 0;
 }
 
 /* If caller requires the skb to have a ppp header, the header must be
@@ -1193,7 +1181,7 @@ end:
 
 /* When the tunnel is closed, all the attached sessions need to go too.
  */
-void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel)
+static void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel)
 {
        int hash;
        struct hlist_node *walk;
@@ -1242,7 +1230,6 @@ again:
        }
        write_unlock_bh(&tunnel->hlist_lock);
 }
-EXPORT_SYMBOL_GPL(l2tp_tunnel_closeall);
 
 /* Tunnel socket destroy hook for UDP encapsulation */
 static void l2tp_udp_encap_destroy(struct sock *sk)
@@ -1687,8 +1674,6 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn
                if (cfg) {
                        session->pwtype = cfg->pw_type;
                        session->debug = cfg->debug;
-                       session->mtu = cfg->mtu;
-                       session->mru = cfg->mru;
                        session->send_seq = cfg->send_seq;
                        session->recv_seq = cfg->recv_seq;
                        session->lns_mode = cfg->lns_mode;
@@ -1800,4 +1785,3 @@ MODULE_AUTHOR("James Chapman <jchapman@katalix.com>");
 MODULE_DESCRIPTION("L2TP core");
 MODULE_LICENSE("GPL");
 MODULE_VERSION(L2TP_DRV_VERSION);
-
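
Removing recv_payload_hook devirtualizes the L2TP receive path: the only user
was PPP, which now strips its optional 0xFF03 header directly in
pppol2tp_recv() (see the l2tp_ppp.c hunk later in this diff). Before/after
shape of the hot path:

    /* before: every data packet paid an indirect call, even for
     * pseudowires that registered no hook */
    if (payload_hook && (*payload_hook)(skb))
            goto discard;

    /* after: no hook at all; PPP handles its header inside its own
     * recv_skb callback, so the generic path stays branch-free here */
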
index c199020f8a8a315590a53f89aaf9370debcb09db..5804065dfbfb0f4a57ef8793e7f79514cf4f11cd 100644 (file)
@@ -12,6 +12,9 @@
 #ifndef _L2TP_CORE_H_
 #define _L2TP_CORE_H_
 
+#include <net/dst.h>
+#include <net/sock.h>
+
 /* Just some random numbers */
 #define L2TP_TUNNEL_MAGIC      0x42114DDA
 #define L2TP_SESSION_MAGIC     0x0C04EB7D
@@ -45,10 +48,6 @@ struct l2tp_tunnel;
  */
 struct l2tp_session_cfg {
        enum l2tp_pwtype        pw_type;
-       unsigned int            data_seq:2;     /* data sequencing level
-                                                * 0 => none, 1 => IP only,
-                                                * 2 => all
-                                                */
        unsigned int            recv_seq:1;     /* expect receive packets with
                                                 * sequence numbers? */
        unsigned int            send_seq:1;     /* send packets with sequence
@@ -58,7 +57,6 @@ struct l2tp_session_cfg {
                                                 * control of LNS. */
        int                     debug;          /* bitmask of debug message
                                                 * categories */
-       u16                     vlan_id;        /* VLAN pseudowire only */
        u16                     l2specific_type; /* Layer 2 specific type */
        u8                      cookie[8];      /* optional cookie */
        int                     cookie_len;     /* 0, 4 or 8 bytes */
@@ -66,8 +64,6 @@ struct l2tp_session_cfg {
        int                     peer_cookie_len; /* 0, 4 or 8 bytes */
        int                     reorder_timeout; /* configured reorder timeout
                                                  * (in jiffies) */
-       int                     mtu;
-       int                     mru;
        char                    *ifname;
 };
 
@@ -99,10 +95,6 @@ struct l2tp_session {
 
        char                    name[32];       /* for logging */
        char                    ifname[IFNAMSIZ];
-       unsigned int            data_seq:2;     /* data sequencing level
-                                                * 0 => none, 1 => IP only,
-                                                * 2 => all
-                                                */
        unsigned int            recv_seq:1;     /* expect receive packets with
                                                 * sequence numbers? */
        unsigned int            send_seq:1;     /* send packets with sequence
@@ -115,8 +107,6 @@ struct l2tp_session {
        int                     reorder_timeout; /* configured reorder timeout
                                                  * (in jiffies) */
        int                     reorder_skip;   /* set if skip to next nr */
-       int                     mtu;
-       int                     mru;
        enum l2tp_pwtype        pwtype;
        struct l2tp_stats       stats;
        struct hlist_node       global_hlist;   /* Global hash list node */
@@ -180,18 +170,12 @@ struct l2tp_tunnel {
        struct net              *l2tp_net;      /* the net we belong to */
 
        refcount_t              ref_count;
-#ifdef CONFIG_DEBUG_FS
-       void (*show)(struct seq_file *m, void *arg);
-#endif
-       int (*recv_payload_hook)(struct sk_buff *skb);
        void (*old_sk_destruct)(struct sock *);
        struct sock             *sock;          /* Parent socket */
        int                     fd;             /* Parent fd, if tunnel socket
                                                 * was created by userspace */
 
        struct work_struct      del_work;
-
-       uint8_t                 priv[0];        /* private data */
 };
 
 struct l2tp_nl_cmd_ops {
@@ -201,11 +185,6 @@ struct l2tp_nl_cmd_ops {
        int (*session_delete)(struct l2tp_session *session);
 };
 
-static inline void *l2tp_tunnel_priv(struct l2tp_tunnel *tunnel)
-{
-       return &tunnel->priv[0];
-}
-
 static inline void *l2tp_session_priv(struct l2tp_session *session)
 {
        return &session->priv[0];
@@ -229,7 +208,6 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id,
 int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
                         struct l2tp_tunnel_cfg *cfg);
 
-void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel);
 void l2tp_tunnel_delete(struct l2tp_tunnel *tunnel);
 struct l2tp_session *l2tp_session_create(int priv_size,
                                         struct l2tp_tunnel *tunnel,
@@ -243,8 +221,7 @@ int l2tp_session_delete(struct l2tp_session *session);
 void l2tp_session_free(struct l2tp_session *session);
 void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
                      unsigned char *ptr, unsigned char *optr, u16 hdrflags,
-                     int length, int (*payload_hook)(struct sk_buff *skb));
-int l2tp_session_queue_purge(struct l2tp_session *session);
+                     int length);
 int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb);
 void l2tp_session_set_header_len(struct l2tp_session *session, int version);
 
@@ -292,6 +269,21 @@ static inline int l2tp_get_l2specific_len(struct l2tp_session *session)
        }
 }
 
+static inline u32 l2tp_tunnel_dst_mtu(const struct l2tp_tunnel *tunnel)
+{
+       struct dst_entry *dst;
+       u32 mtu;
+
+       dst = sk_dst_get(tunnel->sock);
+       if (!dst)
+               return 0;
+
+       mtu = dst_mtu(dst);
+       dst_release(dst);
+
+       return mtu;
+}
+
 #define l2tp_printk(ptr, type, func, fmt, ...)                         \
 do {                                                                   \
        if (((ptr)->debug) & (type))                                    \
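
The new l2tp_tunnel_dst_mtu() helper centralizes the
sk_dst_get()/dst_mtu()/dst_release() dance that l2tp_eth previously
open-coded, returning 0 when the tunnel socket has no cached route. A usage
sketch; l2tp_eth_adjust_mtu() later in this diff instead lets the u32
underflow of 0 - overhead trip its range check:

    u32 mtu = l2tp_tunnel_dst_mtu(tunnel);

    if (mtu)
            dev->mtu = mtu - overhead;           /* PMTU-derived */
    else
            dev->mtu = ETH_DATA_LEN - overhead;  /* no route cached */
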
index e87686f7d63ca7d98f9bcb60e72d36a684324d21..9821a1458555d3ad04a88749ad4f544784863be8 100644 (file)
@@ -177,9 +177,6 @@ static void l2tp_dfs_seq_tunnel_show(struct seq_file *m, void *v)
                   atomic_long_read(&tunnel->stats.rx_packets),
                   atomic_long_read(&tunnel->stats.rx_bytes),
                   atomic_long_read(&tunnel->stats.rx_errors));
-
-       if (tunnel->show != NULL)
-               tunnel->show(m, tunnel);
 }
 
 static void l2tp_dfs_seq_session_show(struct seq_file *m, void *v)
@@ -194,12 +191,9 @@ static void l2tp_dfs_seq_session_show(struct seq_file *m, void *v)
        if (session->send_seq || session->recv_seq)
                seq_printf(m, "   nr %hu, ns %hu\n", session->nr, session->ns);
        seq_printf(m, "   refcnt %d\n", refcount_read(&session->ref_count));
-       seq_printf(m, "   config %d/%d/%c/%c/%s/%s %08x %u\n",
-                  session->mtu, session->mru,
+       seq_printf(m, "   config 0/0/%c/%c/-/%s %08x %u\n",
                   session->recv_seq ? 'R' : '-',
                   session->send_seq ? 'S' : '-',
-                  session->data_seq == 1 ? "IPSEQ" :
-                  session->data_seq == 2 ? "DATASEQ" : "-",
                   session->lns_mode ? "LNS" : "LAC",
                   session->debug,
                   jiffies_to_msecs(session->reorder_timeout));
index 5c366ecfa1cb0e2918c54f30d80ab30780f4cfdb..3728986ec8858537bb233cddaf359a029eca1eb2 100644 (file)
@@ -226,22 +226,19 @@ static void l2tp_eth_adjust_mtu(struct l2tp_tunnel *tunnel,
                                struct net_device *dev)
 {
        unsigned int overhead = 0;
-       struct dst_entry *dst;
        u32 l3_overhead = 0;
+       u32 mtu;
 
        /* if the encap is UDP, account for UDP header size */
        if (tunnel->encap == L2TP_ENCAPTYPE_UDP) {
                overhead += sizeof(struct udphdr);
                dev->needed_headroom += sizeof(struct udphdr);
        }
-       if (session->mtu != 0) {
-               dev->mtu = session->mtu;
-               dev->needed_headroom += session->hdr_len;
-               return;
-       }
+
        lock_sock(tunnel->sock);
        l3_overhead = kernel_sock_ip_overhead(tunnel->sock);
        release_sock(tunnel->sock);
+
        if (l3_overhead == 0) {
                /* L3 Overhead couldn't be identified, this could be
                 * because tunnel->sock was NULL or the socket's
@@ -255,18 +252,12 @@ static void l2tp_eth_adjust_mtu(struct l2tp_tunnel *tunnel,
         */
        overhead += session->hdr_len + ETH_HLEN + l3_overhead;
 
-       /* If PMTU discovery was enabled, use discovered MTU on L2TP device */
-       dst = sk_dst_get(tunnel->sock);
-       if (dst) {
-               /* dst_mtu will use PMTU if found, else fallback to intf MTU */
-               u32 pmtu = dst_mtu(dst);
+       mtu = l2tp_tunnel_dst_mtu(tunnel) - overhead;
+       if (mtu < dev->min_mtu || mtu > dev->max_mtu)
+               dev->mtu = ETH_DATA_LEN - overhead;
+       else
+               dev->mtu = mtu;
 
-               if (pmtu != 0)
-                       dev->mtu = pmtu;
-               dst_release(dst);
-       }
-       session->mtu = dev->mtu - overhead;
-       dev->mtu = session->mtu;
        dev->needed_headroom += session->hdr_len;
 }
 
index a9c05b2bc1b0bc3471bbf62dc3b7c11e971a7f08..0bc39cc20a3fcf8f5e532c137145747f40a7644c 100644 (file)
@@ -165,7 +165,7 @@ static int l2tp_ip_recv(struct sk_buff *skb)
                print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length);
        }
 
-       l2tp_recv_common(session, skb, ptr, optr, 0, skb->len, tunnel->recv_payload_hook);
+       l2tp_recv_common(session, skb, ptr, optr, 0, skb->len);
        l2tp_session_dec_refcount(session);
 
        return 0;
index 957369192ca181d6da21c9dda03d0e8a9726643e..42f828cf62fbb38fea121c270a57dd9d55a95cde 100644 (file)
@@ -178,8 +178,7 @@ static int l2tp_ip6_recv(struct sk_buff *skb)
                print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length);
        }
 
-       l2tp_recv_common(session, skb, ptr, optr, 0, skb->len,
-                        tunnel->recv_payload_hook);
+       l2tp_recv_common(session, skb, ptr, optr, 0, skb->len);
        l2tp_session_dec_refcount(session);
 
        return 0;
@@ -500,7 +499,6 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
        struct ip6_flowlabel *flowlabel = NULL;
        struct dst_entry *dst = NULL;
        struct flowi6 fl6;
-       struct sockcm_cookie sockc_unused = {0};
        struct ipcm6_cookie ipc6;
        int addr_len = msg->msg_namelen;
        int transhdrlen = 4; /* zero session-id */
@@ -525,9 +523,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
        fl6.flowi6_mark = sk->sk_mark;
        fl6.flowi6_uid = sk->sk_uid;
 
-       ipc6.hlimit = -1;
-       ipc6.tclass = -1;
-       ipc6.dontfrag = -1;
+       ipcm6_init(&ipc6);
 
        if (lsa) {
                if (addr_len < SIN6_LEN_RFC2133)
@@ -575,8 +571,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
                opt->tot_len = sizeof(struct ipv6_txoptions);
                ipc6.opt = opt;
 
-               err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, &ipc6,
-                                           &sockc_unused);
+               err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, &ipc6);
                if (err < 0) {
                        fl6_sock_release(flowlabel);
                        return err;
@@ -641,7 +636,7 @@ back_from_confirm:
        err = ip6_append_data(sk, ip_generic_getfrag, msg,
                              ulen, transhdrlen, &ipc6,
                              &fl6, (struct rt6_info *)dst,
-                             msg->msg_flags, &sockc_unused);
+                             msg->msg_flags);
        if (err)
                ip6_flush_pending_frames(sk);
        else if (!(msg->msg_flags & MSG_MORE))
index 5b9900889e311f964c4d7640f152a60edfb02740..2e1e92651545fd6cdc2b3ddbb4e75ea886e9452f 100644 (file)
@@ -560,9 +560,6 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
        }
 
        if (tunnel->version > 2) {
-               if (info->attrs[L2TP_ATTR_DATA_SEQ])
-                       cfg.data_seq = nla_get_u8(info->attrs[L2TP_ATTR_DATA_SEQ]);
-
                if (info->attrs[L2TP_ATTR_L2SPEC_TYPE]) {
                        cfg.l2specific_type = nla_get_u8(info->attrs[L2TP_ATTR_L2SPEC_TYPE]);
                        if (cfg.l2specific_type != L2TP_L2SPECTYPE_DEFAULT &&
@@ -594,9 +591,6 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
                }
                if (info->attrs[L2TP_ATTR_IFNAME])
                        cfg.ifname = nla_data(info->attrs[L2TP_ATTR_IFNAME]);
-
-               if (info->attrs[L2TP_ATTR_VLAN_ID])
-                       cfg.vlan_id = nla_get_u16(info->attrs[L2TP_ATTR_VLAN_ID]);
        }
 
        if (info->attrs[L2TP_ATTR_DEBUG])
@@ -614,12 +608,6 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
        if (info->attrs[L2TP_ATTR_RECV_TIMEOUT])
                cfg.reorder_timeout = nla_get_msecs(info->attrs[L2TP_ATTR_RECV_TIMEOUT]);
 
-       if (info->attrs[L2TP_ATTR_MTU])
-               cfg.mtu = nla_get_u16(info->attrs[L2TP_ATTR_MTU]);
-
-       if (info->attrs[L2TP_ATTR_MRU])
-               cfg.mru = nla_get_u16(info->attrs[L2TP_ATTR_MRU]);
-
 #ifdef CONFIG_MODULES
        if (l2tp_nl_cmd_ops[cfg.pw_type] == NULL) {
                genl_unlock();
@@ -693,9 +681,6 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf
        if (info->attrs[L2TP_ATTR_DEBUG])
                session->debug = nla_get_u32(info->attrs[L2TP_ATTR_DEBUG]);
 
-       if (info->attrs[L2TP_ATTR_DATA_SEQ])
-               session->data_seq = nla_get_u8(info->attrs[L2TP_ATTR_DATA_SEQ]);
-
        if (info->attrs[L2TP_ATTR_RECV_SEQ])
                session->recv_seq = nla_get_u8(info->attrs[L2TP_ATTR_RECV_SEQ]);
 
@@ -710,12 +695,6 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf
        if (info->attrs[L2TP_ATTR_RECV_TIMEOUT])
                session->reorder_timeout = nla_get_msecs(info->attrs[L2TP_ATTR_RECV_TIMEOUT]);
 
-       if (info->attrs[L2TP_ATTR_MTU])
-               session->mtu = nla_get_u16(info->attrs[L2TP_ATTR_MTU]);
-
-       if (info->attrs[L2TP_ATTR_MRU])
-               session->mru = nla_get_u16(info->attrs[L2TP_ATTR_MRU]);
-
        ret = l2tp_session_notify(&l2tp_nl_family, info,
                                  session, L2TP_CMD_SESSION_MODIFY);
 
@@ -745,10 +724,7 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl
            nla_put_u32(skb, L2TP_ATTR_PEER_SESSION_ID,
                        session->peer_session_id) ||
            nla_put_u32(skb, L2TP_ATTR_DEBUG, session->debug) ||
-           nla_put_u16(skb, L2TP_ATTR_PW_TYPE, session->pwtype) ||
-           nla_put_u16(skb, L2TP_ATTR_MTU, session->mtu) ||
-           (session->mru &&
-            nla_put_u16(skb, L2TP_ATTR_MRU, session->mru)))
+           nla_put_u16(skb, L2TP_ATTR_PW_TYPE, session->pwtype))
                goto nla_put_failure;
 
        if ((session->ifname[0] &&
index cf6cca260e7b5b42ef72a3fe99c6f0c5ad08bb8a..6e2c8e7595e0ff0b1d50a9508ed8166bef703f9f 100644 (file)
@@ -93,7 +93,6 @@
 #include <linux/nsproxy.h>
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
-#include <net/dst.h>
 #include <net/ip.h>
 #include <net/udp.h>
 #include <net/xfrm.h>
@@ -127,8 +126,6 @@ struct pppol2tp_session {
                                                 * PPPoX socket */
        struct sock             *__sk;          /* Copy of .sk, for cleanup */
        struct rcu_head         rcu;            /* For asynchronous release */
-       int                     flags;          /* accessed by PPPIOCGFLAGS.
-                                                * Unused. */
 };
 
 static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb);
@@ -183,25 +180,6 @@ out:
  * Receive data handling
  *****************************************************************************/
 
-static int pppol2tp_recv_payload_hook(struct sk_buff *skb)
-{
-       /* Skip PPP header, if present.  In testing, Microsoft L2TP clients
-        * don't send the PPP header (PPP header compression enabled), but
-        * other clients can include the header. So we cope with both cases
-        * here. The PPP header is always FF03 when using L2TP.
-        *
-        * Note that skb->data[] isn't dereferenced from a u16 ptr here since
-        * the field may be unaligned.
-        */
-       if (!pskb_may_pull(skb, 2))
-               return 1;
-
-       if ((skb->data[0] == PPP_ALLSTATIONS) && (skb->data[1] == PPP_UI))
-               skb_pull(skb, 2);
-
-       return 0;
-}
-
 /* Receive message. This is the recvmsg for the PPPoL2TP socket.
  */
 static int pppol2tp_recvmsg(struct socket *sock, struct msghdr *msg,
@@ -248,6 +226,17 @@ static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int
        if (sk == NULL)
                goto no_sock;
 
+       /* If the first two bytes are 0xFF03, consider that it is the PPP's
+        * Address and Control fields and skip them. The L2TP module has always
+        * worked this way, although, in theory, the use of these fields should
+        * be negotiated and handled at the PPP layer. These fields are
+        * constant: 0xFF is the All-Stations Address and 0x03 the Unnumbered
+        * Information command with Poll/Final bit set to zero (RFC 1662).
+        */
+       if (pskb_may_pull(skb, 2) && skb->data[0] == PPP_ALLSTATIONS &&
+           skb->data[1] == PPP_UI)
+               skb_pull(skb, 2);
+
        if (sk->sk_state & PPPOX_BOUND) {
                struct pppox_sock *po;
 
@@ -424,12 +413,6 @@ static void pppol2tp_put_sk(struct rcu_head *head)
        sock_put(ps->__sk);
 }
 
-/* Called by l2tp_core when a session socket is being closed.
- */
-static void pppol2tp_session_close(struct l2tp_session *session)
-{
-}
-
 /* Really kill the session socket. (Called from sock_put() if
  * refcnt == 0.)
  */
@@ -570,10 +553,8 @@ static void pppol2tp_show(struct seq_file *m, void *arg)
 static void pppol2tp_session_init(struct l2tp_session *session)
 {
        struct pppol2tp_session *ps;
-       struct dst_entry *dst;
 
        session->recv_skb = pppol2tp_recv;
-       session->session_close = pppol2tp_session_close;
 #if IS_ENABLED(CONFIG_L2TP_DEBUGFS)
        session->show = pppol2tp_show;
 #endif
@@ -581,18 +562,106 @@ static void pppol2tp_session_init(struct l2tp_session *session)
        ps = l2tp_session_priv(session);
        mutex_init(&ps->sk_lock);
        ps->owner = current->pid;
+}
 
-       /* If PMTU discovery was enabled, use the MTU that was discovered */
-       dst = sk_dst_get(session->tunnel->sock);
-       if (dst) {
-               u32 pmtu = dst_mtu(dst);
+struct l2tp_connect_info {
+       u8 version;
+       int fd;
+       u32 tunnel_id;
+       u32 peer_tunnel_id;
+       u32 session_id;
+       u32 peer_session_id;
+};
 
-               if (pmtu) {
-                       session->mtu = pmtu - PPPOL2TP_HEADER_OVERHEAD;
-                       session->mru = pmtu - PPPOL2TP_HEADER_OVERHEAD;
-               }
-               dst_release(dst);
+static int pppol2tp_sockaddr_get_info(const void *sa, int sa_len,
+                                     struct l2tp_connect_info *info)
+{
+       switch (sa_len) {
+       case sizeof(struct sockaddr_pppol2tp):
+       {
+               const struct sockaddr_pppol2tp *sa_v2in4 = sa;
+
+               if (sa_v2in4->sa_protocol != PX_PROTO_OL2TP)
+                       return -EINVAL;
+
+               info->version = 2;
+               info->fd = sa_v2in4->pppol2tp.fd;
+               info->tunnel_id = sa_v2in4->pppol2tp.s_tunnel;
+               info->peer_tunnel_id = sa_v2in4->pppol2tp.d_tunnel;
+               info->session_id = sa_v2in4->pppol2tp.s_session;
+               info->peer_session_id = sa_v2in4->pppol2tp.d_session;
+
+               break;
+       }
+       case sizeof(struct sockaddr_pppol2tpv3):
+       {
+               const struct sockaddr_pppol2tpv3 *sa_v3in4 = sa;
+
+               if (sa_v3in4->sa_protocol != PX_PROTO_OL2TP)
+                       return -EINVAL;
+
+               info->version = 3;
+               info->fd = sa_v3in4->pppol2tp.fd;
+               info->tunnel_id = sa_v3in4->pppol2tp.s_tunnel;
+               info->peer_tunnel_id = sa_v3in4->pppol2tp.d_tunnel;
+               info->session_id = sa_v3in4->pppol2tp.s_session;
+               info->peer_session_id = sa_v3in4->pppol2tp.d_session;
+
+               break;
        }
+       case sizeof(struct sockaddr_pppol2tpin6):
+       {
+               const struct sockaddr_pppol2tpin6 *sa_v2in6 = sa;
+
+               if (sa_v2in6->sa_protocol != PX_PROTO_OL2TP)
+                       return -EINVAL;
+
+               info->version = 2;
+               info->fd = sa_v2in6->pppol2tp.fd;
+               info->tunnel_id = sa_v2in6->pppol2tp.s_tunnel;
+               info->peer_tunnel_id = sa_v2in6->pppol2tp.d_tunnel;
+               info->session_id = sa_v2in6->pppol2tp.s_session;
+               info->peer_session_id = sa_v2in6->pppol2tp.d_session;
+
+               break;
+       }
+       case sizeof(struct sockaddr_pppol2tpv3in6):
+       {
+               const struct sockaddr_pppol2tpv3in6 *sa_v3in6 = sa;
+
+               if (sa_v3in6->sa_protocol != PX_PROTO_OL2TP)
+                       return -EINVAL;
+
+               info->version = 3;
+               info->fd = sa_v3in6->pppol2tp.fd;
+               info->tunnel_id = sa_v3in6->pppol2tp.s_tunnel;
+               info->peer_tunnel_id = sa_v3in6->pppol2tp.d_tunnel;
+               info->session_id = sa_v3in6->pppol2tp.s_session;
+               info->peer_session_id = sa_v3in6->pppol2tp.d_session;
+
+               break;
+       }
+       default:
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
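From userspace, the four sockaddr variants handled by pppol2tp_sockaddr_get_info() above are still distinguished purely by size. A hedged sketch of the classic L2TPv2-over-IPv4 case; tunnel_fd is assumed to be the tunnel's UDP socket and the IDs to come from the application's L2TP control channel:

    #include <string.h>
    #include <unistd.h>
    #include <sys/socket.h>
    #include <linux/if_pppox.h>

    /* Hedged userspace sketch: attach a PPPoX socket to an L2TPv2 session. */
    static int pppol2tp_attach(int tunnel_fd, int tid, int ptid,
                               int sid, int psid)
    {
            struct sockaddr_pppol2tp sax;
            int fd = socket(AF_PPPOX, SOCK_DGRAM, PX_PROTO_OL2TP);

            if (fd < 0)
                    return -1;

            memset(&sax, 0, sizeof(sax));
            sax.sa_family = AF_PPPOX;
            sax.sa_protocol = PX_PROTO_OL2TP;
            sax.pppol2tp.fd = tunnel_fd;     /* tunnel UDP socket */
            sax.pppol2tp.s_tunnel = tid;
            sax.pppol2tp.d_tunnel = ptid;
            sax.pppol2tp.s_session = sid;
            sax.pppol2tp.d_session = psid;

            /* sizeof(sax) is what selects the v2/IPv4 branch in the kernel */
            if (connect(fd, (struct sockaddr *)&sax, sizeof(sax)) < 0) {
                    close(fd);
                    return -1;
            }
            return fd;
    }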
+/* Rough estimate of the maximum payload size a tunnel can transmit without
+ * fragmenting at the lower IP layer. Assumes L2TPv2 with sequence
+ * numbers and no IP options. Not quite accurate, but the result is mostly
+ * unused anyway.
+ */
+static int pppol2tp_tunnel_mtu(const struct l2tp_tunnel *tunnel)
+{
+       int mtu;
+
+       mtu = l2tp_tunnel_dst_mtu(tunnel);
+       if (mtu <= PPPOL2TP_HEADER_OVERHEAD)
+               return 1500 - PPPOL2TP_HEADER_OVERHEAD;
+
+       return mtu - PPPOL2TP_HEADER_OVERHEAD;
 }
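As a worked example of the fallback, with PPPOL2TP_HEADER_OVERHEAD being 40 bytes in this driver: a discovered destination MTU of 1500 gives a channel MTU of 1500 - 40 = 1460; if the route reports no MTU, or one too small to fit the overhead, the function assumes a 1500-byte underlay and again yields 1460.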
 
 /* connect() handler. Attach a PPPoX socket to a tunnel UDP socket
@@ -601,34 +670,23 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
                            int sockaddr_len, int flags)
 {
        struct sock *sk = sock->sk;
-       struct sockaddr_pppol2tp *sp = (struct sockaddr_pppol2tp *) uservaddr;
        struct pppox_sock *po = pppox_sk(sk);
        struct l2tp_session *session = NULL;
+       struct l2tp_connect_info info;
        struct l2tp_tunnel *tunnel;
        struct pppol2tp_session *ps;
        struct l2tp_session_cfg cfg = { 0, };
-       int error = 0;
-       u32 tunnel_id, peer_tunnel_id;
-       u32 session_id, peer_session_id;
        bool drop_refcnt = false;
        bool drop_tunnel = false;
        bool new_session = false;
        bool new_tunnel = false;
-       int ver = 2;
-       int fd;
-
-       lock_sock(sk);
-
-       error = -EINVAL;
+       int error;
 
-       if (sockaddr_len != sizeof(struct sockaddr_pppol2tp) &&
-           sockaddr_len != sizeof(struct sockaddr_pppol2tpv3) &&
-           sockaddr_len != sizeof(struct sockaddr_pppol2tpin6) &&
-           sockaddr_len != sizeof(struct sockaddr_pppol2tpv3in6))
-               goto end;
+       error = pppol2tp_sockaddr_get_info(uservaddr, sockaddr_len, &info);
+       if (error < 0)
+               return error;
 
-       if (sp->sa_protocol != PX_PROTO_OL2TP)
-               goto end;
+       lock_sock(sk);
 
        /* Check for already bound sockets */
        error = -EBUSY;
@@ -640,56 +698,12 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
        if (sk->sk_user_data)
                goto end; /* socket is already attached */
 
-       /* Get params from socket address. Handle L2TPv2 and L2TPv3.
-        * This is nasty because there are different sockaddr_pppol2tp
-        * structs for L2TPv2, L2TPv3, over IPv4 and IPv6. We use
-        * the sockaddr size to determine which structure the caller
-        * is using.
-        */
-       peer_tunnel_id = 0;
-       if (sockaddr_len == sizeof(struct sockaddr_pppol2tp)) {
-               fd = sp->pppol2tp.fd;
-               tunnel_id = sp->pppol2tp.s_tunnel;
-               peer_tunnel_id = sp->pppol2tp.d_tunnel;
-               session_id = sp->pppol2tp.s_session;
-               peer_session_id = sp->pppol2tp.d_session;
-       } else if (sockaddr_len == sizeof(struct sockaddr_pppol2tpv3)) {
-               struct sockaddr_pppol2tpv3 *sp3 =
-                       (struct sockaddr_pppol2tpv3 *) sp;
-               ver = 3;
-               fd = sp3->pppol2tp.fd;
-               tunnel_id = sp3->pppol2tp.s_tunnel;
-               peer_tunnel_id = sp3->pppol2tp.d_tunnel;
-               session_id = sp3->pppol2tp.s_session;
-               peer_session_id = sp3->pppol2tp.d_session;
-       } else if (sockaddr_len == sizeof(struct sockaddr_pppol2tpin6)) {
-               struct sockaddr_pppol2tpin6 *sp6 =
-                       (struct sockaddr_pppol2tpin6 *) sp;
-               fd = sp6->pppol2tp.fd;
-               tunnel_id = sp6->pppol2tp.s_tunnel;
-               peer_tunnel_id = sp6->pppol2tp.d_tunnel;
-               session_id = sp6->pppol2tp.s_session;
-               peer_session_id = sp6->pppol2tp.d_session;
-       } else if (sockaddr_len == sizeof(struct sockaddr_pppol2tpv3in6)) {
-               struct sockaddr_pppol2tpv3in6 *sp6 =
-                       (struct sockaddr_pppol2tpv3in6 *) sp;
-               ver = 3;
-               fd = sp6->pppol2tp.fd;
-               tunnel_id = sp6->pppol2tp.s_tunnel;
-               peer_tunnel_id = sp6->pppol2tp.d_tunnel;
-               session_id = sp6->pppol2tp.s_session;
-               peer_session_id = sp6->pppol2tp.d_session;
-       } else {
-               error = -EINVAL;
-               goto end; /* bad socket address */
-       }
-
        /* Don't bind if tunnel_id is 0 */
        error = -EINVAL;
-       if (tunnel_id == 0)
+       if (!info.tunnel_id)
                goto end;
 
-       tunnel = l2tp_tunnel_get(sock_net(sk), tunnel_id);
+       tunnel = l2tp_tunnel_get(sock_net(sk), info.tunnel_id);
        if (tunnel)
                drop_tunnel = true;
 
@@ -697,7 +711,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
         * peer_session_id is 0. Otherwise look up tunnel using supplied
         * tunnel id.
         */
-       if ((session_id == 0) && (peer_session_id == 0)) {
+       if (!info.session_id && !info.peer_session_id) {
                if (tunnel == NULL) {
                        struct l2tp_tunnel_cfg tcfg = {
                                .encap = L2TP_ENCAPTYPE_UDP,
@@ -707,12 +721,16 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
                        /* Prevent l2tp_tunnel_register() from trying to set up
                         * a kernel socket.
                         */
-                       if (fd < 0) {
+                       if (info.fd < 0) {
                                error = -EBADF;
                                goto end;
                        }
 
-                       error = l2tp_tunnel_create(sock_net(sk), fd, ver, tunnel_id, peer_tunnel_id, &tcfg, &tunnel);
+                       error = l2tp_tunnel_create(sock_net(sk), info.fd,
+                                                  info.version,
+                                                  info.tunnel_id,
+                                                  info.peer_tunnel_id, &tcfg,
+                                                  &tunnel);
                        if (error < 0)
                                goto end;
 
@@ -737,13 +755,10 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
                        goto end;
        }
 
-       if (tunnel->recv_payload_hook == NULL)
-               tunnel->recv_payload_hook = pppol2tp_recv_payload_hook;
-
        if (tunnel->peer_tunnel_id == 0)
-               tunnel->peer_tunnel_id = peer_tunnel_id;
+               tunnel->peer_tunnel_id = info.peer_tunnel_id;
 
-       session = l2tp_session_get(sock_net(sk), tunnel, session_id);
+       session = l2tp_session_get(sock_net(sk), tunnel, info.session_id);
        if (session) {
                drop_refcnt = true;
 
@@ -766,14 +781,11 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
                        goto end;
                }
        } else {
-               /* Default MTU must allow space for UDP/L2TP/PPP headers */
-               cfg.mtu = 1500 - PPPOL2TP_HEADER_OVERHEAD;
-               cfg.mru = cfg.mtu;
                cfg.pw_type = L2TP_PWTYPE_PPP;
 
                session = l2tp_session_create(sizeof(struct pppol2tp_session),
-                                             tunnel, session_id,
-                                             peer_session_id, &cfg);
+                                             tunnel, info.session_id,
+                                             info.peer_session_id, &cfg);
                if (IS_ERR(session)) {
                        error = PTR_ERR(session);
                        goto end;
@@ -813,7 +825,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
 
        po->chan.private = sk;
        po->chan.ops     = &pppol2tp_chan_ops;
-       po->chan.mtu     = session->mtu;
+       po->chan.mtu     = pppol2tp_tunnel_mtu(tunnel);
 
        error = ppp_register_net_channel(sock_net(sk), &po->chan);
        if (error) {
@@ -869,12 +881,6 @@ static int pppol2tp_session_create(struct net *net, struct l2tp_tunnel *tunnel,
                goto err;
        }
 
-       /* Default MTU values. */
-       if (cfg->mtu == 0)
-               cfg->mtu = 1500 - PPPOL2TP_HEADER_OVERHEAD;
-       if (cfg->mru == 0)
-               cfg->mru = cfg->mtu;
-
        /* Allocate and initialize a new session context. */
        session = l2tp_session_create(sizeof(struct pppol2tp_session),
                                      tunnel, session_id,
@@ -1038,11 +1044,9 @@ static void pppol2tp_copy_stats(struct pppol2tp_ioc_stats *dest,
 static int pppol2tp_session_ioctl(struct l2tp_session *session,
                                  unsigned int cmd, unsigned long arg)
 {
-       struct ifreq ifr;
        int err = 0;
        struct sock *sk;
        int val = (int) arg;
-       struct pppol2tp_session *ps = l2tp_session_priv(session);
        struct l2tp_tunnel *tunnel = session->tunnel;
        struct pppol2tp_ioc_stats stats;
 
@@ -1055,85 +1059,19 @@ static int pppol2tp_session_ioctl(struct l2tp_session *session,
                return -EBADR;
 
        switch (cmd) {
-       case SIOCGIFMTU:
-               err = -ENXIO;
-               if (!(sk->sk_state & PPPOX_CONNECTED))
-                       break;
-
-               err = -EFAULT;
-               if (copy_from_user(&ifr, (void __user *) arg, sizeof(struct ifreq)))
-                       break;
-               ifr.ifr_mtu = session->mtu;
-               if (copy_to_user((void __user *) arg, &ifr, sizeof(struct ifreq)))
-                       break;
-
-               l2tp_info(session, L2TP_MSG_CONTROL, "%s: get mtu=%d\n",
-                         session->name, session->mtu);
-               err = 0;
-               break;
-
-       case SIOCSIFMTU:
-               err = -ENXIO;
-               if (!(sk->sk_state & PPPOX_CONNECTED))
-                       break;
-
-               err = -EFAULT;
-               if (copy_from_user(&ifr, (void __user *) arg, sizeof(struct ifreq)))
-                       break;
-
-               session->mtu = ifr.ifr_mtu;
-
-               l2tp_info(session, L2TP_MSG_CONTROL, "%s: set mtu=%d\n",
-                         session->name, session->mtu);
-               err = 0;
-               break;
-
        case PPPIOCGMRU:
-               err = -ENXIO;
-               if (!(sk->sk_state & PPPOX_CONNECTED))
-                       break;
-
-               err = -EFAULT;
-               if (put_user(session->mru, (int __user *) arg))
-                       break;
-
-               l2tp_info(session, L2TP_MSG_CONTROL, "%s: get mru=%d\n",
-                         session->name, session->mru);
-               err = 0;
-               break;
-
-       case PPPIOCSMRU:
-               err = -ENXIO;
-               if (!(sk->sk_state & PPPOX_CONNECTED))
-                       break;
-
-               err = -EFAULT;
-               if (get_user(val, (int __user *) arg))
-                       break;
-
-               session->mru = val;
-               l2tp_info(session, L2TP_MSG_CONTROL, "%s: set mru=%d\n",
-                         session->name, session->mru);
-               err = 0;
-               break;
-
        case PPPIOCGFLAGS:
                err = -EFAULT;
-               if (put_user(ps->flags, (int __user *) arg))
+               if (put_user(0, (int __user *)arg))
                        break;
-
-               l2tp_info(session, L2TP_MSG_CONTROL, "%s: get flags=%d\n",
-                         session->name, ps->flags);
                err = 0;
                break;
 
+       case PPPIOCSMRU:
        case PPPIOCSFLAGS:
                err = -EFAULT;
-               if (get_user(val, (int __user *) arg))
+               if (get_user(val, (int __user *)arg))
                        break;
-               ps->flags = val;
-               l2tp_info(session, L2TP_MSG_CONTROL, "%s: set flags=%d\n",
-                         session->name, ps->flags);
                err = 0;
                break;
 
@@ -1722,8 +1660,7 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v)
                   tunnel->peer_tunnel_id,
                   session->peer_session_id,
                   state, user_data_ok);
-       seq_printf(m, "   %d/%d/%c/%c/%s %08x %u\n",
-                  session->mtu, session->mru,
+       seq_printf(m, "   0/0/%c/%c/%s %08x %u\n",
                   session->recv_seq ? 'R' : '-',
                   session->send_seq ? 'S' : '-',
                   session->lns_mode ? "LNS" : "LAC",
index b91c6510816253b82cfe9e81d2c7a2cce0a30640..176a6c1521a56f6048eacd84f6c75387df45730c 100644 (file)
@@ -6,5 +6,5 @@ config LLC2
        tristate "ANSI/IEEE 802.2 LLC type 2 Support"
        select LLC
        help
-         This is a Logical Link Layer type 2, connection oriented support. 
+         This is a Logical Link Layer type 2, connection oriented support.
          Select this if you want to have support for PF_LLC sockets.
index 4e260cff3c5d89d3064120f4f3718e6feff1e44a..5e0ef436daaefbb623c056bffc8cee765b160c59 100644 (file)
@@ -4,7 +4,7 @@
 # Copyright (c) 1997 by Procom Technology,Inc.
 #              2001-2003 by Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 #
-# This program can be redistributed or modified under the terms of the 
+# This program can be redistributed or modified under the terms of the
 # GNU General Public License as published by the Free Software Foundation.
 # This program is distributed without any warranty or implied warranty
 # of merchantability or fitness for a particular purpose.
index 6daf391b3e847db42396caf9fbbfe7dca6440006..8db03c2d5440b12536b3b7016bc5c01b56666d8a 100644 (file)
@@ -151,4 +151,3 @@ out:
        sock_put(sk);
        return rc;
 }
-
index e3589ade62e073a3ae91503c2cec03a3e1705f29..bb707789ef2bb21d942f44aacf3d87972abdc23a 100644 (file)
@@ -12,6 +12,7 @@ mac80211-y := \
        scan.o offchannel.o \
        ht.o agg-tx.o agg-rx.o \
        vht.o \
+       he.o \
        ibss.o \
        iface.o \
        rate.o \
index e83c19d4c292e46fce243f60fdf77557096c2af6..6a4f154c99f6b27bd5d41fb502a939c82c654a2b 100644 (file)
@@ -245,6 +245,7 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta,
        };
        int i, ret = -EOPNOTSUPP;
        u16 status = WLAN_STATUS_REQUEST_DECLINED;
+       u16 max_buf_size;
 
        if (tid >= IEEE80211_FIRST_TSPEC_TSID) {
                ht_dbg(sta->sdata,
@@ -268,13 +269,18 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta,
                goto end;
        }
 
+       if (sta->sta.he_cap.has_he)
+               max_buf_size = IEEE80211_MAX_AMPDU_BUF;
+       else
+               max_buf_size = IEEE80211_MAX_AMPDU_BUF_HT;
+
        /* sanity check for incoming parameters:
         * check if configuration can support the BA policy
         * and if buffer size does not exceed the max value */
        /* XXX: check own ht delayed BA capability?? */
        if (((ba_policy != 1) &&
             (!(sta->sta.ht_cap.cap & IEEE80211_HT_CAP_DELAY_BA))) ||
-           (buf_size > IEEE80211_MAX_AMPDU_BUF)) {
+           (buf_size > max_buf_size)) {
                status = WLAN_STATUS_INVALID_QOS_PARAM;
                ht_dbg_ratelimited(sta->sdata,
                                   "AddBA Req with bad params from %pM on tid %u. policy %d, buffer size %d\n",
@@ -283,7 +289,7 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta,
        }
        /* determine default buffer size */
        if (buf_size == 0)
-               buf_size = IEEE80211_MAX_AMPDU_BUF;
+               buf_size = max_buf_size;
 
        /* make sure the size doesn't exceed the maximum supported by the hw */
        if (buf_size > sta->sta.max_rx_aggregation_subframes)
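The new max_buf_size selection reflects the larger block-ack window HE allows. A hedged fragment restating the choice, with the constants as assumed from include/linux/ieee80211.h in this series (HT caps the window at 64 subframes, 0x40; HE raises it to 256, 0x100):

    /* Hedged sketch of the selection above; sta_info is a mac80211
     * internal, so this is a fragment rather than standalone code.
     */
    static u16 ampdu_rx_buf_limit(const struct sta_info *sta)
    {
            return sta->sta.he_cap.has_he ? IEEE80211_MAX_AMPDU_BUF
                                          : IEEE80211_MAX_AMPDU_BUF_HT;
    }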
index ac4295296514365ad1972ddc22754be1cdb8384a..69e831bc317beb666e2a836facb572e59e727a9c 100644 (file)
@@ -463,6 +463,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
                .timeout = 0,
        };
        int ret;
+       u16 buf_size;
 
        tid_tx = rcu_dereference_protected_tid_tx(sta, tid);
 
@@ -511,11 +512,22 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
        sta->ampdu_mlme.addba_req_num[tid]++;
        spin_unlock_bh(&sta->lock);
 
+       if (sta->sta.he_cap.has_he) {
+               buf_size = local->hw.max_tx_aggregation_subframes;
+       } else {
+               /*
+                * We really should use what the driver told us it will
+                * transmit as the maximum, but certain APs (e.g. the
+                * LinkSys WRT120N with FW v1.0.07 build 002 Jun 18 2012)
+                * will crash when we use a lower number.
+                */
+               buf_size = IEEE80211_MAX_AMPDU_BUF_HT;
+       }
+
        /* send AddBA request */
        ieee80211_send_addba_request(sdata, sta->sta.addr, tid,
                                     tid_tx->dialog_token, params.ssn,
-                                    IEEE80211_MAX_AMPDU_BUF,
-                                    tid_tx->timeout);
+                                    buf_size, tid_tx->timeout);
 }
 
 /*
@@ -905,8 +917,7 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local,
 {
        struct tid_ampdu_tx *tid_tx;
        struct ieee80211_txq *txq;
-       u16 capab, tid;
-       u8 buf_size;
+       u16 capab, tid, buf_size;
        bool amsdu;
 
        capab = le16_to_cpu(mgmt->u.action.u.addba_resp.capab);
index bdf6fa78d0d2b101a448fe10e925f5e381486224..d25da0e66da16218c340e4c7f8a9aaf663985b9c 100644 (file)
@@ -495,7 +495,7 @@ static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev,
                goto out_unlock;
        }
 
-       ieee80211_key_free(key, true);
+       ieee80211_key_free(key, sdata->vif.type == NL80211_IFTYPE_STATION);
 
        ret = 0;
  out_unlock:
@@ -1412,6 +1412,11 @@ static int sta_apply_parameters(struct ieee80211_local *local,
                ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband,
                                                    params->vht_capa, sta);
 
+       if (params->he_capa)
+               ieee80211_he_cap_ie_to_sta_he_cap(sdata, sband,
+                                                 (void *)params->he_capa,
+                                                 params->he_capa_len, sta);
+
        if (params->opmode_notif_used) {
                /* returned value is only needed for rc update, but the
                 * rc isn't initialized here yet, so ignore it
@@ -3486,7 +3491,7 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev,
        }
 
        local_bh_disable();
-       ieee80211_xmit(sdata, sta, skb);
+       ieee80211_xmit(sdata, sta, skb, 0);
        local_bh_enable();
 
        ret = 0;
index 690c142a7a440b0b31b27fb436942162692e9787..5ac743816b59eb416f2153887e3608c72ca89b68 100644 (file)
@@ -116,16 +116,16 @@ static void ieee80211_get_stats(struct net_device *dev,
                data[i++] = sta->sta_state;
 
 
-               if (sinfo.filled & BIT(NL80211_STA_INFO_TX_BITRATE))
+               if (sinfo.filled & BIT_ULL(NL80211_STA_INFO_TX_BITRATE))
                        data[i] = 100000ULL *
                                cfg80211_calculate_bitrate(&sinfo.txrate);
                i++;
-               if (sinfo.filled & BIT(NL80211_STA_INFO_RX_BITRATE))
+               if (sinfo.filled & BIT_ULL(NL80211_STA_INFO_RX_BITRATE))
                        data[i] = 100000ULL *
                                cfg80211_calculate_bitrate(&sinfo.rxrate);
                i++;
 
-               if (sinfo.filled & BIT(NL80211_STA_INFO_SIGNAL_AVG))
+               if (sinfo.filled & BIT_ULL(NL80211_STA_INFO_SIGNAL_AVG))
                        data[i] = (u8)sinfo.signal_avg;
                i++;
        } else {
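The BIT() to BIT_ULL() conversion matters because sinfo.filled is a u64 bitmap: BIT() expands to 1UL << n, which is only 32 bits wide on ILP32 architectures. A minimal illustration, mirroring the kernel definitions:

    #define BIT(nr)      (1UL << (nr))     /* unsigned long: 32 bits on ILP32 */
    #define BIT_ULL(nr)  (1ULL << (nr))    /* always 64 bits */

    /* Testing a u64 bitmap with BIT(n) is truncated (or undefined) for
     * n >= 32; the NL80211_STA_INFO_* values used here are still small,
     * but BIT_ULL() keeps the test well-defined across the whole flag
     * space.
     */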
diff --git a/net/mac80211/he.c b/net/mac80211/he.c
new file mode 100644 (file)
index 0000000..769078e
--- /dev/null
@@ -0,0 +1,55 @@
+/*
+ * HE handling
+ *
+ * Copyright(c) 2017 Intel Deutschland GmbH
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "ieee80211_i.h"
+
+void
+ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata,
+                                 struct ieee80211_supported_band *sband,
+                                 const u8 *he_cap_ie, u8 he_cap_len,
+                                 struct sta_info *sta)
+{
+       struct ieee80211_sta_he_cap *he_cap = &sta->sta.he_cap;
+       struct ieee80211_he_cap_elem *he_cap_ie_elem = (void *)he_cap_ie;
+       u8 he_ppe_size;
+       u8 mcs_nss_size;
+       u8 he_total_size;
+
+       memset(he_cap, 0, sizeof(*he_cap));
+
+       if (!he_cap_ie || !ieee80211_get_he_sta_cap(sband))
+               return;
+
+       /* Make sure size is OK */
+       mcs_nss_size = ieee80211_he_mcs_nss_size(he_cap_ie_elem);
+       he_ppe_size =
+               ieee80211_he_ppe_size(he_cap_ie[sizeof(he_cap->he_cap_elem) +
+                                               mcs_nss_size],
+                                     he_cap_ie_elem->phy_cap_info);
+       he_total_size = sizeof(he_cap->he_cap_elem) + mcs_nss_size +
+                       he_ppe_size;
+       if (he_cap_len < he_total_size)
+               return;
+
+       memcpy(&he_cap->he_cap_elem, he_cap_ie, sizeof(he_cap->he_cap_elem));
+
+       /* HE Tx/Rx HE MCS NSS Support Field */
+       memcpy(&he_cap->he_mcs_nss_supp,
+              &he_cap_ie[sizeof(he_cap->he_cap_elem)], mcs_nss_size);
+
+       /* Check if there are (optional) PPE Thresholds */
+       if (he_cap->he_cap_elem.phy_cap_info[6] &
+           IEEE80211_HE_PHY_CAP6_PPE_THRESHOLD_PRESENT)
+               memcpy(he_cap->ppe_thres,
+                      &he_cap_ie[sizeof(he_cap->he_cap_elem) + mcs_nss_size],
+                      he_ppe_size);
+
+       he_cap->has_he = true;
+}
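Because the element is variable-sized, the parser has to compute the real length before copying anything. A hedged summary of the layout it walks (the sizes come from the helpers used above, not spelled out in this hunk):

    /*
     * Hedged layout sketch of the HE Capabilities element body as
     * consumed by ieee80211_he_cap_ie_to_sta_he_cap():
     *
     *   [ he_cap_elem              ]  fixed MAC+PHY capability bytes
     *   [ Tx/Rx HE-MCS NSS support ]  ieee80211_he_mcs_nss_size() bytes,
     *                                 grows with 160/80+80 MHz support
     *   [ PPE thresholds           ]  present only if the PHY cap bit
     *                                 ..PPE_THRESHOLD_PRESENT is set;
     *                                 ieee80211_he_ppe_size() bytes
     *
     * he_cap_len must cover the sum of all three parts, or the element
     * is ignored and has_he stays false.
     */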
index 26a7ba3b698f1e94598350d3e03dd319d6ef10e2..f849ea814993f34d34e9485f6681659c4408d6c9 100644 (file)
@@ -352,7 +352,7 @@ void ieee80211_ba_session_work(struct work_struct *work)
                    test_and_clear_bit(tid,
                                       sta->ampdu_mlme.tid_rx_manage_offl))
                        ___ieee80211_start_rx_ba_session(sta, 0, 0, 0, 1, tid,
-                                                        IEEE80211_MAX_AMPDU_BUF,
+                                                        IEEE80211_MAX_AMPDU_BUF_HT,
                                                         false, true);
 
                if (test_and_clear_bit(tid + IEEE80211_NUM_TIDS,
index d1978aa1c15ddf4d946bae7458343f9875861451..172aeae21ae9d00bc9f43c4effc0008ff624ee6d 100644 (file)
@@ -165,6 +165,7 @@ typedef unsigned __bitwise ieee80211_tx_result;
 #define TX_DROP                ((__force ieee80211_tx_result) 1u)
 #define TX_QUEUED      ((__force ieee80211_tx_result) 2u)
 
+#define IEEE80211_TX_NO_SEQNO          BIT(0)
 #define IEEE80211_TX_UNICAST           BIT(1)
 #define IEEE80211_TX_PS_BUFFERED       BIT(2)
 
@@ -364,6 +365,7 @@ enum ieee80211_sta_flags {
        IEEE80211_STA_DISABLE_160MHZ    = BIT(13),
        IEEE80211_STA_DISABLE_WMM       = BIT(14),
        IEEE80211_STA_ENABLE_RRM        = BIT(15),
+       IEEE80211_STA_DISABLE_HE        = BIT(16),
 };
 
 struct ieee80211_mgd_auth_data {
@@ -1453,6 +1455,10 @@ struct ieee802_11_elems {
        const struct ieee80211_vht_cap *vht_cap_elem;
        const struct ieee80211_vht_operation *vht_operation;
        const struct ieee80211_meshconf_ie *mesh_config;
+       const u8 *he_cap;
+       const struct ieee80211_he_operation *he_operation;
+       const struct ieee80211_mu_edca_param_set *mu_edca_param_set;
+       const u8 *uora_element;
        const u8 *mesh_id;
        const u8 *peering;
        const __le16 *awake_window;
@@ -1482,6 +1488,7 @@ struct ieee802_11_elems {
        u8 ext_supp_rates_len;
        u8 wmm_info_len;
        u8 wmm_param_len;
+       u8 he_cap_len;
        u8 mesh_id_len;
        u8 peering_len;
        u8 preq_len;
@@ -1824,6 +1831,13 @@ void ieee80211_get_vht_mask_from_cap(__le16 vht_cap,
 enum nl80211_chan_width
 ieee80211_sta_rx_bw_to_chan_width(struct sta_info *sta);
 
+/* HE */
+void
+ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata,
+                                 struct ieee80211_supported_band *sband,
+                                 const u8 *he_cap_ie, u8 he_cap_len,
+                                 struct sta_info *sta);
+
 /* Spectrum management */
 void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata,
                                       struct ieee80211_mgmt *mgmt,
@@ -1880,19 +1894,20 @@ void ieee80211_regulatory_limit_wmm_params(struct ieee80211_sub_if_data *sdata,
 void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata,
                               bool bss_notify, bool enable_qos);
 void ieee80211_xmit(struct ieee80211_sub_if_data *sdata,
-                   struct sta_info *sta, struct sk_buff *skb);
+                   struct sta_info *sta, struct sk_buff *skb,
+                   u32 txdata_flags);
 
 void __ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata,
                                 struct sk_buff *skb, int tid,
-                                enum nl80211_band band);
+                                enum nl80211_band band, u32 txdata_flags);
 
 static inline void
 ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata,
                          struct sk_buff *skb, int tid,
-                         enum nl80211_band band)
+                         enum nl80211_band band, u32 txdata_flags)
 {
        rcu_read_lock();
-       __ieee80211_tx_skb_tid_band(sdata, skb, tid, band);
+       __ieee80211_tx_skb_tid_band(sdata, skb, tid, band, txdata_flags);
        rcu_read_unlock();
 }
 
@@ -1910,7 +1925,7 @@ static inline void ieee80211_tx_skb_tid(struct ieee80211_sub_if_data *sdata,
        }
 
        __ieee80211_tx_skb_tid_band(sdata, skb, tid,
-                                   chanctx_conf->def.chan->band);
+                                   chanctx_conf->def.chan->band, 0);
        rcu_read_unlock();
 }
 
@@ -2031,26 +2046,27 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
 void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata,
                                    const u8 *bssid, u16 stype, u16 reason,
                                    bool send_frame, u8 *frame_buf);
+
+enum {
+       IEEE80211_PROBE_FLAG_DIRECTED           = BIT(0),
+       IEEE80211_PROBE_FLAG_MIN_CONTENT        = BIT(1),
+       IEEE80211_PROBE_FLAG_RANDOM_SN          = BIT(2),
+};
+
 int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer,
                             size_t buffer_len,
                             struct ieee80211_scan_ies *ie_desc,
                             const u8 *ie, size_t ie_len,
                             u8 bands_used, u32 *rate_masks,
-                            struct cfg80211_chan_def *chandef);
+                            struct cfg80211_chan_def *chandef,
+                            u32 flags);
 struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata,
                                          const u8 *src, const u8 *dst,
                                          u32 ratemask,
                                          struct ieee80211_channel *chan,
                                          const u8 *ssid, size_t ssid_len,
                                          const u8 *ie, size_t ie_len,
-                                         bool directed);
-void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata,
-                             const u8 *src, const u8 *dst,
-                             const u8 *ssid, size_t ssid_len,
-                             const u8 *ie, size_t ie_len,
-                             u32 ratemask, bool directed, u32 tx_flags,
-                             struct ieee80211_channel *channel, bool scan);
-
+                                         u32 flags);
 u32 ieee80211_sta_get_rates(struct ieee80211_sub_if_data *sdata,
                            struct ieee802_11_elems *elems,
                            enum nl80211_band band, u32 *basic_rates);
@@ -2073,6 +2089,9 @@ u8 *ieee80211_ie_build_vht_cap(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
                               u32 cap);
 u8 *ieee80211_ie_build_vht_oper(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
                                const struct cfg80211_chan_def *chandef);
+u8 *ieee80211_ie_build_he_cap(u8 *pos,
+                             const struct ieee80211_sta_he_cap *he_cap,
+                             u8 *end);
 int ieee80211_parse_bitrates(struct cfg80211_chan_def *chandef,
                             const struct ieee80211_supported_band *sband,
                             const u8 *srates, int srates_len, u32 *rates);
index 555e389b7dfa34ebf494c9f2432fb6409eff74a9..5e6cf2cee965264dd45cda775b370b6dcb022413 100644 (file)
@@ -1130,7 +1130,7 @@ static void ieee80211_uninit(struct net_device *dev)
 
 static u16 ieee80211_netdev_select_queue(struct net_device *dev,
                                         struct sk_buff *skb,
-                                        void *accel_priv,
+                                        struct net_device *sb_dev,
                                         select_queue_fallback_t fallback)
 {
        return ieee80211_select_queue(IEEE80211_DEV_TO_SUB_IF(dev), skb);
@@ -1176,7 +1176,7 @@ static const struct net_device_ops ieee80211_dataif_ops = {
 
 static u16 ieee80211_monitor_select_queue(struct net_device *dev,
                                          struct sk_buff *skb,
-                                         void *accel_priv,
+                                         struct net_device *sb_dev,
                                          select_queue_fallback_t fallback)
 {
        struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
index ee0d0cc8dc3bd686341d6c3e172bf1664c882bcb..c054ac85793c3f484a1425e0a052f6f420eb0df3 100644 (file)
@@ -656,11 +656,15 @@ int ieee80211_key_link(struct ieee80211_key *key,
 {
        struct ieee80211_local *local = sdata->local;
        struct ieee80211_key *old_key;
-       int idx, ret;
-       bool pairwise;
-
-       pairwise = key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE;
-       idx = key->conf.keyidx;
+       int idx = key->conf.keyidx;
+       bool pairwise = key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE;
+       /*
+        * We want to delay tailroom updates only for station - in that
+        * case it helps roaming speed, but in other cases it hurts and
+        * can cause warnings to appear.
+        */
+       bool delay_tailroom = sdata->vif.type == NL80211_IFTYPE_STATION;
+       int ret;
 
        mutex_lock(&sdata->local->key_mtx);
 
@@ -688,14 +692,14 @@ int ieee80211_key_link(struct ieee80211_key *key,
        increment_tailroom_need_count(sdata);
 
        ieee80211_key_replace(sdata, sta, pairwise, old_key, key);
-       ieee80211_key_destroy(old_key, true);
+       ieee80211_key_destroy(old_key, delay_tailroom);
 
        ieee80211_debugfs_key_add(key);
 
        if (!local->wowlan) {
                ret = ieee80211_key_enable_hw_accel(key);
                if (ret)
-                       ieee80211_key_free(key, true);
+                       ieee80211_key_free(key, delay_tailroom);
        } else {
                ret = 0;
        }
@@ -930,7 +934,8 @@ void ieee80211_free_sta_keys(struct ieee80211_local *local,
                ieee80211_key_replace(key->sdata, key->sta,
                                key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE,
                                key, NULL);
-               __ieee80211_key_destroy(key, true);
+               __ieee80211_key_destroy(key, key->sdata->vif.type ==
+                                       NL80211_IFTYPE_STATION);
        }
 
        for (i = 0; i < NUM_DEFAULT_KEYS; i++) {
@@ -940,7 +945,8 @@ void ieee80211_free_sta_keys(struct ieee80211_local *local,
                ieee80211_key_replace(key->sdata, key->sta,
                                key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE,
                                key, NULL);
-               __ieee80211_key_destroy(key, true);
+               __ieee80211_key_destroy(key, key->sdata->vif.type ==
+                                       NL80211_IFTYPE_STATION);
        }
 
        mutex_unlock(&local->key_mtx);
index fb73451ed85ec65cd0b4b5cc3808d51d40a8dd39..4fb2709cb52796c752f052a746bd5c420d6caf08 100644 (file)
@@ -3,6 +3,7 @@
  * Copyright 2005-2006, Devicescape Software, Inc.
  * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
+ * Copyright (C) 2017     Intel Deutschland GmbH
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -557,10 +558,19 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
        wiphy_ext_feature_set(wiphy,
                              NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211);
 
-       if (!ops->hw_scan)
+       if (!ops->hw_scan) {
                wiphy->features |= NL80211_FEATURE_LOW_PRIORITY_SCAN |
                                   NL80211_FEATURE_AP_SCAN;
-
+               /*
+                * if the driver behaves correctly using the probe request
+                * (template) from mac80211, then both of these should be
+                * supported even with hw scan - but let drivers opt in.
+                */
+               wiphy_ext_feature_set(wiphy,
+                                     NL80211_EXT_FEATURE_SCAN_RANDOM_SN);
+               wiphy_ext_feature_set(wiphy,
+                                     NL80211_EXT_FEATURE_SCAN_MIN_PREQ_CONTENT);
+       }
 
        if (!ops->set_key)
                wiphy->flags |= WIPHY_FLAG_IBSS_RSN;
@@ -588,8 +598,8 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
        local->hw.queues = 1;
        local->hw.max_rates = 1;
        local->hw.max_report_rates = 0;
-       local->hw.max_rx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF;
-       local->hw.max_tx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF;
+       local->hw.max_rx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF_HT;
+       local->hw.max_tx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF_HT;
        local->hw.offchannel_tx_hw_queue = IEEE80211_INVAL_HW_QUEUE;
        local->hw.conf.long_frame_max_tx_count = wiphy->retry_long;
        local->hw.conf.short_frame_max_tx_count = wiphy->retry_short;
@@ -816,7 +826,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
        int result, i;
        enum nl80211_band band;
        int channels, max_bitrates;
-       bool supp_ht, supp_vht;
+       bool supp_ht, supp_vht, supp_he;
        netdev_features_t feature_whitelist;
        struct cfg80211_chan_def dflt_chandef = {};
 
@@ -896,6 +906,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
        max_bitrates = 0;
        supp_ht = false;
        supp_vht = false;
+       supp_he = false;
        for (band = 0; band < NUM_NL80211_BANDS; band++) {
                struct ieee80211_supported_band *sband;
 
@@ -922,6 +933,9 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
                supp_ht = supp_ht || sband->ht_cap.ht_supported;
                supp_vht = supp_vht || sband->vht_cap.vht_supported;
 
+               if (!supp_he)
+                       supp_he = !!ieee80211_get_he_sta_cap(sband);
+
                if (!sband->ht_cap.ht_supported)
                        continue;
 
@@ -1011,6 +1025,18 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
                local->scan_ies_len +=
                        2 + sizeof(struct ieee80211_vht_cap);
 
+       /* HE cap element is variable in size - set len to allow max size */
+       /*
+        * TODO: 1 is added at the end of the calculation to accommodate
+        *      the temporary placement of the HE capabilities IE under EXT.
+        *      Remove it once the element is given its final place.
+        */
+       if (supp_he)
+               local->scan_ies_len +=
+                       2 + sizeof(struct ieee80211_he_cap_elem) +
+                       sizeof(struct ieee80211_he_mcs_nss_supp) +
+                       IEEE80211_HE_PPE_THRES_MAX_LEN + 1;
+
        if (!local->ops->hw_scan) {
                /* For hw_scan, driver needs to set these up. */
                local->hw.wiphy->max_scan_ssids = 4;
index a59187c016e08193e50078becbd169820f2efe9a..7fb9957359a3c1be557e577ba5b76cc4c1177105 100644 (file)
@@ -149,6 +149,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
                             struct ieee80211_channel *channel,
                             const struct ieee80211_ht_operation *ht_oper,
                             const struct ieee80211_vht_operation *vht_oper,
+                            const struct ieee80211_he_operation *he_oper,
                             struct cfg80211_chan_def *chandef, bool tracking)
 {
        struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
@@ -207,7 +208,27 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
        }
 
        vht_chandef = *chandef;
-       if (!ieee80211_chandef_vht_oper(vht_oper, &vht_chandef)) {
+       if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HE) && he_oper &&
+           (le32_to_cpu(he_oper->he_oper_params) &
+            IEEE80211_HE_OPERATION_VHT_OPER_INFO)) {
+               struct ieee80211_vht_operation he_oper_vht_cap;
+
+               /*
+                * Set only first 3 bytes (other 2 aren't used in
+                * ieee80211_chandef_vht_oper() anyway)
+                */
+               memcpy(&he_oper_vht_cap, he_oper->optional, 3);
+               he_oper_vht_cap.basic_mcs_set = cpu_to_le16(0);
+
+               if (!ieee80211_chandef_vht_oper(&he_oper_vht_cap,
+                                               &vht_chandef)) {
+                       if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HE))
+                               sdata_info(sdata,
+                                          "HE AP VHT information is invalid, disable HE\n");
+                       ret = IEEE80211_STA_DISABLE_HE;
+                       goto out;
+               }
+       } else if (!ieee80211_chandef_vht_oper(vht_oper, &vht_chandef)) {
                if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT))
                        sdata_info(sdata,
                                   "AP VHT information is invalid, disable VHT\n");
@@ -300,12 +321,14 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata,
                               const struct ieee80211_ht_cap *ht_cap,
                               const struct ieee80211_ht_operation *ht_oper,
                               const struct ieee80211_vht_operation *vht_oper,
+                              const struct ieee80211_he_operation *he_oper,
                               const u8 *bssid, u32 *changed)
 {
        struct ieee80211_local *local = sdata->local;
        struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
-       struct ieee80211_supported_band *sband;
-       struct ieee80211_channel *chan;
+       struct ieee80211_channel *chan = sdata->vif.bss_conf.chandef.chan;
+       struct ieee80211_supported_band *sband =
+               local->hw.wiphy->bands[chan->band];
        struct cfg80211_chan_def chandef;
        u16 ht_opmode;
        u32 flags;
@@ -320,6 +343,11 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata,
        if (ifmgd->flags & IEEE80211_STA_DISABLE_VHT)
                vht_oper = NULL;
 
+       /* don't check HE if we associated as non-HE station */
+       if (ifmgd->flags & IEEE80211_STA_DISABLE_HE ||
+           !ieee80211_get_he_sta_cap(sband))
+               he_oper = NULL;
+
        if (WARN_ON_ONCE(!sta))
                return -EINVAL;
 
@@ -333,12 +361,9 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata,
                sdata->vif.bss_conf.ht_operation_mode = ht_opmode;
        }
 
-       chan = sdata->vif.bss_conf.chandef.chan;
-       sband = local->hw.wiphy->bands[chan->band];
-
-       /* calculate new channel (type) based on HT/VHT operation IEs */
+       /* calculate new channel (type) based on HT/VHT/HE operation IEs */
        flags = ieee80211_determine_chantype(sdata, sband, chan,
-                                            ht_oper, vht_oper,
+                                            ht_oper, vht_oper, he_oper,
                                             &chandef, true);
 
        /*
@@ -582,6 +607,34 @@ static void ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata,
        ieee80211_ie_build_vht_cap(pos, &vht_cap, cap);
 }
 
+/* This function determines HE capability flags for the association
+ * and builds the IE.
+ */
+static void ieee80211_add_he_ie(struct ieee80211_sub_if_data *sdata,
+                               struct sk_buff *skb,
+                               struct ieee80211_supported_band *sband)
+{
+       u8 *pos;
+       const struct ieee80211_sta_he_cap *he_cap = NULL;
+       u8 he_cap_size;
+
+       he_cap = ieee80211_get_he_sta_cap(sband);
+       if (!he_cap)
+               return;
+
+       /*
+        * TODO: the extra 1 byte is because this element temporarily lives
+        * under the EXTENSION ID. Get rid of it when it moves.
+        */
+       he_cap_size =
+               2 + 1 + sizeof(he_cap->he_cap_elem) +
+               ieee80211_he_mcs_nss_size(&he_cap->he_cap_elem) +
+               ieee80211_he_ppe_size(he_cap->ppe_thres[0],
+                                     he_cap->he_cap_elem.phy_cap_info);
+       pos = skb_put(skb, he_cap_size);
+       ieee80211_ie_build_he_cap(pos, he_cap, pos + he_cap_size);
+}
+
 static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
 {
        struct ieee80211_local *local = sdata->local;
@@ -643,6 +696,9 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
                        2 + 2 * sband->n_channels + /* supported channels */
                        2 + sizeof(struct ieee80211_ht_cap) + /* HT */
                        2 + sizeof(struct ieee80211_vht_cap) + /* VHT */
+                       2 + 1 + sizeof(struct ieee80211_he_cap_elem) + /* HE */
+                               sizeof(struct ieee80211_he_mcs_nss_supp) +
+                               IEEE80211_HE_PPE_THRES_MAX_LEN +
                        assoc_data->ie_len + /* extra IEs */
                        (assoc_data->fils_kek_len ? 16 /* AES-SIV */ : 0) +
                        9, /* WMM */
@@ -827,11 +883,41 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
                offset = noffset;
        }
 
+       /* if present, add any custom IEs that go before HE */
+       if (assoc_data->ie_len) {
+               static const u8 before_he[] = {
+                       /*
+                        * no need to list the ones split off before VHT
+                        * or generated here
+                        */
+                       WLAN_EID_OPMODE_NOTIF,
+                       WLAN_EID_EXTENSION, WLAN_EID_EXT_FUTURE_CHAN_GUIDANCE,
+                       /* 11ai elements */
+                       WLAN_EID_EXTENSION, WLAN_EID_EXT_FILS_SESSION,
+                       WLAN_EID_EXTENSION, WLAN_EID_EXT_FILS_PUBLIC_KEY,
+                       WLAN_EID_EXTENSION, WLAN_EID_EXT_FILS_KEY_CONFIRM,
+                       WLAN_EID_EXTENSION, WLAN_EID_EXT_FILS_HLP_CONTAINER,
+                       WLAN_EID_EXTENSION, WLAN_EID_EXT_FILS_IP_ADDR_ASSIGN,
+                       /* TODO: add 11ah/11aj/11ak elements */
+               };
+
+               /* RIC already taken above, so no need to handle here anymore */
+               noffset = ieee80211_ie_split(assoc_data->ie, assoc_data->ie_len,
+                                            before_he, ARRAY_SIZE(before_he),
+                                            offset);
+               pos = skb_put(skb, noffset - offset);
+               memcpy(pos, assoc_data->ie + offset, noffset - offset);
+               offset = noffset;
+       }
+
        if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT))
                ieee80211_add_vht_ie(sdata, skb, sband,
                                     &assoc_data->ap_vht_cap);
 
-       /* if present, add any custom non-vendor IEs that go after HT */
+       if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HE))
+               ieee80211_add_he_ie(sdata, skb, sband);
+
+       /* if present, add any custom non-vendor IEs that go after HE */
        if (assoc_data->ie_len) {
                noffset = ieee80211_ie_split_vendor(assoc_data->ie,
                                                    assoc_data->ie_len,
@@ -898,6 +984,11 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local,
        struct ieee80211_hdr_3addr *nullfunc;
        struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
 
+       /* Don't send NDPs when the STA is connected over HE */
+       if (sdata->vif.type == NL80211_IFTYPE_STATION &&
+           !(ifmgd->flags & IEEE80211_STA_DISABLE_HE))
+               return;
+
        skb = ieee80211_nullfunc_get(&local->hw, &sdata->vif,
                !ieee80211_hw_check(&local->hw, DOESNT_SUPPORT_QOS_NDP));
        if (!skb)
@@ -929,6 +1020,10 @@ static void ieee80211_send_4addr_nullfunc(struct ieee80211_local *local,
        if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION))
                return;
 
+       /* Don't send NDPs when connected over HE */
+       if (!(sdata->u.mgd.flags & IEEE80211_STA_DISABLE_HE))
+               return;
+
        skb = dev_alloc_skb(local->hw.extra_tx_headroom + 30);
        if (!skb)
                return;
@@ -1700,9 +1795,11 @@ static void ieee80211_sta_handle_tspec_ac_params_wk(struct work_struct *work)
 }
 
 /* MLME */
-static bool ieee80211_sta_wmm_params(struct ieee80211_local *local,
-                                    struct ieee80211_sub_if_data *sdata,
-                                    const u8 *wmm_param, size_t wmm_param_len)
+static bool
+ieee80211_sta_wmm_params(struct ieee80211_local *local,
+                        struct ieee80211_sub_if_data *sdata,
+                        const u8 *wmm_param, size_t wmm_param_len,
+                        const struct ieee80211_mu_edca_param_set *mu_edca)
 {
        struct ieee80211_tx_queue_params params[IEEE80211_NUM_ACS];
        struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
@@ -1749,6 +1846,9 @@ static bool ieee80211_sta_wmm_params(struct ieee80211_local *local,
                                sdata->wmm_acm |= BIT(1) | BIT(2); /* BK/- */
                        if (uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_BK)
                                uapsd = true;
+                       params[ac].mu_edca = !!mu_edca;
+                       if (mu_edca)
+                               params[ac].mu_edca_param_rec = mu_edca->ac_bk;
                        break;
                case 2: /* AC_VI */
                        ac = IEEE80211_AC_VI;
@@ -1756,6 +1856,9 @@ static bool ieee80211_sta_wmm_params(struct ieee80211_local *local,
                                sdata->wmm_acm |= BIT(4) | BIT(5); /* CL/VI */
                        if (uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_VI)
                                uapsd = true;
+                       params[ac].mu_edca = !!mu_edca;
+                       if (mu_edca)
+                               params[ac].mu_edca_param_rec = mu_edca->ac_vi;
                        break;
                case 3: /* AC_VO */
                        ac = IEEE80211_AC_VO;
@@ -1763,6 +1866,9 @@ static bool ieee80211_sta_wmm_params(struct ieee80211_local *local,
                                sdata->wmm_acm |= BIT(6) | BIT(7); /* VO/NC */
                        if (uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_VO)
                                uapsd = true;
+                       params[ac].mu_edca = !!mu_edca;
+                       if (mu_edca)
+                               params[ac].mu_edca_param_rec = mu_edca->ac_vo;
                        break;
                case 0: /* AC_BE */
                default:
@@ -1771,6 +1877,9 @@ static bool ieee80211_sta_wmm_params(struct ieee80211_local *local,
                                sdata->wmm_acm |= BIT(0) | BIT(3); /* BE/EE */
                        if (uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_BE)
                                uapsd = true;
+                       params[ac].mu_edca = !!mu_edca;
+                       if (mu_edca)
+                               params[ac].mu_edca_param_rec = mu_edca->ac_be;
                        break;
                }
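Each WMM AC record now optionally carries the matching MU EDCA parameter record. A hedged sketch of the mapping performed across the four cases above (ACI 1 = BK, 2 = VI, 3 = VO, 0/default = BE):

    /* Hedged sketch: copy the per-AC MU EDCA record selected by ACI.
     * 'mu_edca' may be NULL when the AP sent no MU EDCA Parameter Set.
     */
    static void fill_mu_edca(struct ieee80211_tx_queue_params *p,
                             const struct ieee80211_mu_edca_param_set *mu_edca,
                             int aci)
    {
            p->mu_edca = mu_edca != NULL;
            if (!mu_edca)
                    return;
            switch (aci) {
            case 1:  p->mu_edca_param_rec = mu_edca->ac_bk; break;
            case 2:  p->mu_edca_param_rec = mu_edca->ac_vi; break;
            case 3:  p->mu_edca_param_rec = mu_edca->ac_vo; break;
            default: p->mu_edca_param_rec = mu_edca->ac_be; break;
            }
    }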
 
@@ -2219,6 +2328,20 @@ void ieee80211_sta_tx_notify(struct ieee80211_sub_if_data *sdata,
                ieee80211_sta_reset_conn_monitor(sdata);
 }
 
+static void ieee80211_mlme_send_probe_req(struct ieee80211_sub_if_data *sdata,
+                                         const u8 *src, const u8 *dst,
+                                         const u8 *ssid, size_t ssid_len,
+                                         struct ieee80211_channel *channel)
+{
+       struct sk_buff *skb;
+
+       skb = ieee80211_build_probe_req(sdata, src, dst, (u32)-1, channel,
+                                       ssid, ssid_len, NULL, 0,
+                                       IEEE80211_PROBE_FLAG_DIRECTED);
+       if (skb)
+               ieee80211_tx_skb(sdata, skb);
+}
+
 static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata)
 {
        struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
@@ -2265,10 +2388,9 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata)
                else
                        ssid_len = ssid[1];
 
-               ieee80211_send_probe_req(sdata, sdata->vif.addr, dst,
-                                        ssid + 2, ssid_len, NULL,
-                                        0, (u32) -1, true, 0,
-                                        ifmgd->associated->channel, false);
+               ieee80211_mlme_send_probe_req(sdata, sdata->vif.addr, dst,
+                                             ssid + 2, ssid_len,
+                                             ifmgd->associated->channel);
                rcu_read_unlock();
        }
 
@@ -2370,7 +2492,7 @@ struct sk_buff *ieee80211_ap_probereq_get(struct ieee80211_hw *hw,
        skb = ieee80211_build_probe_req(sdata, sdata->vif.addr, cbss->bssid,
                                        (u32) -1, cbss->channel,
                                        ssid + 2, ssid_len,
-                                       NULL, 0, true);
+                                       NULL, 0, IEEE80211_PROBE_FLAG_DIRECTED);
        rcu_read_unlock();
 
        return skb;
@@ -3008,6 +3130,25 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
                goto out;
        }
 
+       /*
+        * If AP doesn't support HT, or it doesn't have HE mandatory IEs, mark
+        * HE as disabled. If on the 5GHz band, make sure it supports VHT.
+        */
+       if (ifmgd->flags & IEEE80211_STA_DISABLE_HT ||
+           (sband->band == NL80211_BAND_5GHZ &&
+            ifmgd->flags & IEEE80211_STA_DISABLE_VHT) ||
+           (!elems.he_cap && !elems.he_operation))
+               ifmgd->flags |= IEEE80211_STA_DISABLE_HE;
+
+       if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HE) &&
+           (!elems.he_cap || !elems.he_operation)) {
+               mutex_unlock(&sdata->local->sta_mtx);
+               sdata_info(sdata,
+                          "HE AP is missing HE capability/operation\n");
+               ret = false;
+               goto out;
+       }
+
        /* Set up internal HT/VHT capabilities */
        if (elems.ht_cap_elem && !(ifmgd->flags & IEEE80211_STA_DISABLE_HT))
                ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband,
@@ -3017,6 +3158,48 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
                ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband,
                                                    elems.vht_cap_elem, sta);
 
+       if (elems.he_operation && !(ifmgd->flags & IEEE80211_STA_DISABLE_HE) &&
+           elems.he_cap) {
+               ieee80211_he_cap_ie_to_sta_he_cap(sdata, sband,
+                                                 elems.he_cap,
+                                                 elems.he_cap_len,
+                                                 sta);
+
+               bss_conf->he_support = sta->sta.he_cap.has_he;
+       } else {
+               bss_conf->he_support = false;
+       }
+
+       if (bss_conf->he_support) {
+               u32 he_oper_params =
+                       le32_to_cpu(elems.he_operation->he_oper_params);
+
+               bss_conf->bss_color = he_oper_params &
+                                     IEEE80211_HE_OPERATION_BSS_COLOR_MASK;
+               bss_conf->htc_trig_based_pkt_ext =
+                       (he_oper_params &
+                        IEEE80211_HE_OPERATION_DFLT_PE_DURATION_MASK) >>
+                       IEEE80211_HE_OPERATION_DFLT_PE_DURATION_OFFSET;
+               bss_conf->frame_time_rts_th =
+                       (he_oper_params &
+                        IEEE80211_HE_OPERATION_RTS_THRESHOLD_MASK) >>
+                       IEEE80211_HE_OPERATION_RTS_THRESHOLD_OFFSET;
+
+               bss_conf->multi_sta_back_32bit =
+                       sta->sta.he_cap.he_cap_elem.mac_cap_info[2] &
+                       IEEE80211_HE_MAC_CAP2_32BIT_BA_BITMAP;
+
+               bss_conf->ack_enabled =
+                       sta->sta.he_cap.he_cap_elem.mac_cap_info[2] &
+                       IEEE80211_HE_MAC_CAP2_ACK_EN;
+
+               bss_conf->uora_exists = !!elems.uora_element;
+               if (elems.uora_element)
+                       bss_conf->uora_ocw_range = elems.uora_element[0];
+
+               /* TODO: OPEN: what happens if BSS color disable is set? */
+       }
+
        /*
         * Some APs, e.g. Netgear WNDR3700, report invalid HT operation data
         * in their association response, so ignore that data for our own
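
A minimal standalone sketch of the field extraction used above: mask the
32-bit HE operation parameters word, then shift right by the field's offset.
The mask/offset values here are illustrative assumptions, not the kernel's
definitions.

    /* Sketch: extract a sub-field from a 32-bit parameters word. */
    #include <stdint.h>
    #include <stdio.h>

    #define PE_DURATION_MASK   0x00000e00u /* bits 9..11 (assumed) */
    #define PE_DURATION_OFFSET 9

    static uint32_t get_pe_duration(uint32_t he_oper_params)
    {
            return (he_oper_params & PE_DURATION_MASK) >> PE_DURATION_OFFSET;
    }

    int main(void)
    {
            /* bits 9 and 10 set -> field value 3 */
            printf("%u\n", (unsigned)get_pe_duration(0x00000600u));
            return 0;
    }
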
@@ -3076,7 +3259,8 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
        if (ifmgd->flags & IEEE80211_STA_DISABLE_WMM) {
                ieee80211_set_wmm_default(sdata, false, false);
        } else if (!ieee80211_sta_wmm_params(local, sdata, elems.wmm_param,
-                                            elems.wmm_param_len)) {
+                                            elems.wmm_param_len,
+                                            elems.mu_edca_param_set)) {
                /* still enable QoS since we might have HT/VHT */
                ieee80211_set_wmm_default(sdata, false, true);
                /* set the disable-WMM flag in this case to disable
@@ -3590,7 +3774,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 
        if (!(ifmgd->flags & IEEE80211_STA_DISABLE_WMM) &&
            ieee80211_sta_wmm_params(local, sdata, elems.wmm_param,
-                                    elems.wmm_param_len))
+                                    elems.wmm_param_len,
+                                    elems.mu_edca_param_set))
                changed |= BSS_CHANGED_QOS;
 
        /*
@@ -3629,7 +3814,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 
        if (ieee80211_config_bw(sdata, sta,
                                elems.ht_cap_elem, elems.ht_operation,
-                               elems.vht_operation, bssid, &changed)) {
+                               elems.vht_operation, elems.he_operation,
+                               bssid, &changed)) {
                mutex_unlock(&local->sta_mtx);
                sdata_info(sdata,
                           "failed to follow AP %pM bandwidth change, disconnect\n",
@@ -4266,6 +4452,68 @@ static u8 ieee80211_ht_vht_rx_chains(struct ieee80211_sub_if_data *sdata,
        return chains;
 }
 
+static bool
+ieee80211_verify_sta_he_mcs_support(struct ieee80211_supported_band *sband,
+                                   const struct ieee80211_he_operation *he_op)
+{
+       const struct ieee80211_sta_he_cap *sta_he_cap =
+               ieee80211_get_he_sta_cap(sband);
+       u16 ap_min_req_set;
+       int i;
+
+       if (!sta_he_cap || !he_op)
+               return false;
+
+       ap_min_req_set = le16_to_cpu(he_op->he_mcs_nss_set);
+
+       /* Need to check for 80 MHz, 160 MHz and 80+80 MHz */
+       for (i = 0; i < 3; i++) {
+               const struct ieee80211_he_mcs_nss_supp *sta_mcs_nss_supp =
+                       &sta_he_cap->he_mcs_nss_supp;
+               u16 sta_mcs_map_rx =
+                       le16_to_cpu(((__le16 *)sta_mcs_nss_supp)[2 * i]);
+               u16 sta_mcs_map_tx =
+                       le16_to_cpu(((__le16 *)sta_mcs_nss_supp)[2 * i + 1]);
+               u8 nss;
+               bool verified = true;
+
+               /*
+                * For each bandwidth there is a maximum of 8 spatial streams
+                * possible. Each of the sta_mcs_map_* is a 16-bit struct built
+                * of 2 bits per NSS (1-8), with the values defined in enum
+                * ieee80211_he_mcs_support. Make sure the STA's TX and RX
+                * capabilities aren't less than the AP's minimum requirements
+                * for this HE BSS, per spatial stream.
+                * It is enough to find one bandwidth that meets the
+                * requirements.
+                */
+               for (nss = 8; nss > 0; nss--) {
+                       u8 sta_rx_val = (sta_mcs_map_rx >> (2 * (nss - 1))) & 3;
+                       u8 sta_tx_val = (sta_mcs_map_tx >> (2 * (nss - 1))) & 3;
+                       u8 ap_val = (ap_min_req_set >> (2 * (nss - 1))) & 3;
+
+                       if (ap_val == IEEE80211_HE_MCS_NOT_SUPPORTED)
+                               continue;
+
+                       /*
+                        * Make sure the HE AP doesn't require MCSs that aren't
+                        * supported by the client
+                        */
+                       if (sta_rx_val == IEEE80211_HE_MCS_NOT_SUPPORTED ||
+                           sta_tx_val == IEEE80211_HE_MCS_NOT_SUPPORTED ||
+                           (ap_val > sta_rx_val) || (ap_val > sta_tx_val)) {
+                               verified = false;
+                               break;
+                       }
+               }
+
+               if (verified)
+                       return true;
+       }
+
+       /* If here, STA doesn't meet AP's HE min requirements */
+       return false;
+}
+
 static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
                                  struct cfg80211_bss *cbss)
 {
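
The per-NSS check above walks a 16-bit MCS map holding 2 bits per spatial
stream. A small sketch of that decode, with the "not supported" value (3)
assumed to match enum ieee80211_he_mcs_support:

    /* Sketch: decode a 16-bit HE MCS map, 2 bits per NSS (1..8). */
    #include <stdint.h>
    #include <stdio.h>

    #define HE_MCS_NOT_SUPPORTED 3 /* assumed to match the kernel enum */

    static unsigned int mcs_for_nss(uint16_t mcs_map, unsigned int nss)
    {
            return (mcs_map >> (2 * (nss - 1))) & 3;
    }

    int main(void)
    {
            /* NSS 1: MCS 0-11, NSS 2: MCS 0-9, NSS 3-8: not supported */
            uint16_t map = 0xfff6;
            unsigned int nss;

            for (nss = 1; nss <= 8; nss++)
                    printf("NSS %u: val %u%s\n", nss, mcs_for_nss(map, nss),
                           mcs_for_nss(map, nss) == HE_MCS_NOT_SUPPORTED ?
                           " (not supported)" : "");
            return 0;
    }
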
@@ -4274,6 +4522,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
        const struct ieee80211_ht_cap *ht_cap = NULL;
        const struct ieee80211_ht_operation *ht_oper = NULL;
        const struct ieee80211_vht_operation *vht_oper = NULL;
+       const struct ieee80211_he_operation *he_oper = NULL;
        struct ieee80211_supported_band *sband;
        struct cfg80211_chan_def chandef;
        int ret;
@@ -4329,6 +4578,24 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
                }
        }
 
+       if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HE) &&
+           ieee80211_get_he_sta_cap(sband)) {
+               const struct cfg80211_bss_ies *ies;
+               const u8 *he_oper_ie;
+
+               ies = rcu_dereference(cbss->ies);
+               he_oper_ie = cfg80211_find_ext_ie(WLAN_EID_EXT_HE_OPERATION,
+                                                 ies->data, ies->len);
+               if (he_oper_ie &&
+                   he_oper_ie[1] == ieee80211_he_oper_size(&he_oper_ie[3]))
+                       he_oper = (void *)(he_oper_ie + 3);
+               else
+                       he_oper = NULL;
+
+               if (!ieee80211_verify_sta_he_mcs_support(sband, he_oper))
+                       ifmgd->flags |= IEEE80211_STA_DISABLE_HE;
+       }
+
        /* Allow VHT if at least one channel on the sband supports 80 MHz */
        have_80mhz = false;
        for (i = 0; i < sband->n_channels; i++) {
@@ -4345,7 +4612,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
 
        ifmgd->flags |= ieee80211_determine_chantype(sdata, sband,
                                                     cbss->channel,
-                                                    ht_oper, vht_oper,
+                                                    ht_oper, vht_oper, he_oper,
                                                     &chandef, false);
 
        sdata->needed_rx_chains = min(ieee80211_ht_vht_rx_chains(sdata, cbss),
@@ -4751,8 +5018,9 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
                    req->crypto.ciphers_pairwise[i] == WLAN_CIPHER_SUITE_WEP104) {
                        ifmgd->flags |= IEEE80211_STA_DISABLE_HT;
                        ifmgd->flags |= IEEE80211_STA_DISABLE_VHT;
+                       ifmgd->flags |= IEEE80211_STA_DISABLE_HE;
                        netdev_info(sdata->dev,
-                                   "disabling HT/VHT due to WEP/TKIP use\n");
+                                   "disabling HE/HT/VHT due to WEP/TKIP use\n");
                }
        }
 
index f1d40b6645ff2777713885bef12936860f5ec9c8..8ef4153cd2994a73b4be5733a1dc12e9fa329640 100644 (file)
@@ -262,7 +262,7 @@ static void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc,
        if (roc->mgmt_tx_cookie) {
                if (!WARN_ON(!roc->frame)) {
                        ieee80211_tx_skb_tid_band(roc->sdata, roc->frame, 7,
-                                                 roc->chan->band);
+                                                 roc->chan->band, 0);
                        roc->frame = NULL;
                }
        } else {
index 76048b53c5b27637d343868c69ab54c928c6614d..07fb219327d656f83845793117b165aabd623ee6 100644 (file)
@@ -751,4 +751,3 @@ rc80211_minstrel_exit(void)
 {
        ieee80211_rate_control_unregister(&mac80211_minstrel);
 }
-
index 932985ca4e66829ffa559fac1a10243e93043101..64742f2765c4846c36d3f9304314059023215d0a 100644 (file)
@@ -175,6 +175,20 @@ ieee80211_rx_radiotap_hdrlen(struct ieee80211_local *local,
                len += 12;
        }
 
+       if (status->encoding == RX_ENC_HE &&
+           status->flag & RX_FLAG_RADIOTAP_HE) {
+               len = ALIGN(len, 2);
+               len += 12;
+               BUILD_BUG_ON(sizeof(struct ieee80211_radiotap_he) != 12);
+       }
+
+       if (status->encoding == RX_ENC_HE &&
+           status->flag & RX_FLAG_RADIOTAP_HE_MU) {
+               len = ALIGN(len, 2);
+               len += 12;
+               BUILD_BUG_ON(sizeof(struct ieee80211_radiotap_he_mu) != 12);
+       }
+
        if (status->chains) {
                /* antenna and antenna signal fields */
                len += 2 * hweight8(status->chains);
@@ -263,6 +277,19 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
        int mpdulen, chain;
        unsigned long chains = status->chains;
        struct ieee80211_vendor_radiotap rtap = {};
+       struct ieee80211_radiotap_he he = {};
+       struct ieee80211_radiotap_he_mu he_mu = {};
+
+       if (status->flag & RX_FLAG_RADIOTAP_HE) {
+               he = *(struct ieee80211_radiotap_he *)skb->data;
+               skb_pull(skb, sizeof(he));
+               WARN_ON_ONCE(status->encoding != RX_ENC_HE);
+       }
+
+       if (status->flag & RX_FLAG_RADIOTAP_HE_MU) {
+               he_mu = *(struct ieee80211_radiotap_he_mu *)skb->data;
+               skb_pull(skb, sizeof(he_mu));
+       }
 
        if (status->flag & RX_FLAG_RADIOTAP_VENDOR_DATA) {
                rtap = *(struct ieee80211_vendor_radiotap *)skb->data;
@@ -520,6 +547,89 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
                *pos++ = flags;
        }
 
+       if (status->encoding == RX_ENC_HE &&
+           status->flag & RX_FLAG_RADIOTAP_HE) {
+#define HE_PREP(f, val)        cpu_to_le16(FIELD_PREP(IEEE80211_RADIOTAP_HE_##f, val))
+
+               if (status->enc_flags & RX_ENC_FLAG_STBC_MASK) {
+                       he.data6 |= HE_PREP(DATA6_NSTS,
+                                           FIELD_GET(RX_ENC_FLAG_STBC_MASK,
+                                                     status->enc_flags));
+                       he.data3 |= HE_PREP(DATA3_STBC, 1);
+               } else {
+                       he.data6 |= HE_PREP(DATA6_NSTS, status->nss);
+               }
+
+#define CHECK_GI(s) \
+       BUILD_BUG_ON(IEEE80211_RADIOTAP_HE_DATA5_GI_##s != \
+                    (int)NL80211_RATE_INFO_HE_GI_##s)
+
+               CHECK_GI(0_8);
+               CHECK_GI(1_6);
+               CHECK_GI(3_2);
+
+               he.data3 |= HE_PREP(DATA3_DATA_MCS, status->rate_idx);
+               he.data3 |= HE_PREP(DATA3_DATA_DCM, status->he_dcm);
+               he.data3 |= HE_PREP(DATA3_CODING,
+                                   !!(status->enc_flags & RX_ENC_FLAG_LDPC));
+
+               he.data5 |= HE_PREP(DATA5_GI, status->he_gi);
+
+               switch (status->bw) {
+               case RATE_INFO_BW_20:
+                       he.data5 |= HE_PREP(DATA5_DATA_BW_RU_ALLOC,
+                                           IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_20MHZ);
+                       break;
+               case RATE_INFO_BW_40:
+                       he.data5 |= HE_PREP(DATA5_DATA_BW_RU_ALLOC,
+                                           IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_40MHZ);
+                       break;
+               case RATE_INFO_BW_80:
+                       he.data5 |= HE_PREP(DATA5_DATA_BW_RU_ALLOC,
+                                           IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_80MHZ);
+                       break;
+               case RATE_INFO_BW_160:
+                       he.data5 |= HE_PREP(DATA5_DATA_BW_RU_ALLOC,
+                                           IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_160MHZ);
+                       break;
+               case RATE_INFO_BW_HE_RU:
+#define CHECK_RU_ALLOC(s) \
+       BUILD_BUG_ON(IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_##s##T != \
+                    NL80211_RATE_INFO_HE_RU_ALLOC_##s + 4)
+
+                       CHECK_RU_ALLOC(26);
+                       CHECK_RU_ALLOC(52);
+                       CHECK_RU_ALLOC(106);
+                       CHECK_RU_ALLOC(242);
+                       CHECK_RU_ALLOC(484);
+                       CHECK_RU_ALLOC(996);
+                       CHECK_RU_ALLOC(2x996);
+
+                       he.data5 |= HE_PREP(DATA5_DATA_BW_RU_ALLOC,
+                                           status->he_ru + 4);
+                       break;
+               default:
+                       WARN_ONCE(1, "Invalid SU BW %d\n", status->bw);
+               }
+
+               /* ensure 2 byte alignment */
+               while ((pos - (u8 *)rthdr) & 1)
+                       pos++;
+               rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_HE);
+               memcpy(pos, &he, sizeof(he));
+               pos += sizeof(he);
+       }
+
+       if (status->encoding == RX_ENC_HE &&
+           status->flag & RX_FLAG_RADIOTAP_HE_MU) {
+               /* ensure 2 byte alignment */
+               while ((pos - (u8 *)rthdr) & 1)
+                       pos++;
+               rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_HE_MU);
+               memcpy(pos, &he_mu, sizeof(he_mu));
+               pos += sizeof(he_mu);
+       }
+
        for_each_set_bit(chain, &chains, IEEE80211_MAX_CHAINS) {
                *pos++ = status->chain_signal[chain];
                *pos++ = chain;
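
Radiotap fields carry an alignment requirement; the loop above pads pos to a
2-byte boundary (relative to the start of the radiotap header) before copying
in the fixed 12-byte HE field and flagging it in it_present. A tiny sketch of
that alignment idiom, with the buffer and sizes assumed:

    /* Sketch: pad a cursor to the next 2-byte boundary, then append. */
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            uint8_t buf[32] = {0};
            uint8_t *pos = buf + 5;        /* pretend 5 header bytes written */
            uint8_t field[12] = {0xaa};    /* stand-in for the HE field */

            while ((pos - buf) & 1)        /* 2-byte alignment */
                    pos++;

            printf("field starts at offset %td\n", pos - buf);
            memcpy(pos, field, sizeof(field));
            pos += sizeof(field);
            return 0;
    }
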
@@ -613,6 +723,12 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
                rcu_dereference(local->monitor_sdata);
        bool only_monitor = false;
 
+       if (status->flag & RX_FLAG_RADIOTAP_HE)
+               rtap_space += sizeof(struct ieee80211_radiotap_he);
+
+       if (status->flag & RX_FLAG_RADIOTAP_HE_MU)
+               rtap_space += sizeof(struct ieee80211_radiotap_he_mu);
+
        if (unlikely(status->flag & RX_FLAG_RADIOTAP_VENDOR_DATA)) {
                struct ieee80211_vendor_radiotap *rtap = (void *)origskb->data;
 
@@ -3238,7 +3354,7 @@ ieee80211_rx_h_action_return(struct ieee80211_rx_data *rx)
                }
 
                __ieee80211_tx_skb_tid_band(rx->sdata, nskb, 7,
-                                           status->band);
+                                           status->band, 0);
        }
        dev_kfree_skb(rx->skb);
        return RX_QUEUED;
@@ -3383,8 +3499,7 @@ static void ieee80211_rx_handlers_result(struct ieee80211_rx_data *rx,
                status = IEEE80211_SKB_RXCB((rx->skb));
 
                sband = rx->local->hw.wiphy->bands[status->band];
-               if (!(status->encoding == RX_ENC_HT) &&
-                   !(status->encoding == RX_ENC_VHT))
+               if (status->encoding == RX_ENC_LEGACY)
                        rate = &sband->bitrates[status->rate_idx];
 
                ieee80211_rx_cooked_monitor(rx, rate);
@@ -4383,6 +4498,14 @@ void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta,
                                      status->rate_idx, status->nss))
                                goto drop;
                        break;
+               case RX_ENC_HE:
+                       if (WARN_ONCE(status->rate_idx > 11 ||
+                                     !status->nss ||
+                                     status->nss > 8,
+                                     "Rate marked as an HE rate but data is invalid: MCS: %d, NSS: %d\n",
+                                     status->rate_idx, status->nss))
+                               goto drop;
+                       break;
                default:
                        WARN_ON_ONCE(1);
                        /* fall through */
index 2e917a6d239d234ce671b8b4017dbd23c4be5b2e..5d2a11777718c42c3ba4affb190904d2b7bd61de 100644 (file)
@@ -20,6 +20,7 @@
 #include <net/sch_generic.h>
 #include <linux/slab.h>
 #include <linux/export.h>
+#include <linux/random.h>
 #include <net/mac80211.h>
 
 #include "ieee80211_i.h"
@@ -293,6 +294,7 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_local *local)
        struct cfg80211_chan_def chandef;
        u8 bands_used = 0;
        int i, ielen, n_chans;
+       u32 flags = 0;
 
        req = rcu_dereference_protected(local->scan_req,
                                        lockdep_is_held(&local->mtx));
@@ -331,12 +333,16 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_local *local)
        local->hw_scan_req->req.n_channels = n_chans;
        ieee80211_prepare_scan_chandef(&chandef, req->scan_width);
 
+       if (req->flags & NL80211_SCAN_FLAG_MIN_PREQ_CONTENT)
+               flags |= IEEE80211_PROBE_FLAG_MIN_CONTENT;
+
        ielen = ieee80211_build_preq_ies(local,
                                         (u8 *)local->hw_scan_req->req.ie,
                                         local->hw_scan_ies_bufsize,
                                         &local->hw_scan_req->ies,
                                         req->ie, req->ie_len,
-                                        bands_used, req->rates, &chandef);
+                                        bands_used, req->rates, &chandef,
+                                        flags);
        local->hw_scan_req->req.ie_len = ielen;
        local->hw_scan_req->req.no_cck = req->no_cck;
        ether_addr_copy(local->hw_scan_req->req.mac_addr, req->mac_addr);
@@ -528,6 +534,35 @@ void ieee80211_run_deferred_scan(struct ieee80211_local *local)
                                     round_jiffies_relative(0));
 }
 
+static void ieee80211_send_scan_probe_req(struct ieee80211_sub_if_data *sdata,
+                                         const u8 *src, const u8 *dst,
+                                         const u8 *ssid, size_t ssid_len,
+                                         const u8 *ie, size_t ie_len,
+                                         u32 ratemask, u32 flags, u32 tx_flags,
+                                         struct ieee80211_channel *channel)
+{
+       struct sk_buff *skb;
+       u32 txdata_flags = 0;
+
+       skb = ieee80211_build_probe_req(sdata, src, dst, ratemask, channel,
+                                       ssid, ssid_len,
+                                       ie, ie_len, flags);
+
+       if (skb) {
+               if (flags & IEEE80211_PROBE_FLAG_RANDOM_SN) {
+                       struct ieee80211_hdr *hdr = (void *)skb->data;
+                       u16 sn = get_random_u32();
+
+                       txdata_flags |= IEEE80211_TX_NO_SEQNO;
+                       hdr->seq_ctrl =
+                               cpu_to_le16(IEEE80211_SN_TO_SEQ(sn));
+               }
+               IEEE80211_SKB_CB(skb)->flags |= tx_flags;
+               ieee80211_tx_skb_tid_band(sdata, skb, 7, channel->band,
+                                         txdata_flags);
+       }
+}
+
 static void ieee80211_scan_state_send_probe(struct ieee80211_local *local,
                                            unsigned long *next_delay)
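
With IEEE80211_PROBE_FLAG_RANDOM_SN, the helper above writes a random
sequence number itself and sets IEEE80211_TX_NO_SEQNO so the TX path won't
overwrite it. A standalone sketch of the 802.11 sequence-control packing
(12-bit SN in the high bits, 4-bit fragment number low; plain rand() stands
in for the kernel RNG):

    /* Sketch: pack a random 12-bit SN into a 16-bit seq_ctrl field. */
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define SN_TO_SEQ(sn) ((uint16_t)(((sn) & 0x0fff) << 4))

    int main(void)
    {
            uint16_t sn = (uint16_t)rand();

            printf("seq_ctrl = 0x%04x (SN %u)\n", SN_TO_SEQ(sn),
                   (unsigned)(sn & 0x0fff));
            return 0;
    }
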
 {
@@ -535,7 +570,7 @@ static void ieee80211_scan_state_send_probe(struct ieee80211_local *local,
        struct ieee80211_sub_if_data *sdata;
        struct cfg80211_scan_request *scan_req;
        enum nl80211_band band = local->hw.conf.chandef.chan->band;
-       u32 tx_flags;
+       u32 flags = 0, tx_flags;
 
        scan_req = rcu_dereference_protected(local->scan_req,
                                             lockdep_is_held(&local->mtx));
@@ -543,17 +578,21 @@ static void ieee80211_scan_state_send_probe(struct ieee80211_local *local,
        tx_flags = IEEE80211_TX_INTFL_OFFCHAN_TX_OK;
        if (scan_req->no_cck)
                tx_flags |= IEEE80211_TX_CTL_NO_CCK_RATE;
+       if (scan_req->flags & NL80211_SCAN_FLAG_MIN_PREQ_CONTENT)
+               flags |= IEEE80211_PROBE_FLAG_MIN_CONTENT;
+       if (scan_req->flags & NL80211_SCAN_FLAG_RANDOM_SN)
+               flags |= IEEE80211_PROBE_FLAG_RANDOM_SN;
 
        sdata = rcu_dereference_protected(local->scan_sdata,
                                          lockdep_is_held(&local->mtx));
 
        for (i = 0; i < scan_req->n_ssids; i++)
-               ieee80211_send_probe_req(
+               ieee80211_send_scan_probe_req(
                        sdata, local->scan_addr, scan_req->bssid,
                        scan_req->ssids[i].ssid, scan_req->ssids[i].ssid_len,
                        scan_req->ie, scan_req->ie_len,
-                       scan_req->rates[band], false,
-                       tx_flags, local->hw.conf.chandef.chan, true);
+                       scan_req->rates[band], flags,
+                       tx_flags, local->hw.conf.chandef.chan);
 
        /*
         * After sending probe requests, wait for probe responses
@@ -1141,6 +1180,7 @@ int __ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata,
        u32 rate_masks[NUM_NL80211_BANDS] = {};
        u8 bands_used = 0;
        u8 *ie;
+       u32 flags = 0;
 
        iebufsz = local->scan_ies_len + req->ie_len;
 
@@ -1157,6 +1197,9 @@ int __ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata,
                }
        }
 
+       if (req->flags & NL80211_SCAN_FLAG_MIN_PREQ_CONTENT)
+               flags |= IEEE80211_PROBE_FLAG_MIN_CONTENT;
+
        ie = kcalloc(iebufsz, num_bands, GFP_KERNEL);
        if (!ie) {
                ret = -ENOMEM;
@@ -1167,7 +1210,8 @@ int __ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata,
 
        ieee80211_build_preq_ies(local, ie, num_bands * iebufsz,
                                 &sched_scan_ies, req->ie,
-                                req->ie_len, bands_used, rate_masks, &chandef);
+                                req->ie_len, bands_used, rate_masks, &chandef,
+                                flags);
 
        ret = drv_sched_scan_start(local, sdata, req, &sched_scan_ies);
        if (ret == 0) {
index 6428f1ac37b67afda55d7335bee59b2d140813a8..f34202242d24d074f5cca49c7b4b7a101114f73b 100644 (file)
@@ -1323,6 +1323,11 @@ static void ieee80211_send_null_response(struct sta_info *sta, int tid,
        struct ieee80211_tx_info *info;
        struct ieee80211_chanctx_conf *chanctx_conf;
 
+       /* Don't send NDPs when the STA is connected with HE */
+       if (sdata->vif.type == NL80211_IFTYPE_STATION &&
+           !(sdata->u.mgd.flags & IEEE80211_STA_DISABLE_HE))
+               return;
+
        if (qos) {
                fc = cpu_to_le16(IEEE80211_FTYPE_DATA |
                                 IEEE80211_STYPE_QOS_NULLFUNC |
@@ -1391,7 +1396,7 @@ static void ieee80211_send_null_response(struct sta_info *sta, int tid,
        }
 
        info->band = chanctx_conf->def.chan->band;
-       ieee80211_xmit(sdata, sta, skb);
+       ieee80211_xmit(sdata, sta, skb, 0);
        rcu_read_unlock();
 }
 
@@ -1968,7 +1973,7 @@ sta_get_last_rx_stats(struct sta_info *sta)
        return stats;
 }
 
-static void sta_stats_decode_rate(struct ieee80211_local *local, u16 rate,
+static void sta_stats_decode_rate(struct ieee80211_local *local, u32 rate,
                                  struct rate_info *rinfo)
 {
        rinfo->bw = STA_STATS_GET(BW, rate);
@@ -2005,6 +2010,14 @@ static void sta_stats_decode_rate(struct ieee80211_local *local, u16 rate,
                rinfo->legacy = DIV_ROUND_UP(brate, 1 << shift);
                break;
                }
+       case STA_STATS_RATE_TYPE_HE:
+               rinfo->flags = RATE_INFO_FLAGS_HE_MCS;
+               rinfo->mcs = STA_STATS_GET(HE_MCS, rate);
+               rinfo->nss = STA_STATS_GET(HE_NSS, rate);
+               rinfo->he_gi = STA_STATS_GET(HE_GI, rate);
+               rinfo->he_ru_alloc = STA_STATS_GET(HE_RU, rate);
+               rinfo->he_dcm = STA_STATS_GET(HE_DCM, rate);
+               break;
        }
 }
 
@@ -2101,38 +2114,38 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
 
        drv_sta_statistics(local, sdata, &sta->sta, sinfo);
 
-       sinfo->filled |= BIT(NL80211_STA_INFO_INACTIVE_TIME) |
-                        BIT(NL80211_STA_INFO_STA_FLAGS) |
-                        BIT(NL80211_STA_INFO_BSS_PARAM) |
-                        BIT(NL80211_STA_INFO_CONNECTED_TIME) |
-                        BIT(NL80211_STA_INFO_RX_DROP_MISC);
+       sinfo->filled |= BIT_ULL(NL80211_STA_INFO_INACTIVE_TIME) |
+                        BIT_ULL(NL80211_STA_INFO_STA_FLAGS) |
+                        BIT_ULL(NL80211_STA_INFO_BSS_PARAM) |
+                        BIT_ULL(NL80211_STA_INFO_CONNECTED_TIME) |
+                        BIT_ULL(NL80211_STA_INFO_RX_DROP_MISC);
 
        if (sdata->vif.type == NL80211_IFTYPE_STATION) {
                sinfo->beacon_loss_count = sdata->u.mgd.beacon_loss_count;
-               sinfo->filled |= BIT(NL80211_STA_INFO_BEACON_LOSS);
+               sinfo->filled |= BIT_ULL(NL80211_STA_INFO_BEACON_LOSS);
        }
 
        sinfo->connected_time = ktime_get_seconds() - sta->last_connected;
        sinfo->inactive_time =
                jiffies_to_msecs(jiffies - ieee80211_sta_last_active(sta));
 
-       if (!(sinfo->filled & (BIT(NL80211_STA_INFO_TX_BYTES64) |
-                              BIT(NL80211_STA_INFO_TX_BYTES)))) {
+       if (!(sinfo->filled & (BIT_ULL(NL80211_STA_INFO_TX_BYTES64) |
+                              BIT_ULL(NL80211_STA_INFO_TX_BYTES)))) {
                sinfo->tx_bytes = 0;
                for (ac = 0; ac < IEEE80211_NUM_ACS; ac++)
                        sinfo->tx_bytes += sta->tx_stats.bytes[ac];
-               sinfo->filled |= BIT(NL80211_STA_INFO_TX_BYTES64);
+               sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_BYTES64);
        }
 
-       if (!(sinfo->filled & BIT(NL80211_STA_INFO_TX_PACKETS))) {
+       if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_PACKETS))) {
                sinfo->tx_packets = 0;
                for (ac = 0; ac < IEEE80211_NUM_ACS; ac++)
                        sinfo->tx_packets += sta->tx_stats.packets[ac];
-               sinfo->filled |= BIT(NL80211_STA_INFO_TX_PACKETS);
+               sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_PACKETS);
        }
 
-       if (!(sinfo->filled & (BIT(NL80211_STA_INFO_RX_BYTES64) |
-                              BIT(NL80211_STA_INFO_RX_BYTES)))) {
+       if (!(sinfo->filled & (BIT_ULL(NL80211_STA_INFO_RX_BYTES64) |
+                              BIT_ULL(NL80211_STA_INFO_RX_BYTES)))) {
                sinfo->rx_bytes += sta_get_stats_bytes(&sta->rx_stats);
 
                if (sta->pcpu_rx_stats) {
@@ -2144,10 +2157,10 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
                        }
                }
 
-               sinfo->filled |= BIT(NL80211_STA_INFO_RX_BYTES64);
+               sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_BYTES64);
        }
 
-       if (!(sinfo->filled & BIT(NL80211_STA_INFO_RX_PACKETS))) {
+       if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_RX_PACKETS))) {
                sinfo->rx_packets = sta->rx_stats.packets;
                if (sta->pcpu_rx_stats) {
                        for_each_possible_cpu(cpu) {
@@ -2157,17 +2170,17 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
                                sinfo->rx_packets += cpurxs->packets;
                        }
                }
-               sinfo->filled |= BIT(NL80211_STA_INFO_RX_PACKETS);
+               sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_PACKETS);
        }
 
-       if (!(sinfo->filled & BIT(NL80211_STA_INFO_TX_RETRIES))) {
+       if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_RETRIES))) {
                sinfo->tx_retries = sta->status_stats.retry_count;
-               sinfo->filled |= BIT(NL80211_STA_INFO_TX_RETRIES);
+               sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_RETRIES);
        }
 
-       if (!(sinfo->filled & BIT(NL80211_STA_INFO_TX_FAILED))) {
+       if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_FAILED))) {
                sinfo->tx_failed = sta->status_stats.retry_failed;
-               sinfo->filled |= BIT(NL80211_STA_INFO_TX_FAILED);
+               sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_FAILED);
        }
 
        sinfo->rx_dropped_misc = sta->rx_stats.dropped;
@@ -2182,23 +2195,23 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
 
        if (sdata->vif.type == NL80211_IFTYPE_STATION &&
            !(sdata->vif.driver_flags & IEEE80211_VIF_BEACON_FILTER)) {
-               sinfo->filled |= BIT(NL80211_STA_INFO_BEACON_RX) |
-                                BIT(NL80211_STA_INFO_BEACON_SIGNAL_AVG);
+               sinfo->filled |= BIT_ULL(NL80211_STA_INFO_BEACON_RX) |
+                                BIT_ULL(NL80211_STA_INFO_BEACON_SIGNAL_AVG);
                sinfo->rx_beacon_signal_avg = ieee80211_ave_rssi(&sdata->vif);
        }
 
        if (ieee80211_hw_check(&sta->local->hw, SIGNAL_DBM) ||
            ieee80211_hw_check(&sta->local->hw, SIGNAL_UNSPEC)) {
-               if (!(sinfo->filled & BIT(NL80211_STA_INFO_SIGNAL))) {
+               if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_SIGNAL))) {
                        sinfo->signal = (s8)last_rxstats->last_signal;
-                       sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL);
+                       sinfo->filled |= BIT_ULL(NL80211_STA_INFO_SIGNAL);
                }
 
                if (!sta->pcpu_rx_stats &&
-                   !(sinfo->filled & BIT(NL80211_STA_INFO_SIGNAL_AVG))) {
+                   !(sinfo->filled & BIT_ULL(NL80211_STA_INFO_SIGNAL_AVG))) {
                        sinfo->signal_avg =
                                -ewma_signal_read(&sta->rx_stats_avg.signal);
-                       sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL_AVG);
+                       sinfo->filled |= BIT_ULL(NL80211_STA_INFO_SIGNAL_AVG);
                }
        }
 
@@ -2207,11 +2220,11 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
         * pcpu statistics
         */
        if (last_rxstats->chains &&
-           !(sinfo->filled & (BIT(NL80211_STA_INFO_CHAIN_SIGNAL) |
-                              BIT(NL80211_STA_INFO_CHAIN_SIGNAL_AVG)))) {
-               sinfo->filled |= BIT(NL80211_STA_INFO_CHAIN_SIGNAL);
+           !(sinfo->filled & (BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL) |
+                              BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL_AVG)))) {
+               sinfo->filled |= BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL);
                if (!sta->pcpu_rx_stats)
-                       sinfo->filled |= BIT(NL80211_STA_INFO_CHAIN_SIGNAL_AVG);
+                       sinfo->filled |= BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL_AVG);
 
                sinfo->chains = last_rxstats->chains;
 
@@ -2223,15 +2236,15 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
                }
        }
 
-       if (!(sinfo->filled & BIT(NL80211_STA_INFO_TX_BITRATE))) {
+       if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_BITRATE))) {
                sta_set_rate_info_tx(sta, &sta->tx_stats.last_rate,
                                     &sinfo->txrate);
-               sinfo->filled |= BIT(NL80211_STA_INFO_TX_BITRATE);
+               sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_BITRATE);
        }
 
-       if (!(sinfo->filled & BIT(NL80211_STA_INFO_RX_BITRATE))) {
+       if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_RX_BITRATE))) {
                if (sta_set_rate_info_rx(sta, &sinfo->rxrate) == 0)
-                       sinfo->filled |= BIT(NL80211_STA_INFO_RX_BITRATE);
+                       sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_BITRATE);
        }
 
        if (tidstats && !cfg80211_sinfo_alloc_tid_stats(sinfo, GFP_KERNEL)) {
@@ -2244,18 +2257,18 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
 
        if (ieee80211_vif_is_mesh(&sdata->vif)) {
 #ifdef CONFIG_MAC80211_MESH
-               sinfo->filled |= BIT(NL80211_STA_INFO_LLID) |
-                                BIT(NL80211_STA_INFO_PLID) |
-                                BIT(NL80211_STA_INFO_PLINK_STATE) |
-                                BIT(NL80211_STA_INFO_LOCAL_PM) |
-                                BIT(NL80211_STA_INFO_PEER_PM) |
-                                BIT(NL80211_STA_INFO_NONPEER_PM);
+               sinfo->filled |= BIT_ULL(NL80211_STA_INFO_LLID) |
+                                BIT_ULL(NL80211_STA_INFO_PLID) |
+                                BIT_ULL(NL80211_STA_INFO_PLINK_STATE) |
+                                BIT_ULL(NL80211_STA_INFO_LOCAL_PM) |
+                                BIT_ULL(NL80211_STA_INFO_PEER_PM) |
+                                BIT_ULL(NL80211_STA_INFO_NONPEER_PM);
 
                sinfo->llid = sta->mesh->llid;
                sinfo->plid = sta->mesh->plid;
                sinfo->plink_state = sta->mesh->plink_state;
                if (test_sta_flag(sta, WLAN_STA_TOFFSET_KNOWN)) {
-                       sinfo->filled |= BIT(NL80211_STA_INFO_T_OFFSET);
+                       sinfo->filled |= BIT_ULL(NL80211_STA_INFO_T_OFFSET);
                        sinfo->t_offset = sta->mesh->t_offset;
                }
                sinfo->local_pm = sta->mesh->local_pm;
@@ -2300,7 +2313,7 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
        thr = sta_get_expected_throughput(sta);
 
        if (thr != 0) {
-               sinfo->filled |= BIT(NL80211_STA_INFO_EXPECTED_THROUGHPUT);
+               sinfo->filled |= BIT_ULL(NL80211_STA_INFO_EXPECTED_THROUGHPUT);
                sinfo->expected_throughput = thr;
        }
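
The BIT() to BIT_ULL() conversion above matters because sinfo->filled is a
64-bit mask and newer NL80211_STA_INFO_* values exceed bit 31: BIT() expands
to 1UL << n, which is only 32 bits wide on 32-bit architectures. A sketch
with the macro definitions assumed to mirror include/linux/bits.h:

    /* Sketch: why a 64-bit mask needs BIT_ULL() on 32-bit builds. */
    #include <stdint.h>
    #include <stdio.h>

    #define BIT(nr)     (1UL << (nr))   /* unsigned long: 32 or 64 bits */
    #define BIT_ULL(nr) (1ULL << (nr))  /* always 64 bits */

    int main(void)
    {
            uint64_t filled = 0;

            filled |= BIT_ULL(40); /* safe everywhere */
            /* filled |= BIT(40); undefined on a 32-bit unsigned long */
            printf("0x%016llx\n", (unsigned long long)filled);
            return 0;
    }
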
 
index 81b35f62379249e590b178283569bbd2c57bcd05..9a04327d71d1de1129a7589195c574e8b62fa74c 100644 (file)
@@ -170,7 +170,7 @@ struct tid_ampdu_tx {
        u8 dialog_token;
        u8 stop_initiator;
        bool tx_stop;
-       u8 buf_size;
+       u16 buf_size;
 
        u16 failed_bar_ssn;
        bool bar_pending;
@@ -405,7 +405,7 @@ struct ieee80211_sta_rx_stats {
        int last_signal;
        u8 chains;
        s8 chain_signal_last[IEEE80211_MAX_CHAINS];
-       u16 last_rate;
+       u32 last_rate;
        struct u64_stats_sync syncp;
        u64 bytes;
        u64 msdu[IEEE80211_NUM_TIDS + 1];
@@ -764,6 +764,7 @@ enum sta_stats_type {
        STA_STATS_RATE_TYPE_LEGACY,
        STA_STATS_RATE_TYPE_HT,
        STA_STATS_RATE_TYPE_VHT,
+       STA_STATS_RATE_TYPE_HE,
 };
 
 #define STA_STATS_FIELD_HT_MCS         GENMASK( 7,  0)
@@ -771,9 +772,14 @@ enum sta_stats_type {
 #define STA_STATS_FIELD_LEGACY_BAND    GENMASK( 7,  4)
 #define STA_STATS_FIELD_VHT_MCS                GENMASK( 3,  0)
 #define STA_STATS_FIELD_VHT_NSS                GENMASK( 7,  4)
+#define STA_STATS_FIELD_HE_MCS         GENMASK( 3,  0)
+#define STA_STATS_FIELD_HE_NSS         GENMASK( 7,  4)
 #define STA_STATS_FIELD_BW             GENMASK(11,  8)
 #define STA_STATS_FIELD_SGI            GENMASK(12, 12)
 #define STA_STATS_FIELD_TYPE           GENMASK(15, 13)
+#define STA_STATS_FIELD_HE_RU          GENMASK(18, 16)
+#define STA_STATS_FIELD_HE_GI          GENMASK(20, 19)
+#define STA_STATS_FIELD_HE_DCM         GENMASK(21, 21)
 
 #define STA_STATS_FIELD(_n, _v)                FIELD_PREP(STA_STATS_FIELD_ ## _n, _v)
 #define STA_STATS_GET(_n, _v)          FIELD_GET(STA_STATS_FIELD_ ## _n, _v)
@@ -782,7 +788,7 @@ enum sta_stats_type {
 
 static inline u32 sta_stats_encode_rate(struct ieee80211_rx_status *s)
 {
-       u16 r;
+       u32 r;
 
        r = STA_STATS_FIELD(BW, s->bw);
 
@@ -804,6 +810,14 @@ static inline u32 sta_stats_encode_rate(struct ieee80211_rx_status *s)
                r |= STA_STATS_FIELD(LEGACY_BAND, s->band);
                r |= STA_STATS_FIELD(LEGACY_IDX, s->rate_idx);
                break;
+       case RX_ENC_HE:
+               r |= STA_STATS_FIELD(TYPE, STA_STATS_RATE_TYPE_HE);
+               r |= STA_STATS_FIELD(HE_NSS, s->nss);
+               r |= STA_STATS_FIELD(HE_MCS, s->rate_idx);
+               r |= STA_STATS_FIELD(HE_GI, s->he_gi);
+               r |= STA_STATS_FIELD(HE_RU, s->he_ru);
+               r |= STA_STATS_FIELD(HE_DCM, s->he_dcm);
+               break;
        default:
                WARN_ON(1);
                return STA_STATS_RATE_INVALID;
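
sta_stats_encode_rate() packs the rate type, MCS, NSS, GI, RU and DCM into
one u32 via FIELD_PREP() against the GENMASKs above, and
sta_stats_decode_rate() reverses it with FIELD_GET(). A compact round-trip
sketch in the same spirit, with illustrative masks and gcc/clang's
__builtin_ctz standing in for the kernel helpers:

    /* Sketch: mask-based pack/unpack of an HE rate into a u32. */
    #include <stdint.h>
    #include <stdio.h>

    #define F_HE_MCS 0x0000000fu /* bits 0..3 */
    #define F_HE_NSS 0x000000f0u /* bits 4..7 */
    #define F_HE_GI  0x00180000u /* bits 19..20 */

    static uint32_t prep(uint32_t mask, uint32_t val)
    {
            return (val << __builtin_ctz(mask)) & mask;
    }

    static uint32_t get(uint32_t mask, uint32_t r)
    {
            return (r & mask) >> __builtin_ctz(mask);
    }

    int main(void)
    {
            uint32_t r = prep(F_HE_MCS, 11) | prep(F_HE_NSS, 2) |
                         prep(F_HE_GI, 1);

            printf("mcs=%u nss=%u gi=%u\n", (unsigned)get(F_HE_MCS, r),
                   (unsigned)get(F_HE_NSS, r), (unsigned)get(F_HE_GI, r));
            return 0;
    }
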
index 80a7edf8d314a12a22f62047e585275a7baa3ab9..0ab69a1964f8b97a7035a58f0321e5490c6a9a58 100644 (file)
@@ -92,7 +92,7 @@
                                STA_ENTRY                                               \
                                __field(u16, tid)                                       \
                                __field(u16, ssn)                                       \
-                               __field(u8, buf_size)                                   \
+                               __field(u16, buf_size)                                  \
                                __field(bool, amsdu)                                    \
                                __field(u16, timeout)                                   \
                                __field(u16, action)
index fa1f1e63a2640fd405e42e5aeae9718b4ef12d2a..cd332e3e1134bed3efb89838ac245b0402e7a604 100644 (file)
@@ -825,6 +825,8 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx)
         */
        if (!ieee80211_is_data_qos(hdr->frame_control) ||
            is_multicast_ether_addr(hdr->addr1)) {
+               if (tx->flags & IEEE80211_TX_NO_SEQNO)
+                       return TX_CONTINUE;
                /* driver should assign sequence number */
                info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ;
                /* for pure STA mode without beacons, we can do it */
@@ -1247,7 +1249,7 @@ static struct txq_info *ieee80211_get_txq(struct ieee80211_local *local,
            (info->control.flags & IEEE80211_TX_CTRL_PS_RESPONSE))
                return NULL;
 
-       if (!ieee80211_is_data(hdr->frame_control))
+       if (!ieee80211_is_data_present(hdr->frame_control))
                return NULL;
 
        if (sta) {
@@ -1854,7 +1856,7 @@ EXPORT_SYMBOL(ieee80211_tx_prepare_skb);
  */
 static bool ieee80211_tx(struct ieee80211_sub_if_data *sdata,
                         struct sta_info *sta, struct sk_buff *skb,
-                        bool txpending)
+                        bool txpending, u32 txdata_flags)
 {
        struct ieee80211_local *local = sdata->local;
        struct ieee80211_tx_data tx;
@@ -1872,6 +1874,8 @@ static bool ieee80211_tx(struct ieee80211_sub_if_data *sdata,
        led_len = skb->len;
        res_prepare = ieee80211_tx_prepare(sdata, &tx, sta, skb);
 
+       tx.flags |= txdata_flags;
+
        if (unlikely(res_prepare == TX_DROP)) {
                ieee80211_free_txskb(&local->hw, skb);
                return true;
@@ -1933,7 +1937,8 @@ static int ieee80211_skb_resize(struct ieee80211_sub_if_data *sdata,
 }
 
 void ieee80211_xmit(struct ieee80211_sub_if_data *sdata,
-                   struct sta_info *sta, struct sk_buff *skb)
+                   struct sta_info *sta, struct sk_buff *skb,
+                   u32 txdata_flags)
 {
        struct ieee80211_local *local = sdata->local;
        struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
@@ -1968,7 +1973,7 @@ void ieee80211_xmit(struct ieee80211_sub_if_data *sdata,
        }
 
        ieee80211_set_qos_hdr(sdata, skb);
-       ieee80211_tx(sdata, sta, skb, false);
+       ieee80211_tx(sdata, sta, skb, false, txdata_flags);
 }
 
 static bool ieee80211_parse_tx_radiotap(struct ieee80211_local *local,
@@ -2289,7 +2294,7 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb,
        if (!ieee80211_parse_tx_radiotap(local, skb))
                goto fail_rcu;
 
-       ieee80211_xmit(sdata, NULL, skb);
+       ieee80211_xmit(sdata, NULL, skb, 0);
        rcu_read_unlock();
 
        return NETDEV_TX_OK;
@@ -3648,7 +3653,7 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb,
 
                ieee80211_tx_stats(dev, skb->len);
 
-               ieee80211_xmit(sdata, sta, skb);
+               ieee80211_xmit(sdata, sta, skb, 0);
        }
        goto out;
  out_free:
@@ -3867,7 +3872,7 @@ static bool ieee80211_tx_pending_skb(struct ieee80211_local *local,
                        return true;
                }
                info->band = chanctx_conf->def.chan->band;
-               result = ieee80211_tx(sdata, NULL, skb, true);
+               result = ieee80211_tx(sdata, NULL, skb, true, 0);
        } else {
                struct sk_buff_head skbs;
 
@@ -4783,7 +4788,7 @@ EXPORT_SYMBOL(ieee80211_unreserve_tid);
 
 void __ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata,
                                 struct sk_buff *skb, int tid,
-                                enum nl80211_band band)
+                                enum nl80211_band band, u32 txdata_flags)
 {
        int ac = ieee80211_ac_from_tid(tid);
 
@@ -4800,7 +4805,7 @@ void __ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata,
         */
        local_bh_disable();
        IEEE80211_SKB_CB(skb)->band = band;
-       ieee80211_xmit(sdata, NULL, skb);
+       ieee80211_xmit(sdata, NULL, skb, txdata_flags);
        local_bh_enable();
 }
 
index d02fbfec37835bce6a27ecfdc146b95ba0ca077f..88efda7c9f8a78737538a355b1b499104ab55aea 100644 (file)
@@ -1095,6 +1095,21 @@ u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
                        if (elen >= sizeof(*elems->max_idle_period_ie))
                                elems->max_idle_period_ie = (void *)pos;
                        break;
+               case WLAN_EID_EXTENSION:
+                       if (pos[0] == WLAN_EID_EXT_HE_MU_EDCA &&
+                           elen >= (sizeof(*elems->mu_edca_param_set) + 1)) {
+                               elems->mu_edca_param_set = (void *)&pos[1];
+                       } else if (pos[0] == WLAN_EID_EXT_HE_CAPABILITY) {
+                               elems->he_cap = (void *)&pos[1];
+                               elems->he_cap_len = elen - 1;
+                       } else if (pos[0] == WLAN_EID_EXT_HE_OPERATION &&
+                                  elen >= sizeof(*elems->he_operation) &&
+                                  elen >= ieee80211_he_oper_size(&pos[1])) {
+                               elems->he_operation = (void *)&pos[1];
+                       } else if (pos[0] == WLAN_EID_EXT_UORA && elen >= 1) {
+                               elems->uora_element = (void *)&pos[1];
+                       }
+                       break;
                default:
                        break;
                }
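
An extension element (EID 255) carries the real element ID as the first byte
of its body, which is why the parser above takes the payload from pos[1]
with length elen - 1. A minimal sketch over one raw element, with the
extended EID value chosen purely for illustration:

    /* Sketch: parse one 802.11 extension element. */
    #include <stdint.h>
    #include <stdio.h>

    #define WLAN_EID_EXTENSION 255
    #define EXT_EID_EXAMPLE    35 /* illustrative extended element ID */

    int main(void)
    {
            /* EID=255, len=3, ext-EID, then 2 payload bytes */
            const uint8_t elem[] = {WLAN_EID_EXTENSION, 3, EXT_EID_EXAMPLE,
                                    0x01, 0x02};
            uint8_t elen = elem[1];
            const uint8_t *pos = &elem[2];

            if (elem[0] == WLAN_EID_EXTENSION && elen >= 1 &&
                pos[0] == EXT_EID_EXAMPLE)
                    printf("ext elem %u, payload len %u\n", (unsigned)pos[0],
                           (unsigned)(elen - 1));
            return 0;
    }
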
@@ -1353,9 +1368,10 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_local *local,
                                         enum nl80211_band band,
                                         u32 rate_mask,
                                         struct cfg80211_chan_def *chandef,
-                                        size_t *offset)
+                                        size_t *offset, u32 flags)
 {
        struct ieee80211_supported_band *sband;
+       const struct ieee80211_sta_he_cap *he_cap;
        u8 *pos = buffer, *end = buffer + buffer_len;
        size_t noffset;
        int supp_rates_len, i;
@@ -1433,6 +1449,9 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_local *local,
                                chandef->chan->center_freq);
        }
 
+       if (flags & IEEE80211_PROBE_FLAG_MIN_CONTENT)
+               goto done;
+
        /* insert custom IEs that go before HT */
        if (ie && ie_len) {
                static const u8 before_ht[] = {
@@ -1460,11 +1479,6 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_local *local,
                                                sband->ht_cap.cap);
        }
 
-       /*
-        * If adding more here, adjust code in main.c
-        * that calculates local->scan_ies_len.
-        */
-
        /* insert custom IEs that go before VHT */
        if (ie && ie_len) {
                static const u8 before_vht[] = {
@@ -1507,9 +1521,43 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_local *local,
                                                 sband->vht_cap.cap);
        }
 
+       /* insert custom IEs that go before HE */
+       if (ie && ie_len) {
+               static const u8 before_he[] = {
+                       /*
+                        * no need to list the ones split off before VHT
+                        * or generated here
+                        */
+                       WLAN_EID_EXTENSION, WLAN_EID_EXT_FILS_REQ_PARAMS,
+                       WLAN_EID_AP_CSN,
+                       /* TODO: add 11ah/11aj/11ak elements */
+               };
+               noffset = ieee80211_ie_split(ie, ie_len,
+                                            before_he, ARRAY_SIZE(before_he),
+                                            *offset);
+               if (end - pos < noffset - *offset)
+                       goto out_err;
+               memcpy(pos, ie + *offset, noffset - *offset);
+               pos += noffset - *offset;
+               *offset = noffset;
+       }
+
+       he_cap = ieee80211_get_he_sta_cap(sband);
+       if (he_cap) {
+               pos = ieee80211_ie_build_he_cap(pos, he_cap, end);
+               if (!pos)
+                       goto out_err;
+       }
+
+       /*
+        * If adding more here, adjust code in main.c
+        * that calculates local->scan_ies_len.
+        */
+
        return pos - buffer;
  out_err:
        WARN_ONCE(1, "not enough space for preq IEs\n");
+ done:
        return pos - buffer;
 }
 
@@ -1518,7 +1566,8 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer,
                             struct ieee80211_scan_ies *ie_desc,
                             const u8 *ie, size_t ie_len,
                             u8 bands_used, u32 *rate_masks,
-                            struct cfg80211_chan_def *chandef)
+                            struct cfg80211_chan_def *chandef,
+                            u32 flags)
 {
        size_t pos = 0, old_pos = 0, custom_ie_offset = 0;
        int i;
@@ -1533,7 +1582,8 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer,
                                                             ie, ie_len, i,
                                                             rate_masks[i],
                                                             chandef,
-                                                            &custom_ie_offset);
+                                                            &custom_ie_offset,
+                                                            flags);
                        ie_desc->ies[i] = buffer + old_pos;
                        ie_desc->len[i] = pos - old_pos;
                        old_pos = pos;
@@ -1561,7 +1611,7 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata,
                                          struct ieee80211_channel *chan,
                                          const u8 *ssid, size_t ssid_len,
                                          const u8 *ie, size_t ie_len,
-                                         bool directed)
+                                         u32 flags)
 {
        struct ieee80211_local *local = sdata->local;
        struct cfg80211_chan_def chandef;
@@ -1577,7 +1627,7 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata,
         * badly-behaved APs don't respond when this parameter is included.
         */
        chandef.width = sdata->vif.bss_conf.chandef.width;
-       if (directed)
+       if (flags & IEEE80211_PROBE_FLAG_DIRECTED)
                chandef.chan = NULL;
        else
                chandef.chan = chan;
@@ -1591,7 +1641,7 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata,
        ies_len = ieee80211_build_preq_ies(local, skb_tail_pointer(skb),
                                           skb_tailroom(skb), &dummy_ie_desc,
                                           ie, ie_len, BIT(chan->band),
-                                          rate_masks, &chandef);
+                                          rate_masks, &chandef, flags);
        skb_put(skb, ies_len);
 
        if (dst) {
@@ -1605,27 +1655,6 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata,
        return skb;
 }
 
-void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata,
-                             const u8 *src, const u8 *dst,
-                             const u8 *ssid, size_t ssid_len,
-                             const u8 *ie, size_t ie_len,
-                             u32 ratemask, bool directed, u32 tx_flags,
-                             struct ieee80211_channel *channel, bool scan)
-{
-       struct sk_buff *skb;
-
-       skb = ieee80211_build_probe_req(sdata, src, dst, ratemask, channel,
-                                       ssid, ssid_len,
-                                       ie, ie_len, directed);
-       if (skb) {
-               IEEE80211_SKB_CB(skb)->flags |= tx_flags;
-               if (scan)
-                       ieee80211_tx_skb_tid_band(sdata, skb, 7, channel->band);
-               else
-                       ieee80211_tx_skb(sdata, skb);
-       }
-}
-
 u32 ieee80211_sta_get_rates(struct ieee80211_sub_if_data *sdata,
                            struct ieee802_11_elems *elems,
                            enum nl80211_band band, u32 *basic_rates)
@@ -2413,6 +2442,72 @@ u8 *ieee80211_ie_build_vht_cap(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
        return pos;
 }
 
+u8 *ieee80211_ie_build_he_cap(u8 *pos,
+                             const struct ieee80211_sta_he_cap *he_cap,
+                             u8 *end)
+{
+       u8 n;
+       u8 ie_len;
+       u8 *orig_pos = pos;
+
+       /* Make sure we have room for the IE */
+       /*
+        * TODO: the extra 1 in ie_len below is because this element currently
+        * lives under the EXTENSION IE. Get rid of it when it moves.
+        */
+       if (!he_cap)
+               return orig_pos;
+
+       n = ieee80211_he_mcs_nss_size(&he_cap->he_cap_elem);
+       ie_len = 2 + 1 +
+                sizeof(he_cap->he_cap_elem) + n +
+                ieee80211_he_ppe_size(he_cap->ppe_thres[0],
+                                      he_cap->he_cap_elem.phy_cap_info);
+
+       if ((end - pos) < ie_len)
+               return orig_pos;
+
+       *pos++ = WLAN_EID_EXTENSION;
+       pos++; /* We'll set the size later below */
+       *pos++ = WLAN_EID_EXT_HE_CAPABILITY;
+
+       /* Fixed data */
+       memcpy(pos, &he_cap->he_cap_elem, sizeof(he_cap->he_cap_elem));
+       pos += sizeof(he_cap->he_cap_elem);
+
+       memcpy(pos, &he_cap->he_mcs_nss_supp, n);
+       pos += n;
+
+       /* Check if PPE Threshold should be present */
+       if ((he_cap->he_cap_elem.phy_cap_info[6] &
+            IEEE80211_HE_PHY_CAP6_PPE_THRESHOLD_PRESENT) == 0)
+               goto end;
+
+       /*
+        * Calculate how many PPET16/PPET8 pairs are to come. Algorithm:
+        * (NSS_M1 + 1) x (num of 1 bits in RU_INDEX_BITMASK)
+        */
+       n = hweight8(he_cap->ppe_thres[0] &
+                    IEEE80211_PPE_THRES_RU_INDEX_BITMASK_MASK);
+       n *= (1 + ((he_cap->ppe_thres[0] & IEEE80211_PPE_THRES_NSS_MASK) >>
+                  IEEE80211_PPE_THRES_NSS_POS));
+
+       /*
+        * Each pair is 6 bits, and we need to add the 7 "header" bits to the
+        * total size.
+        */
+       n = (n * IEEE80211_PPE_THRES_INFO_PPET_SIZE * 2) + 7;
+       n = DIV_ROUND_UP(n, 8);
+
+       /* Copy PPE Thresholds */
+       memcpy(pos, &he_cap->ppe_thres, n);
+       pos += n;
+
+end:
+       orig_pos[1] = (pos - orig_pos) - 2;
+       return pos;
+}
+
 u8 *ieee80211_ie_build_ht_oper(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap,
                               const struct cfg80211_chan_def *chandef,
                               u16 prot_mode, bool rifs_mode)
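
The PPE Thresholds sizing in ieee80211_ie_build_he_cap() can be checked by
hand: with NSS_M1 = 1 (two streams) and three bits set in the RU index
bitmask, there are 2 x 3 = 6 PPET16/PPET8 pairs of 2 x 3 bits each, plus the
7 header bits: 43 bits, rounded up to 6 bytes. A sketch of the same
arithmetic, with the layout of ppe_thres[0] (3-bit NSS_M1, then a 4-bit RU
index bitmask) assumed:

    /* Sketch: PPE Thresholds field size in bytes from its first octet. */
    #include <stdint.h>
    #include <stdio.h>

    static unsigned int ppe_thres_bytes(uint8_t ppe0)
    {
            unsigned int nss = 1 + (ppe0 & 0x07);
            unsigned int ru = __builtin_popcount((ppe0 >> 3) & 0x0f);
            unsigned int bits = nss * ru * 3 * 2 + 7; /* 3-bit PPET pairs */

            return (bits + 7) / 8; /* DIV_ROUND_UP(bits, 8) */
    }

    int main(void)
    {
            /* NSS_M1 = 1, RU bitmask = 0b0111 -> 43 bits -> 6 bytes */
            printf("%u bytes\n", ppe_thres_bytes(0x01 | (0x07 << 3)));
            return 0;
    }
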
index 6e558a419f60337a28426db411d88c7fed60213a..94f53a9b7d1ae67edaf2ec0bfba87e4ab1392ee5 100644 (file)
@@ -224,7 +224,7 @@ static int mpls_fill_encap_info(struct sk_buff *skb,
                                struct lwtunnel_state *lwtstate)
 {
        struct mpls_iptunnel_encap *tun_encap_info;
-       
+
        tun_encap_info = mpls_lwtunnel_encap(lwtstate);
 
        if (nla_put_labels(skb, MPLS_IPTUNNEL_DST, tun_encap_info->labels,
index f0a1c536ef15a0d35a3078bf85b5f4bee704f894..6f6c959aeb8f97734e31bfd428d1c98652e56482 100644 (file)
@@ -49,6 +49,8 @@ config NETFILTER_NETLINK_LOG
 config NF_CONNTRACK
        tristate "Netfilter connection tracking support"
        default m if NETFILTER_ADVANCED=n
+       select NF_DEFRAG_IPV4
+       select NF_DEFRAG_IPV6 if IPV6 != n
        help
          Connection tracking keeps a record of what packets have passed
          through your machine, in order to figure out how they are related
@@ -615,7 +617,7 @@ config NFT_SOCKET
        tristate "Netfilter nf_tables socket match support"
        depends on IPV6 || IPV6=n
        select NF_SOCKET_IPV4
-       select NF_SOCKET_IPV6 if IPV6
+       select NF_SOCKET_IPV6 if NF_TABLES_IPV6
        help
          This option allows matching for the presence or absence of a
          corresponding socket and its attributes.
@@ -881,7 +883,7 @@ config NETFILTER_XT_TARGET_LOG
        tristate "LOG target support"
        select NF_LOG_COMMON
        select NF_LOG_IPV4
-       select NF_LOG_IPV6 if IPV6
+       select NF_LOG_IPV6 if IP6_NF_IPTABLES
        default m if NETFILTER_ADVANCED=n
        help
          This option adds a `LOG' target, which allows you to create rules in
@@ -973,7 +975,7 @@ config NETFILTER_XT_TARGET_TEE
        depends on IPV6 || IPV6=n
        depends on !NF_CONNTRACK || NF_CONNTRACK
        select NF_DUP_IPV4
-       select NF_DUP_IPV6 if IPV6
+       select NF_DUP_IPV6 if IP6_NF_IPTABLES
        ---help---
        This option adds a "TEE" target with which a packet can be cloned and
        this clone be rerouted to another nexthop.
@@ -1481,8 +1483,8 @@ config NETFILTER_XT_MATCH_SOCKET
        depends on NETFILTER_ADVANCED
        depends on IPV6 || IPV6=n
        depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n
-       depends on NF_SOCKET_IPV4
-       depends on NF_SOCKET_IPV6
+       select NF_SOCKET_IPV4
+       select NF_SOCKET_IPV6 if IP6_NF_IPTABLES
        select NF_DEFRAG_IPV4
        select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES != n
        help
index 8a76dced974d1c10eca35dca78cf2ab284cb2490..dd26e4961f43048ca6495ff92c282a951d689706 100644
@@ -1,7 +1,12 @@
 # SPDX-License-Identifier: GPL-2.0
 netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o utils.o
 
-nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o nf_conntrack_seqadj.o
+nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o \
+                  nf_conntrack_proto.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o \
+                  nf_conntrack_proto_icmp.o \
+                  nf_conntrack_extend.o nf_conntrack_acct.o nf_conntrack_seqadj.o
+
+nf_conntrack-$(subst m,y,$(CONFIG_IPV6)) += nf_conntrack_proto_icmpv6.o
 nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMEOUT) += nf_conntrack_timeout.o
 nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o
 nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o
index 168af54db975d188a5224feb0113f05c12b83e0a..dc240cb47ddfac2466c9206dca8d8e0064c5e9e1 100644
@@ -603,6 +603,21 @@ void nf_conntrack_destroy(struct nf_conntrack *nfct)
 }
 EXPORT_SYMBOL(nf_conntrack_destroy);
 
+bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple,
+                        const struct sk_buff *skb)
+{
+       struct nf_ct_hook *ct_hook;
+       bool ret = false;
+
+       rcu_read_lock();
+       ct_hook = rcu_dereference(nf_ct_hook);
+       if (ct_hook)
+               ret = ct_hook->get_tuple_skb(dst_tuple, skb);
+       rcu_read_unlock();
+       return ret;
+}
+EXPORT_SYMBOL(nf_ct_get_tuple_skb);
+
 /* Built-in default zone used e.g. by modules. */
 const struct nf_conntrack_zone nf_ct_zone_dflt = {
        .id     = NF_CT_DEFAULT_ZONE_ID,
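nf_ct_get_tuple_skb() above is the core's half of a late-bound interface: the conntrack module publishes an nf_ct_hook when it loads, and callers must tolerate its absence. A minimal userspace model of that pattern, with a C11 atomic pointer standing in for the kernel's RCU-protected one (all names here are invented):

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

struct tuple;				/* opaque for this sketch */

struct ct_hook {
	bool (*get_tuple_skb)(struct tuple *dst, const void *skb);
};

/* published by the module at load time, read locklessly by callers */
static _Atomic(struct ct_hook *) ct_hook;

static bool get_tuple_skb(struct tuple *dst, const void *skb)
{
	struct ct_hook *hook = atomic_load_explicit(&ct_hook,
						    memory_order_acquire);

	/* no conntrack loaded: report "no tuple" rather than crash */
	return hook ? hook->get_tuple_skb(dst, skb) : false;
}

int main(void)
{
	return get_tuple_skb(NULL, NULL) ? 1 : 0;	/* no hook: false */
}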
index 99e0aa350dc54c5735aedf8da1212c088224d1ee..0edc62910ebfc55e8a4d0ee19f10f918f0599c13 100644
@@ -825,12 +825,23 @@ static void ip_vs_conn_expire(struct timer_list *t)
 
        /* Unlink conn if not referenced anymore */
        if (likely(ip_vs_conn_unlink(cp))) {
+               struct ip_vs_conn *ct = cp->control;
+
                /* delete the timer if it is activated by other users */
                del_timer(&cp->timer);
 
                /* does anybody control me? */
-               if (cp->control)
+               if (ct) {
                        ip_vs_control_del(cp);
+                       /* Drop CTL or non-assured TPL if not used anymore */
+                       if (!cp->timeout && !atomic_read(&ct->n_control) &&
+                           (!(ct->flags & IP_VS_CONN_F_TEMPLATE) ||
+                            !(ct->state & IP_VS_CTPL_S_ASSURED))) {
+                               IP_VS_DBG(4, "drop controlling connection\n");
+                               ct->timeout = 0;
+                               ip_vs_conn_expire_now(ct);
+                       }
+               }
 
                if ((cp->flags & IP_VS_CONN_F_NFCT) &&
                    !(cp->flags & IP_VS_CONN_F_ONE_PACKET)) {
@@ -872,6 +883,10 @@ static void ip_vs_conn_expire(struct timer_list *t)
 
 /* Modify timer, so that it expires as soon as possible.
  * Can be called without reference only if under RCU lock.
+ * We can have such a chain of conns linked with ->control: DATA->CTL->TPL
+ * - DATA (e.g. FTP) and TPL (persistence) can be present depending on setup
+ * - cp->timeout=0 indicates all conns from the chain should be dropped,
+ * but TPL is not dropped if in assured state
  */
 void ip_vs_conn_expire_now(struct ip_vs_conn *cp)
 {
@@ -1107,7 +1122,7 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
                                &cp->caddr.in6, ntohs(cp->cport),
                                &cp->vaddr.in6, ntohs(cp->vport),
                                dbuf, ntohs(cp->dport),
-                               ip_vs_state_name(cp->protocol, cp->state),
+                               ip_vs_state_name(cp),
                                (cp->timer.expires-jiffies)/HZ, pe_data);
                else
 #endif
@@ -1118,7 +1133,7 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
                                ntohl(cp->caddr.ip), ntohs(cp->cport),
                                ntohl(cp->vaddr.ip), ntohs(cp->vport),
                                dbuf, ntohs(cp->dport),
-                               ip_vs_state_name(cp->protocol, cp->state),
+                               ip_vs_state_name(cp),
                                (cp->timer.expires-jiffies)/HZ, pe_data);
        }
        return 0;
@@ -1169,7 +1184,7 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
                                &cp->caddr.in6, ntohs(cp->cport),
                                &cp->vaddr.in6, ntohs(cp->vport),
                                dbuf, ntohs(cp->dport),
-                               ip_vs_state_name(cp->protocol, cp->state),
+                               ip_vs_state_name(cp),
                                ip_vs_origin_name(cp->flags),
                                (cp->timer.expires-jiffies)/HZ);
                else
@@ -1181,7 +1196,7 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
                                ntohl(cp->caddr.ip), ntohs(cp->cport),
                                ntohl(cp->vaddr.ip), ntohs(cp->vport),
                                dbuf, ntohs(cp->dport),
-                               ip_vs_state_name(cp->protocol, cp->state),
+                               ip_vs_state_name(cp),
                                ip_vs_origin_name(cp->flags),
                                (cp->timer.expires-jiffies)/HZ);
        }
@@ -1197,8 +1212,11 @@ static const struct seq_operations ip_vs_conn_sync_seq_ops = {
 #endif
 
 
-/*
- *      Randomly drop connection entries before running out of memory
+/* Randomly drop connection entries before running out of memory
+ * Can be used for DATA and CTL conns. For TPL conns there are exceptions:
+ * - traffic for services in OPS mode increases ct->in_pkts, so it is supported
+ * - traffic for services not in OPS mode does not always increase
+ * ct->in_pkts, so it is not supported
  */
 static inline int todrop_entry(struct ip_vs_conn *cp)
 {
@@ -1242,7 +1260,7 @@ static inline bool ip_vs_conn_ops_mode(struct ip_vs_conn *cp)
 void ip_vs_random_dropentry(struct netns_ipvs *ipvs)
 {
        int idx;
-       struct ip_vs_conn *cp, *cp_c;
+       struct ip_vs_conn *cp;
 
        rcu_read_lock();
        /*
@@ -1254,13 +1272,15 @@ void ip_vs_random_dropentry(struct netns_ipvs *ipvs)
                hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) {
                        if (cp->ipvs != ipvs)
                                continue;
+                       if (atomic_read(&cp->n_control))
+                               continue;
                        if (cp->flags & IP_VS_CONN_F_TEMPLATE) {
-                               if (atomic_read(&cp->n_control) ||
-                                   !ip_vs_conn_ops_mode(cp))
-                                       continue;
-                               else
-                                       /* connection template of OPS */
+                               /* connection template of OPS */
+                               if (ip_vs_conn_ops_mode(cp))
                                        goto try_drop;
+                               if (!(cp->state & IP_VS_CTPL_S_ASSURED))
+                                       goto drop;
+                               continue;
                        }
                        if (cp->protocol == IPPROTO_TCP) {
                                switch(cp->state) {
@@ -1294,15 +1314,10 @@ try_drop:
                                        continue;
                        }
 
-                       IP_VS_DBG(4, "del connection\n");
+drop:
+                       IP_VS_DBG(4, "drop connection\n");
+                       cp->timeout = 0;
                        ip_vs_conn_expire_now(cp);
-                       cp_c = cp->control;
-                       /* cp->control is valid only with reference to cp */
-                       if (cp_c && __ip_vs_conn_get(cp)) {
-                               IP_VS_DBG(4, "del conn template\n");
-                               ip_vs_conn_expire_now(cp_c);
-                               __ip_vs_conn_put(cp);
-                       }
                }
                cond_resched_rcu();
        }
@@ -1325,15 +1340,19 @@ flush_again:
                hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) {
                        if (cp->ipvs != ipvs)
                                continue;
-                       IP_VS_DBG(4, "del connection\n");
-                       ip_vs_conn_expire_now(cp);
+                       /* As timers expire in LIFO order, restart the
+                        * timer of the controlling connection first, so
+                        * that it expires after us.
+                        */
                        cp_c = cp->control;
                        /* cp->control is valid only with reference to cp */
                        if (cp_c && __ip_vs_conn_get(cp)) {
-                               IP_VS_DBG(4, "del conn template\n");
+                               IP_VS_DBG(4, "del controlling connection\n");
                                ip_vs_conn_expire_now(cp_c);
                                __ip_vs_conn_put(cp);
                        }
+                       IP_VS_DBG(4, "del connection\n");
+                       ip_vs_conn_expire_now(cp);
                }
                cond_resched_rcu();
        }
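The two ip_vs_conn.c changes above implement one policy: when a chain of conns is torn down (cp->timeout forced to 0), a controlling CTL conn or non-assured TPL is expired along with it, while an assured template survives. A toy model of just that predicate; the struct, the flag values and drop_controlling() are invented, and the kernel's refcounting, atomics and locking are omitted:

#include <stdbool.h>
#include <stdio.h>

#define F_TEMPLATE 0x01		/* persistence template (TPL) */
#define S_ASSURED  0x01		/* template has seen established traffic */

struct conn {
	unsigned int flags;
	unsigned int state;
	unsigned int n_control;	/* conns referencing us via ->control */
	unsigned long timeout;
};

/* Should controlling entry 'ct' expire together with controlled
 * connection 'cp', which is going away?
 */
static bool drop_controlling(const struct conn *cp, const struct conn *ct)
{
	if (cp->timeout || ct->n_control)
		return false;	/* chain not being dropped, or still used */
	/* plain CTL conns are dropped; assured templates are kept */
	return !(ct->flags & F_TEMPLATE) || !(ct->state & S_ASSURED);
}

int main(void)
{
	struct conn cp = { .timeout = 0 };
	struct conn tpl = { .flags = F_TEMPLATE, .state = S_ASSURED };

	/* prints "no": an assured template outlives the dropped chain */
	printf("drop TPL: %s\n", drop_controlling(&cp, &tpl) ? "yes" : "no");
	return 0;
}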
index dd21782e2f12fc30d5bd227f3c33bc9362c72942..62eefea489732d6d11195e98388ded730d963283 100644
@@ -134,7 +134,7 @@ static void update_defense_level(struct netns_ipvs *ipvs)
                } else {
                        atomic_set(&ipvs->dropentry, 0);
                        ipvs->sysctl_drop_entry = 1;
-               };
+               }
                break;
        case 3:
                atomic_set(&ipvs->dropentry, 1);
index ca880a3ad033aec3f55641c3cd485098983a8722..54ee84adf0bdea67428b124a54d9820fa33361ba 100644
 
 static struct ip_vs_protocol *ip_vs_proto_table[IP_VS_PROTO_TAB_SIZE];
 
+/* States for conn templates: NONE or words separated with ",", max 15 chars */
+static const char *ip_vs_ctpl_state_name_table[IP_VS_CTPL_S_LAST] = {
+       [IP_VS_CTPL_S_NONE]                     = "NONE",
+       [IP_VS_CTPL_S_ASSURED]                  = "ASSURED",
+};
 
 /*
  *     register an ipvs protocol
@@ -193,12 +198,20 @@ ip_vs_create_timeout_table(int *table, int size)
 }
 
 
-const char * ip_vs_state_name(__u16 proto, int state)
+const char *ip_vs_state_name(const struct ip_vs_conn *cp)
 {
-       struct ip_vs_protocol *pp = ip_vs_proto_get(proto);
+       unsigned int state = cp->state;
+       struct ip_vs_protocol *pp;
+
+       if (cp->flags & IP_VS_CONN_F_TEMPLATE) {
 
+               if (state >= IP_VS_CTPL_S_LAST)
+                       return "ERR!";
+               return ip_vs_ctpl_state_name_table[state] ? : "?";
+       }
+       pp = ip_vs_proto_get(cp->protocol);
        if (pp == NULL || pp->state_name == NULL)
-               return (IPPROTO_IP == proto) ? "NONE" : "ERR!";
+               return (cp->protocol == IPPROTO_IP) ? "NONE" : "ERR!";
        return pp->state_name(state);
 }
 
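For reference, the template state lookup added above reduces to a bounds-checked table of designated initializers, with GCC's ?: extension covering holes. A self-contained version with illustrative enum values:

#include <stdio.h>

enum { S_NONE, S_ASSURED, S_LAST };

static const char *state_names[S_LAST] = {
	[S_NONE]    = "NONE",
	[S_ASSURED] = "ASSURED",
};

static const char *state_name(unsigned int state)
{
	if (state >= S_LAST)
		return "ERR!";			/* out of range */
	return state_names[state] ? : "?";	/* GNU ?: fills holes */
}

int main(void)
{
	/* prints "NONE ASSURED ERR!" */
	printf("%s %s %s\n", state_name(0), state_name(1), state_name(7));
	return 0;
}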
index 3250c4a1111e27046c797c703524e80166c80a34..b0cd7d08f2a7a2692657dd751ae655576a37a81d 100644
@@ -461,6 +461,8 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
                                cp->flags &= ~IP_VS_CONN_F_INACTIVE;
                        }
                }
+               if (next_state == IP_VS_SCTP_S_ESTABLISHED)
+                       ip_vs_control_assure_ct(cp);
        }
        if (likely(pd))
                cp->timeout = pd->timeout_table[cp->state = next_state];
index 80d10ad12a15f686a68e0457ac3701b605a764c9..1770fc6ce960e4a69e09d29573a3df753a013441 100644
@@ -569,6 +569,8 @@ set_tcp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
                                cp->flags &= ~IP_VS_CONN_F_INACTIVE;
                        }
                }
+               if (new_state == IP_VS_TCP_S_ESTABLISHED)
+                       ip_vs_control_assure_ct(cp);
        }
 
        if (likely(pd))
index e0ef11c3691e49deebd7ae4204cc4ad8bce53cc7..0f53c49025f8799da71bb1a41dc407435a27013e 100644
@@ -460,6 +460,8 @@ udp_state_transition(struct ip_vs_conn *cp, int direction,
        }
 
        cp->timeout = pd->timeout_table[IP_VS_UDP_S_NORMAL];
+       if (direction == IP_VS_DIR_OUTPUT)
+               ip_vs_control_assure_ct(cp);
 }
 
 static int __udp_init(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
index 001501e25625fcd3f87a22fec4c4e013c90b3b8d..d4020c5e831d3020a6e412ead6d1895f81b5a124 100644
@@ -1003,12 +1003,9 @@ static void ip_vs_process_message_v0(struct netns_ipvs *ipvs, const char *buffer
                                continue;
                        }
                } else {
-                       /* protocol in templates is not used for state/timeout */
-                       if (state > 0) {
-                               IP_VS_DBG(2, "BACKUP v0, Invalid template state %u\n",
-                                       state);
-                               state = 0;
-                       }
+                       if (state >= IP_VS_CTPL_S_LAST)
+                               IP_VS_DBG(7, "BACKUP v0, Invalid tpl state %u\n",
+                                         state);
                }
 
                ip_vs_conn_fill_param(ipvs, AF_INET, s->protocol,
@@ -1166,12 +1163,9 @@ static inline int ip_vs_proc_sync_conn(struct netns_ipvs *ipvs, __u8 *p, __u8 *m
                        goto out;
                }
        } else {
-               /* protocol in templates is not used for state/timeout */
-               if (state > 0) {
-                       IP_VS_DBG(3, "BACKUP, Invalid template state %u\n",
-                               state);
-                       state = 0;
-               }
+               if (state >= IP_VS_CTPL_S_LAST)
+                       IP_VS_DBG(7, "BACKUP, Invalid tpl state %u\n",
+                                 state);
        }
        if (ip_vs_conn_fill_param_sync(ipvs, af, s, &param, pe_data,
                                       pe_data_len, pe_name, pe_name_len)) {
index 510039862aa93c99904d2dbd3a7969327d0d896a..02ca7df793f5c07233924c051bbbd9faf60854d4 100644
 
 /* we will save the tuples of all connections we care about */
 struct nf_conncount_tuple {
-       struct hlist_node               node;
+       struct list_head                node;
        struct nf_conntrack_tuple       tuple;
        struct nf_conntrack_zone        zone;
        int                             cpu;
        u32                             jiffies32;
+       struct rcu_head                 rcu_head;
 };
 
 struct nf_conncount_rb {
        struct rb_node node;
-       struct hlist_head hhead; /* connections/hosts in same subnet */
+       struct nf_conncount_list list;
        u32 key[MAX_KEYLEN];
+       struct rcu_head rcu_head;
 };
 
 static spinlock_t nf_conncount_locks[CONNCOUNT_LOCK_SLOTS] __cacheline_aligned_in_smp;
@@ -62,6 +64,10 @@ static spinlock_t nf_conncount_locks[CONNCOUNT_LOCK_SLOTS] __cacheline_aligned_i
 struct nf_conncount_data {
        unsigned int keylen;
        struct rb_root root[CONNCOUNT_SLOTS];
+       struct net *net;
+       struct work_struct gc_work;
+       unsigned long pending_trees[BITS_TO_LONGS(CONNCOUNT_SLOTS)];
+       unsigned int gc_tree;
 };
 
 static u_int32_t conncount_rnd __read_mostly;
@@ -82,26 +88,70 @@ static int key_diff(const u32 *a, const u32 *b, unsigned int klen)
        return memcmp(a, b, klen * sizeof(u32));
 }
 
-bool nf_conncount_add(struct hlist_head *head,
-                     const struct nf_conntrack_tuple *tuple,
-                     const struct nf_conntrack_zone *zone)
+enum nf_conncount_list_add
+nf_conncount_add(struct nf_conncount_list *list,
+                const struct nf_conntrack_tuple *tuple,
+                const struct nf_conntrack_zone *zone)
 {
        struct nf_conncount_tuple *conn;
 
+       if (WARN_ON_ONCE(list->count > INT_MAX))
+               return NF_CONNCOUNT_ERR;
+
        conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC);
        if (conn == NULL)
-               return false;
+               return NF_CONNCOUNT_ERR;
+
        conn->tuple = *tuple;
        conn->zone = *zone;
        conn->cpu = raw_smp_processor_id();
        conn->jiffies32 = (u32)jiffies;
-       hlist_add_head(&conn->node, head);
-       return true;
+       spin_lock(&list->list_lock);
+       if (list->dead == true) {
+               kmem_cache_free(conncount_conn_cachep, conn);
+               spin_unlock(&list->list_lock);
+               return NF_CONNCOUNT_SKIP;
+       }
+       list_add_tail(&conn->node, &list->head);
+       list->count++;
+       spin_unlock(&list->list_lock);
+       return NF_CONNCOUNT_ADDED;
 }
 EXPORT_SYMBOL_GPL(nf_conncount_add);
 
+static void __conn_free(struct rcu_head *h)
+{
+       struct nf_conncount_tuple *conn;
+
+       conn = container_of(h, struct nf_conncount_tuple, rcu_head);
+       kmem_cache_free(conncount_conn_cachep, conn);
+}
+
+static bool conn_free(struct nf_conncount_list *list,
+                     struct nf_conncount_tuple *conn)
+{
+       bool free_entry = false;
+
+       spin_lock(&list->list_lock);
+
+       if (list->count == 0) {
+               spin_unlock(&list->list_lock);
+                return free_entry;
+       }
+
+       list->count--;
+       list_del_rcu(&conn->node);
+       if (list->count == 0)
+               free_entry = true;
+
+       spin_unlock(&list->list_lock);
+       call_rcu(&conn->rcu_head, __conn_free);
+       return free_entry;
+}
+
 static const struct nf_conntrack_tuple_hash *
-find_or_evict(struct net *net, struct nf_conncount_tuple *conn)
+find_or_evict(struct net *net, struct nf_conncount_list *list,
+             struct nf_conncount_tuple *conn, bool *free_entry)
 {
        const struct nf_conntrack_tuple_hash *found;
        unsigned long a, b;
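The list->dead flag added in this hunk closes a race: a lookup can reach an rbtree node without the per-slot lock while the garbage collector concurrently empties the node's list and unlinks it. An inserter therefore re-checks liveness under the list lock, and NF_CONNCOUNT_SKIP tells the caller to retry with a fresh node. The handshake in miniature (invented userspace names, a pthread mutex in place of the spinlock):

#include <pthread.h>
#include <stdbool.h>

struct counted_list {
	pthread_mutex_t lock;
	unsigned int count;
	bool dead;		/* set, under lock, when GC unlinks the node */
};

enum add_result { ADDED, SKIP };

static enum add_result counted_list_add(struct counted_list *list)
{
	enum add_result ret = SKIP;

	pthread_mutex_lock(&list->lock);
	if (!list->dead) {	/* still reachable from the tree */
		list->count++;
		ret = ADDED;
	}
	pthread_mutex_unlock(&list->lock);
	return ret;		/* SKIP: caller allocates a new node */
}

int main(void)
{
	struct counted_list list = { .lock = PTHREAD_MUTEX_INITIALIZER };

	return counted_list_add(&list) == ADDED ? 0 : 1;
}

tree_nodes_free() below takes the same lock before setting dead and erasing the node, so an insert and a reclaim can never both succeed.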
@@ -121,34 +171,37 @@ find_or_evict(struct net *net, struct nf_conncount_tuple *conn)
         */
        age = a - b;
        if (conn->cpu == cpu || age >= 2) {
-               hlist_del(&conn->node);
-               kmem_cache_free(conncount_conn_cachep, conn);
+               *free_entry = conn_free(list, conn);
                return ERR_PTR(-ENOENT);
        }
 
        return ERR_PTR(-EAGAIN);
 }
 
-unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head,
-                                const struct nf_conntrack_tuple *tuple,
-                                const struct nf_conntrack_zone *zone,
-                                bool *addit)
+void nf_conncount_lookup(struct net *net,
+                        struct nf_conncount_list *list,
+                        const struct nf_conntrack_tuple *tuple,
+                        const struct nf_conntrack_zone *zone,
+                        bool *addit)
 {
        const struct nf_conntrack_tuple_hash *found;
-       struct nf_conncount_tuple *conn;
+       struct nf_conncount_tuple *conn, *conn_n;
        struct nf_conn *found_ct;
-       struct hlist_node *n;
-       unsigned int length = 0;
+       unsigned int collect = 0;
+       bool free_entry = false;
 
+       /* best effort only */
        *addit = tuple ? true : false;
 
        /* check the saved connections */
-       hlist_for_each_entry_safe(conn, n, head, node) {
-               found = find_or_evict(net, conn);
+       list_for_each_entry_safe(conn, conn_n, &list->head, node) {
+               if (collect > CONNCOUNT_GC_MAX_NODES)
+                       break;
+
+               found = find_or_evict(net, list, conn, &free_entry);
                if (IS_ERR(found)) {
                        /* Not found, but might be about to be confirmed */
                        if (PTR_ERR(found) == -EAGAIN) {
-                               length++;
                                if (!tuple)
                                        continue;
 
@@ -156,7 +209,8 @@ unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head,
                                    nf_ct_zone_id(&conn->zone, conn->zone.dir) ==
                                    nf_ct_zone_id(zone, zone->dir))
                                        *addit = false;
-                       }
+                       } else if (PTR_ERR(found) == -ENOENT)
+                               collect++;
                        continue;
                }
 
@@ -165,9 +219,10 @@ unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head,
                if (tuple && nf_ct_tuple_equal(&conn->tuple, tuple) &&
                    nf_ct_zone_equal(found_ct, zone, zone->dir)) {
                        /*
-                        * Just to be sure we have it only once in the list.
                         * We should not see tuples twice unless someone hooks
                         * this into a table without "-p tcp --syn".
+                        *
+                        * Attempt to avoid a re-add in this case.
                         */
                        *addit = false;
                } else if (already_closed(found_ct)) {
@@ -176,19 +231,75 @@ unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head,
                         * closed already -> ditch it
                         */
                        nf_ct_put(found_ct);
-                       hlist_del(&conn->node);
-                       kmem_cache_free(conncount_conn_cachep, conn);
+                       conn_free(list, conn);
+                       collect++;
                        continue;
                }
 
                nf_ct_put(found_ct);
-               length++;
        }
-
-       return length;
 }
 EXPORT_SYMBOL_GPL(nf_conncount_lookup);
 
+void nf_conncount_list_init(struct nf_conncount_list *list)
+{
+       spin_lock_init(&list->list_lock);
+       INIT_LIST_HEAD(&list->head);
+       list->count = 1;
+       list->dead = false;
+}
+EXPORT_SYMBOL_GPL(nf_conncount_list_init);
+
+/* Return true if the list is empty */
+bool nf_conncount_gc_list(struct net *net,
+                         struct nf_conncount_list *list)
+{
+       const struct nf_conntrack_tuple_hash *found;
+       struct nf_conncount_tuple *conn, *conn_n;
+       struct nf_conn *found_ct;
+       unsigned int collected = 0;
+       bool free_entry = false;
+
+       list_for_each_entry_safe(conn, conn_n, &list->head, node) {
+               found = find_or_evict(net, list, conn, &free_entry);
+               if (IS_ERR(found)) {
+                       if (PTR_ERR(found) == -ENOENT) {
+                               if (free_entry)
+                                       return true;
+                               collected++;
+                       }
+                       continue;
+               }
+
+               found_ct = nf_ct_tuplehash_to_ctrack(found);
+               if (already_closed(found_ct)) {
+                       /*
+                        * we do not care about connections which are
+                        * closed already -> ditch it
+                        */
+                       nf_ct_put(found_ct);
+                       if (conn_free(list, conn))
+                               return true;
+                       collected++;
+                       continue;
+               }
+
+               nf_ct_put(found_ct);
+               if (collected > CONNCOUNT_GC_MAX_NODES)
+                       return false;
+       }
+       return false;
+}
+EXPORT_SYMBOL_GPL(nf_conncount_gc_list);
+
+static void __tree_nodes_free(struct rcu_head *h)
+{
+       struct nf_conncount_rb *rbconn;
+
+       rbconn = container_of(h, struct nf_conncount_rb, rcu_head);
+       kmem_cache_free(conncount_rb_cachep, rbconn);
+}
+
 static void tree_nodes_free(struct rb_root *root,
                            struct nf_conncount_rb *gc_nodes[],
                            unsigned int gc_count)
@@ -197,32 +308,46 @@ static void tree_nodes_free(struct rb_root *root,
 
        while (gc_count) {
                rbconn = gc_nodes[--gc_count];
-               rb_erase(&rbconn->node, root);
-               kmem_cache_free(conncount_rb_cachep, rbconn);
+               spin_lock(&rbconn->list.list_lock);
+               if (rbconn->list.count == 0 && rbconn->list.dead == false) {
+                       rbconn->list.dead = true;
+                       rb_erase(&rbconn->node, root);
+                       call_rcu(&rbconn->rcu_head, __tree_nodes_free);
+               }
+               spin_unlock(&rbconn->list.list_lock);
        }
 }
 
+static void schedule_gc_worker(struct nf_conncount_data *data, int tree)
+{
+       set_bit(tree, data->pending_trees);
+       schedule_work(&data->gc_work);
+}
+
 static unsigned int
-count_tree(struct net *net, struct rb_root *root,
-          const u32 *key, u8 keylen,
-          const struct nf_conntrack_tuple *tuple,
-          const struct nf_conntrack_zone *zone)
+insert_tree(struct net *net,
+           struct nf_conncount_data *data,
+           struct rb_root *root,
+           unsigned int hash,
+           const u32 *key,
+           u8 keylen,
+           const struct nf_conntrack_tuple *tuple,
+           const struct nf_conntrack_zone *zone)
 {
+       enum nf_conncount_list_add ret;
        struct nf_conncount_rb *gc_nodes[CONNCOUNT_GC_MAX_NODES];
        struct rb_node **rbnode, *parent;
        struct nf_conncount_rb *rbconn;
        struct nf_conncount_tuple *conn;
-       unsigned int gc_count;
-       bool no_gc = false;
+       unsigned int count = 0, gc_count = 0;
+       bool node_found = false;
+
+       spin_lock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]);
 
- restart:
-       gc_count = 0;
        parent = NULL;
        rbnode = &(root->rb_node);
        while (*rbnode) {
                int diff;
-               bool addit;
-
                rbconn = rb_entry(*rbnode, struct nf_conncount_rb, node);
 
                parent = *rbnode;
@@ -232,33 +357,30 @@ count_tree(struct net *net, struct rb_root *root,
                } else if (diff > 0) {
                        rbnode = &((*rbnode)->rb_right);
                } else {
-                       /* same source network -> be counted! */
-                       unsigned int count;
-
-                       count = nf_conncount_lookup(net, &rbconn->hhead, tuple,
-                                                   zone, &addit);
-
-                       tree_nodes_free(root, gc_nodes, gc_count);
-                       if (!addit)
-                               return count;
-
-                       if (!nf_conncount_add(&rbconn->hhead, tuple, zone))
-                               return 0; /* hotdrop */
-
-                       return count + 1;
+                       /* unlikely: other cpu added node already */
+                       node_found = true;
+                       ret = nf_conncount_add(&rbconn->list, tuple, zone);
+                       if (ret == NF_CONNCOUNT_ERR) {
+                               count = 0; /* hotdrop */
+                       } else if (ret == NF_CONNCOUNT_ADDED) {
+                               count = rbconn->list.count;
+                       } else {
+                               /* NF_CONNCOUNT_SKIP, rbconn is already
+                                * reclaimed by gc, insert a new tree node
+                                */
+                               node_found = false;
+                       }
+                       break;
                }
 
-               if (no_gc || gc_count >= ARRAY_SIZE(gc_nodes))
+               if (gc_count >= ARRAY_SIZE(gc_nodes))
                        continue;
 
-               /* only used for GC on hhead, retval and 'addit' ignored */
-               nf_conncount_lookup(net, &rbconn->hhead, tuple, zone, &addit);
-               if (hlist_empty(&rbconn->hhead))
+               if (nf_conncount_gc_list(net, &rbconn->list))
                        gc_nodes[gc_count++] = rbconn;
        }
 
        if (gc_count) {
-               no_gc = true;
                tree_nodes_free(root, gc_nodes, gc_count);
                /* tree_node_free before new allocation permits
                 * allocator to re-use newly free'd object.
@@ -266,58 +388,146 @@ count_tree(struct net *net, struct rb_root *root,
                 * This is a rare event; in most cases we will find
                 * existing node to re-use. (or gc_count is 0).
                 */
-               goto restart;
+
+               if (gc_count >= ARRAY_SIZE(gc_nodes))
+                       schedule_gc_worker(data, hash);
        }
 
-       if (!tuple)
-               return 0;
+       if (node_found)
+               goto out_unlock;
 
-       /* no match, need to insert new node */
+       /* expected case: no match, insert new node */
        rbconn = kmem_cache_alloc(conncount_rb_cachep, GFP_ATOMIC);
        if (rbconn == NULL)
-               return 0;
+               goto out_unlock;
 
        conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC);
        if (conn == NULL) {
                kmem_cache_free(conncount_rb_cachep, rbconn);
-               return 0;
+               goto out_unlock;
        }
 
        conn->tuple = *tuple;
        conn->zone = *zone;
        memcpy(rbconn->key, key, sizeof(u32) * keylen);
 
-       INIT_HLIST_HEAD(&rbconn->hhead);
-       hlist_add_head(&conn->node, &rbconn->hhead);
+       nf_conncount_list_init(&rbconn->list);
+       list_add(&conn->node, &rbconn->list.head);
+       count = 1;
 
        rb_link_node(&rbconn->node, parent, rbnode);
        rb_insert_color(&rbconn->node, root);
-       return 1;
+out_unlock:
+       spin_unlock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]);
+       return count;
 }
 
-/* Count and return number of conntrack entries in 'net' with particular 'key'.
- * If 'tuple' is not null, insert it into the accounting data structure.
- */
-unsigned int nf_conncount_count(struct net *net,
-                               struct nf_conncount_data *data,
-                               const u32 *key,
-                               const struct nf_conntrack_tuple *tuple,
-                               const struct nf_conntrack_zone *zone)
+static unsigned int
+count_tree(struct net *net,
+          struct nf_conncount_data *data,
+          const u32 *key,
+          const struct nf_conntrack_tuple *tuple,
+          const struct nf_conntrack_zone *zone)
 {
+       enum nf_conncount_list_add ret;
        struct rb_root *root;
-       int count;
-       u32 hash;
+       struct rb_node *parent;
+       struct nf_conncount_rb *rbconn;
+       unsigned int hash;
+       u8 keylen = data->keylen;
 
        hash = jhash2(key, data->keylen, conncount_rnd) % CONNCOUNT_SLOTS;
        root = &data->root[hash];
 
-       spin_lock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]);
+       parent = rcu_dereference_raw(root->rb_node);
+       while (parent) {
+               int diff;
+               bool addit;
 
-       count = count_tree(net, root, key, data->keylen, tuple, zone);
+               rbconn = rb_entry(parent, struct nf_conncount_rb, node);
 
-       spin_unlock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]);
+               diff = key_diff(key, rbconn->key, keylen);
+               if (diff < 0) {
+                       parent = rcu_dereference_raw(parent->rb_left);
+               } else if (diff > 0) {
+                       parent = rcu_dereference_raw(parent->rb_right);
+               } else {
+                       /* same source network -> be counted! */
+                       nf_conncount_lookup(net, &rbconn->list, tuple, zone,
+                                           &addit);
 
-       return count;
+                       if (!addit)
+                               return rbconn->list.count;
+
+                       ret = nf_conncount_add(&rbconn->list, tuple, zone);
+                       if (ret == NF_CONNCOUNT_ERR) {
+                               return 0; /* hotdrop */
+                       } else if (ret == NF_CONNCOUNT_ADDED) {
+                               return rbconn->list.count;
+                       } else {
+                               /* NF_CONNCOUNT_SKIP, rbconn is already
+                                * reclaimed by gc, insert a new tree node
+                                */
+                               break;
+                       }
+               }
+       }
+
+       if (!tuple)
+               return 0;
+
+       return insert_tree(net, data, root, hash, key, keylen, tuple, zone);
+}
+
+static void tree_gc_worker(struct work_struct *work)
+{
+       struct nf_conncount_data *data = container_of(work, struct nf_conncount_data, gc_work);
+       struct nf_conncount_rb *gc_nodes[CONNCOUNT_GC_MAX_NODES], *rbconn;
+       struct rb_root *root;
+       struct rb_node *node;
+       unsigned int tree, next_tree, gc_count = 0;
+
+       tree = data->gc_tree % CONNCOUNT_LOCK_SLOTS;
+       root = &data->root[tree];
+
+       rcu_read_lock();
+       for (node = rb_first(root); node != NULL; node = rb_next(node)) {
+               rbconn = rb_entry(node, struct nf_conncount_rb, node);
+               if (nf_conncount_gc_list(data->net, &rbconn->list))
+                       gc_nodes[gc_count++] = rbconn;
+       }
+       rcu_read_unlock();
+
+       spin_lock_bh(&nf_conncount_locks[tree]);
+
+       if (gc_count) {
+               tree_nodes_free(root, gc_nodes, gc_count);
+       }
+
+       clear_bit(tree, data->pending_trees);
+
+       next_tree = (tree + 1) % CONNCOUNT_SLOTS;
+       next_tree = find_next_bit(data->pending_trees, CONNCOUNT_SLOTS, next_tree);
+
+       if (next_tree < CONNCOUNT_SLOTS) {
+               data->gc_tree = next_tree;
+               schedule_work(work);
+       }
+
+       spin_unlock_bh(&nf_conncount_locks[tree]);
+}
+
+/* Count and return number of conntrack entries in 'net' with particular 'key'.
+ * If 'tuple' is not null, insert it into the accounting data structure.
+ * Call with RCU read lock.
+ */
+unsigned int nf_conncount_count(struct net *net,
+                               struct nf_conncount_data *data,
+                               const u32 *key,
+                               const struct nf_conntrack_tuple *tuple,
+                               const struct nf_conntrack_zone *zone)
+{
+       return count_tree(net, data, key, tuple, zone);
 }
 EXPORT_SYMBOL_GPL(nf_conncount_count);
 
@@ -348,17 +558,18 @@ struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family
                data->root[i] = RB_ROOT;
 
        data->keylen = keylen / sizeof(u32);
+       data->net = net;
+       INIT_WORK(&data->gc_work, tree_gc_worker);
 
        return data;
 }
 EXPORT_SYMBOL_GPL(nf_conncount_init);
 
-void nf_conncount_cache_free(struct hlist_head *hhead)
+void nf_conncount_cache_free(struct nf_conncount_list *list)
 {
-       struct nf_conncount_tuple *conn;
-       struct hlist_node *n;
+       struct nf_conncount_tuple *conn, *conn_n;
 
-       hlist_for_each_entry_safe(conn, n, hhead, node)
+       list_for_each_entry_safe(conn, conn_n, &list->head, node)
                kmem_cache_free(conncount_conn_cachep, conn);
 }
 EXPORT_SYMBOL_GPL(nf_conncount_cache_free);
@@ -373,7 +584,7 @@ static void destroy_tree(struct rb_root *r)
 
                rb_erase(node, r);
 
-               nf_conncount_cache_free(&rbconn->hhead);
+               nf_conncount_cache_free(&rbconn->list);
 
                kmem_cache_free(conncount_rb_cachep, rbconn);
        }
@@ -384,6 +595,7 @@ void nf_conncount_destroy(struct net *net, unsigned int family,
 {
        unsigned int i;
 
+       cancel_work_sync(&data->gc_work);
        nf_ct_netns_put(net, family);
 
        for (i = 0; i < ARRAY_SIZE(data->root); ++i)
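tree_gc_worker() above amortizes reclamation: insert_tree() only marks a slot dirty once inline GC exceeds its CONNCOUNT_GC_MAX_NODES budget, and the work item then sweeps one tree per invocation, re-queuing itself while pending bits remain. The scheduling skeleton reduced to a bitmap walk (invented names; single-threaded, so each loop iteration stands for one work invocation):

#include <stdio.h>

#define SLOTS 256
#define BITS_PER_LONG (8 * sizeof(unsigned long))

static unsigned long pending[SLOTS / BITS_PER_LONG];

static void set_pending(unsigned int slot)
{
	pending[slot / BITS_PER_LONG] |= 1UL << (slot % BITS_PER_LONG);
}

/* Next dirty slot at or after 'from', or SLOTS if none remain. */
static unsigned int next_pending(unsigned int from)
{
	for (unsigned int i = from; i < SLOTS; i++)
		if (pending[i / BITS_PER_LONG] & (1UL << (i % BITS_PER_LONG)))
			return i;
	return SLOTS;
}

static void gc_one_tree(unsigned int tree)
{
	/* ... evict empty lists from root[tree] under the slot lock ... */
	pending[tree / BITS_PER_LONG] &= ~(1UL << (tree % BITS_PER_LONG));
	printf("swept tree %u\n", tree);
}

int main(void)
{
	unsigned int tree = 0;

	set_pending(3);
	set_pending(200);

	/* one gc_one_tree() call per simulated work invocation */
	while ((tree = next_pending(tree)) < SLOTS)
		gc_one_tree(tree);
	return 0;
}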
index a1086bdec2429c2d26d4cbb6b2a12bd8927b013d..5423b197d98a2b49e2ecc6e6de901702302f834e 100644
@@ -32,7 +32,7 @@ int nf_conntrack_broadcast_help(struct sk_buff *skb,
        __be32 mask = 0;
 
        /* we're only interested in locally generated packets */
-       if (skb->sk == NULL)
+       if (skb->sk == NULL || !net_eq(nf_ct_net(ct), sock_net(skb->sk)))
                goto out;
        if (rt == NULL || !(rt->rt_flags & RTCF_BROADCAST))
                goto out;
index 3d52804250274602c521f3cfe6c0c3b8fa9e78e9..8a113ca1eea22703b5f1c9ac19184231a18bd692 100644
@@ -37,7 +37,6 @@
 #include <linux/rculist_nulls.h>
 
 #include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_conntrack_l3proto.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <net/netfilter/nf_conntrack_helper.h>
@@ -55,6 +54,7 @@
 #include <net/netfilter/nf_nat_core.h>
 #include <net/netfilter/nf_nat_helper.h>
 #include <net/netns/hash.h>
+#include <net/ip.h>
 
 #include "nf_internals.h"
 
@@ -222,7 +222,7 @@ static u32 hash_conntrack(const struct net *net,
        return scale_hash(hash_conntrack_raw(tuple, net));
 }
 
-bool
+static bool
 nf_ct_get_tuple(const struct sk_buff *skb,
                unsigned int nhoff,
                unsigned int dataoff,
@@ -230,37 +230,151 @@ nf_ct_get_tuple(const struct sk_buff *skb,
                u_int8_t protonum,
                struct net *net,
                struct nf_conntrack_tuple *tuple,
-               const struct nf_conntrack_l3proto *l3proto,
                const struct nf_conntrack_l4proto *l4proto)
 {
+       unsigned int size;
+       const __be32 *ap;
+       __be32 _addrs[8];
+       struct {
+               __be16 sport;
+               __be16 dport;
+       } _inet_hdr, *inet_hdr;
+
        memset(tuple, 0, sizeof(*tuple));
 
        tuple->src.l3num = l3num;
-       if (l3proto->pkt_to_tuple(skb, nhoff, tuple) == 0)
+       switch (l3num) {
+       case NFPROTO_IPV4:
+               nhoff += offsetof(struct iphdr, saddr);
+               size = 2 * sizeof(__be32);
+               break;
+       case NFPROTO_IPV6:
+               nhoff += offsetof(struct ipv6hdr, saddr);
+               size = sizeof(_addrs);
+               break;
+       default:
+               return true;
+       }
+
+       ap = skb_header_pointer(skb, nhoff, size, _addrs);
+       if (!ap)
                return false;
 
+       switch (l3num) {
+       case NFPROTO_IPV4:
+               tuple->src.u3.ip = ap[0];
+               tuple->dst.u3.ip = ap[1];
+               break;
+       case NFPROTO_IPV6:
+               memcpy(tuple->src.u3.ip6, ap, sizeof(tuple->src.u3.ip6));
+               memcpy(tuple->dst.u3.ip6, ap + 4, sizeof(tuple->dst.u3.ip6));
+               break;
+       }
+
        tuple->dst.protonum = protonum;
        tuple->dst.dir = IP_CT_DIR_ORIGINAL;
 
-       return l4proto->pkt_to_tuple(skb, dataoff, net, tuple);
+       if (unlikely(l4proto->pkt_to_tuple))
+               return l4proto->pkt_to_tuple(skb, dataoff, net, tuple);
+
+       /* Actually only need first 4 bytes to get ports. */
+       inet_hdr = skb_header_pointer(skb, dataoff, sizeof(_inet_hdr), &_inet_hdr);
+       if (!inet_hdr)
+               return false;
+
+       tuple->src.u.udp.port = inet_hdr->sport;
+       tuple->dst.u.udp.port = inet_hdr->dport;
+       return true;
+}
+
+static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
+                           u_int8_t *protonum)
+{
+       int dataoff = -1;
+       const struct iphdr *iph;
+       struct iphdr _iph;
+
+       iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
+       if (!iph)
+               return -1;
+
+       /* Conntrack defragments packets; we might still see fragments
+        * inside ICMP packets, though.
+        */
+       if (iph->frag_off & htons(IP_OFFSET))
+               return -1;
+
+       dataoff = nhoff + (iph->ihl << 2);
+       *protonum = iph->protocol;
+
+       /* Check bogus IP headers */
+       if (dataoff > skb->len) {
+               pr_debug("bogus IPv4 packet: nhoff %u, ihl %u, skblen %u\n",
+                        nhoff, iph->ihl << 2, skb->len);
+               return -1;
+       }
+       return dataoff;
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
+                           u8 *protonum)
+{
+       int protoff = -1;
+       unsigned int extoff = nhoff + sizeof(struct ipv6hdr);
+       __be16 frag_off;
+       u8 nexthdr;
+
+       if (skb_copy_bits(skb, nhoff + offsetof(struct ipv6hdr, nexthdr),
+                         &nexthdr, sizeof(nexthdr)) != 0) {
+               pr_debug("can't get nexthdr\n");
+               return -1;
+       }
+       protoff = ipv6_skip_exthdr(skb, extoff, &nexthdr, &frag_off);
+       /*
+        * (protoff == skb->len) means the packet has no data, just
+        * IPv6 and possibly extension headers, but it is tracked anyway
+        */
+       if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
+               pr_debug("can't find proto in pkt\n");
+               return -1;
+       }
+
+       *protonum = nexthdr;
+       return protoff;
+}
+#endif
+
+static int get_l4proto(const struct sk_buff *skb,
+                      unsigned int nhoff, u8 pf, u8 *l4num)
+{
+       switch (pf) {
+       case NFPROTO_IPV4:
+               return ipv4_get_l4proto(skb, nhoff, l4num);
+#if IS_ENABLED(CONFIG_IPV6)
+       case NFPROTO_IPV6:
+               return ipv6_get_l4proto(skb, nhoff, l4num);
+#endif
+       default:
+               *l4num = 0;
+               break;
+       }
+       return -1;
 }
-EXPORT_SYMBOL_GPL(nf_ct_get_tuple);
 
 bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff,
                       u_int16_t l3num,
                       struct net *net, struct nf_conntrack_tuple *tuple)
 {
-       const struct nf_conntrack_l3proto *l3proto;
        const struct nf_conntrack_l4proto *l4proto;
-       unsigned int protoff;
-       u_int8_t protonum;
+       u8 protonum;
+       int protoff;
        int ret;
 
        rcu_read_lock();
 
-       l3proto = __nf_ct_l3proto_find(l3num);
-       ret = l3proto->get_l4proto(skb, nhoff, &protoff, &protonum);
-       if (ret != NF_ACCEPT) {
+       protoff = get_l4proto(skb, nhoff, l3num, &protonum);
+       if (protoff <= 0) {
                rcu_read_unlock();
                return false;
        }
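The rewritten nf_ct_get_tuple() leans entirely on skb_header_pointer(), which yields a pointer to the requested bytes, copying them into caller-supplied scratch space when the skb is not linear. A flat-buffer analogue of that contract (invented names; a real skb may also return a direct pointer without copying):

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Copy 'len' bytes at 'offset' into 'scratch'; NULL if truncated. */
static const void *header_pointer(const uint8_t *pkt, size_t pktlen,
				  size_t offset, size_t len, void *scratch)
{
	if (len > pktlen || offset > pktlen - len)
		return NULL;		/* would read past the packet */
	memcpy(scratch, pkt + offset, len);
	return scratch;
}

int main(void)
{
	uint8_t pkt[24] = { 0 };	/* 20-byte IPv4 header + 4 bytes UDP */
	uint16_t _ports[2];		/* scratch for sport/dport */
	const uint16_t *ports;

	pkt[20] = 0x00;			/* source port 53, network order */
	pkt[21] = 0x35;

	ports = header_pointer(pkt, sizeof(pkt), 20, sizeof(_ports), _ports);
	if (ports)
		printf("sport %u\n", ntohs(ports[0]));
	else
		puts("truncated");
	return 0;
}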
@@ -268,7 +382,7 @@ bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff,
        l4proto = __nf_ct_l4proto_find(l3num, protonum);
 
        ret = nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, net, tuple,
-                             l3proto, l4proto);
+                             l4proto);
 
        rcu_read_unlock();
        return ret;
@@ -278,19 +392,35 @@ EXPORT_SYMBOL_GPL(nf_ct_get_tuplepr);
 bool
 nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
                   const struct nf_conntrack_tuple *orig,
-                  const struct nf_conntrack_l3proto *l3proto,
                   const struct nf_conntrack_l4proto *l4proto)
 {
        memset(inverse, 0, sizeof(*inverse));
 
        inverse->src.l3num = orig->src.l3num;
-       if (l3proto->invert_tuple(inverse, orig) == 0)
-               return false;
+
+       switch (orig->src.l3num) {
+       case NFPROTO_IPV4:
+               inverse->src.u3.ip = orig->dst.u3.ip;
+               inverse->dst.u3.ip = orig->src.u3.ip;
+               break;
+       case NFPROTO_IPV6:
+               inverse->src.u3.in6 = orig->dst.u3.in6;
+               inverse->dst.u3.in6 = orig->src.u3.in6;
+               break;
+       default:
+               break;
+       }
 
        inverse->dst.dir = !orig->dst.dir;
 
        inverse->dst.protonum = orig->dst.protonum;
-       return l4proto->invert_tuple(inverse, orig);
+
+       if (unlikely(l4proto->invert_tuple))
+               return l4proto->invert_tuple(inverse, orig);
+
+       inverse->src.u.all = orig->dst.u.all;
+       inverse->dst.u.all = orig->src.u.all;
+       return true;
 }
 EXPORT_SYMBOL_GPL(nf_ct_invert_tuple);
 
@@ -502,6 +632,18 @@ nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
               net_eq(net, nf_ct_net(ct));
 }
 
+static inline bool
+nf_ct_match(const struct nf_conn *ct1, const struct nf_conn *ct2)
+{
+       return nf_ct_tuple_equal(&ct1->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+                                &ct2->tuplehash[IP_CT_DIR_ORIGINAL].tuple) &&
+              nf_ct_tuple_equal(&ct1->tuplehash[IP_CT_DIR_REPLY].tuple,
+                                &ct2->tuplehash[IP_CT_DIR_REPLY].tuple) &&
+              nf_ct_zone_equal(ct1, nf_ct_zone(ct2), IP_CT_DIR_ORIGINAL) &&
+              nf_ct_zone_equal(ct1, nf_ct_zone(ct2), IP_CT_DIR_REPLY) &&
+              net_eq(nf_ct_net(ct1), nf_ct_net(ct2));
+}
+
 /* caller must hold rcu readlock and none of the nf_conntrack_locks */
 static void nf_ct_gc_expired(struct nf_conn *ct)
 {
@@ -695,19 +837,21 @@ static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb,
        /* This is the conntrack entry already in hashes that won race. */
        struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
        const struct nf_conntrack_l4proto *l4proto;
+       enum ip_conntrack_info oldinfo;
+       struct nf_conn *loser_ct = nf_ct_get(skb, &oldinfo);
 
        l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
        if (l4proto->allow_clash &&
-           ((ct->status & IPS_NAT_DONE_MASK) == 0) &&
            !nf_ct_is_dying(ct) &&
            atomic_inc_not_zero(&ct->ct_general.use)) {
-               enum ip_conntrack_info oldinfo;
-               struct nf_conn *loser_ct = nf_ct_get(skb, &oldinfo);
-
-               nf_ct_acct_merge(ct, ctinfo, loser_ct);
-               nf_conntrack_put(&loser_ct->ct_general);
-               nf_ct_set(skb, ct, oldinfo);
-               return NF_ACCEPT;
+               if (((ct->status & IPS_NAT_DONE_MASK) == 0) ||
+                   nf_ct_match(ct, loser_ct)) {
+                       nf_ct_acct_merge(ct, ctinfo, loser_ct);
+                       nf_conntrack_put(&loser_ct->ct_general);
+                       nf_ct_set(skb, ct, oldinfo);
+                       return NF_ACCEPT;
+               }
+               nf_ct_put(ct);
        }
        NF_CT_STAT_INC(net, drop);
        return NF_DROP;
@@ -1195,7 +1339,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_free);
 static noinline struct nf_conntrack_tuple_hash *
 init_conntrack(struct net *net, struct nf_conn *tmpl,
               const struct nf_conntrack_tuple *tuple,
-              const struct nf_conntrack_l3proto *l3proto,
               const struct nf_conntrack_l4proto *l4proto,
               struct sk_buff *skb,
               unsigned int dataoff, u32 hash)
@@ -1208,9 +1351,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
        const struct nf_conntrack_zone *zone;
        struct nf_conn_timeout *timeout_ext;
        struct nf_conntrack_zone tmp;
-       unsigned int *timeouts;
 
-       if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) {
+       if (!nf_ct_invert_tuple(&repl_tuple, tuple, l4proto)) {
                pr_debug("Can't invert tuple.\n");
                return NULL;
        }
@@ -1227,15 +1369,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
        }
 
        timeout_ext = tmpl ? nf_ct_timeout_find(tmpl) : NULL;
-       if (timeout_ext) {
-               timeouts = nf_ct_timeout_data(timeout_ext);
-               if (unlikely(!timeouts))
-                       timeouts = l4proto->get_timeouts(net);
-       } else {
-               timeouts = l4proto->get_timeouts(net);
-       }
 
-       if (!l4proto->new(ct, skb, dataoff, timeouts)) {
+       if (!l4proto->new(ct, skb, dataoff)) {
                nf_conntrack_free(ct);
                pr_debug("can't track with proto module\n");
                return NULL;
@@ -1266,8 +1401,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
                        /* exp->master safe, refcnt bumped in nf_ct_find_expectation */
                        ct->master = exp->master;
                        if (exp->helper) {
-                               help = nf_ct_helper_ext_add(ct, exp->helper,
-                                                           GFP_ATOMIC);
+                               help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
                                if (help)
                                        rcu_assign_pointer(help->helper, exp->helper);
                        }
@@ -1307,7 +1441,6 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
                  unsigned int dataoff,
                  u_int16_t l3num,
                  u_int8_t protonum,
-                 const struct nf_conntrack_l3proto *l3proto,
                  const struct nf_conntrack_l4proto *l4proto)
 {
        const struct nf_conntrack_zone *zone;
@@ -1319,8 +1452,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
        u32 hash;
 
        if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
-                            dataoff, l3num, protonum, net, &tuple, l3proto,
-                            l4proto)) {
+                            dataoff, l3num, protonum, net, &tuple, l4proto)) {
                pr_debug("Can't get tuple\n");
                return 0;
        }
@@ -1330,7 +1462,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
        hash = hash_conntrack_raw(&tuple, net);
        h = __nf_conntrack_find_get(net, zone, &tuple, hash);
        if (!h) {
-               h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto,
+               h = init_conntrack(net, tmpl, &tuple, l4proto,
                                   skb, dataoff, hash);
                if (!h)
                        return 0;
@@ -1363,14 +1495,11 @@ unsigned int
 nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
                struct sk_buff *skb)
 {
-       const struct nf_conntrack_l3proto *l3proto;
        const struct nf_conntrack_l4proto *l4proto;
        struct nf_conn *ct, *tmpl;
        enum ip_conntrack_info ctinfo;
-       unsigned int *timeouts;
-       unsigned int dataoff;
        u_int8_t protonum;
-       int ret;
+       int dataoff, ret;
 
        tmpl = nf_ct_get(skb, &ctinfo);
        if (tmpl || ctinfo == IP_CT_UNTRACKED) {
@@ -1384,14 +1513,12 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
        }
 
        /* rcu_read_lock()ed by nf_hook_thresh */
-       l3proto = __nf_ct_l3proto_find(pf);
-       ret = l3proto->get_l4proto(skb, skb_network_offset(skb),
-                                  &dataoff, &protonum);
-       if (ret <= 0) {
+       dataoff = get_l4proto(skb, skb_network_offset(skb), pf, &protonum);
+       if (dataoff <= 0) {
                pr_debug("not prepared to track yet or error occurred\n");
                NF_CT_STAT_INC_ATOMIC(net, error);
                NF_CT_STAT_INC_ATOMIC(net, invalid);
-               ret = -ret;
+               ret = NF_ACCEPT;
                goto out;
        }
 
@@ -1413,8 +1540,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
                        goto out;
        }
 repeat:
-       ret = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum,
-                               l3proto, l4proto);
+       ret = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum, l4proto);
        if (ret < 0) {
                /* Too stressed to deal. */
                NF_CT_STAT_INC_ATOMIC(net, drop);
@@ -1430,10 +1556,7 @@ repeat:
                goto out;
        }
 
-       /* Decide what timeout policy we want to apply to this flow. */
-       timeouts = nf_ct_timeout_lookup(net, ct, l4proto);
-
-       ret = l4proto->packet(ct, skb, dataoff, ctinfo, timeouts);
+       ret = l4proto->packet(ct, skb, dataoff, ctinfo);
        if (ret <= 0) {
                /* Invalid: inverse of the return code tells
                 * the netfilter core what to do */
@@ -1471,7 +1594,6 @@ bool nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse,
 
        rcu_read_lock();
        ret = nf_ct_invert_tuple(inverse, orig,
-                                __nf_ct_l3proto_find(orig->src.l3num),
                                 __nf_ct_l4proto_find(orig->src.l3num,
                                                      orig->dst.protonum));
        rcu_read_unlock();
@@ -1609,14 +1731,14 @@ static void nf_conntrack_attach(struct sk_buff *nskb, const struct sk_buff *skb)
 
 static int nf_conntrack_update(struct net *net, struct sk_buff *skb)
 {
-       const struct nf_conntrack_l3proto *l3proto;
        const struct nf_conntrack_l4proto *l4proto;
        struct nf_conntrack_tuple_hash *h;
        struct nf_conntrack_tuple tuple;
        enum ip_conntrack_info ctinfo;
        struct nf_nat_hook *nat_hook;
-       unsigned int dataoff, status;
+       unsigned int status;
        struct nf_conn *ct;
+       int dataoff;
        u16 l3num;
        u8 l4num;
 
@@ -1625,16 +1747,15 @@ static int nf_conntrack_update(struct net *net, struct sk_buff *skb)
                return 0;
 
        l3num = nf_ct_l3num(ct);
-       l3proto = nf_ct_l3proto_find_get(l3num);
 
-       if (l3proto->get_l4proto(skb, skb_network_offset(skb), &dataoff,
-                                &l4num) <= 0)
+       dataoff = get_l4proto(skb, skb_network_offset(skb), l3num, &l4num);
+       if (dataoff <= 0)
                return -1;
 
        l4proto = nf_ct_l4proto_find_get(l3num, l4num);
 
        if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num,
-                            l4num, net, &tuple, l3proto, l4proto))
+                            l4num, net, &tuple, l4proto))
                return -1;
 
        if (ct->status & IPS_SRC_NAT) {
@@ -1683,6 +1804,41 @@ static int nf_conntrack_update(struct net *net, struct sk_buff *skb)
        return 0;
 }
 
+static bool nf_conntrack_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple,
+                                      const struct sk_buff *skb)
+{
+       const struct nf_conntrack_tuple *src_tuple;
+       const struct nf_conntrack_tuple_hash *hash;
+       struct nf_conntrack_tuple srctuple;
+       enum ip_conntrack_info ctinfo;
+       struct nf_conn *ct;
+
+       ct = nf_ct_get(skb, &ctinfo);
+       if (ct) {
+               src_tuple = nf_ct_tuple(ct, CTINFO2DIR(ctinfo));
+               memcpy(dst_tuple, src_tuple, sizeof(*dst_tuple));
+               return true;
+       }
+
+       if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
+                              NFPROTO_IPV4, dev_net(skb->dev),
+                              &srctuple))
+               return false;
+
+       hash = nf_conntrack_find_get(dev_net(skb->dev),
+                                    &nf_ct_zone_dflt,
+                                    &srctuple);
+       if (!hash)
+               return false;
+
+       ct = nf_ct_tuplehash_to_ctrack(hash);
+       src_tuple = nf_ct_tuple(ct, !hash->tuple.dst.dir);
+       memcpy(dst_tuple, src_tuple, sizeof(*dst_tuple));
+       nf_ct_put(ct);
+
+       return true;
+}
+
 /* Bring out ya dead! */
 static struct nf_conn *
 get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
@@ -2054,9 +2210,6 @@ int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp)
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize);
 
-module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
-                 &nf_conntrack_htable_size, 0600);
-
 static __always_inline unsigned int total_extension_size(void)
 {
        /* remember to add new extensions below */
@@ -2204,6 +2357,7 @@ err_cachep:
 static struct nf_ct_hook nf_conntrack_hook = {
        .update         = nf_conntrack_update,
        .destroy        = destroy_conntrack,
+       .get_tuple_skb  = nf_conntrack_get_tuple_skb,
 };
 
 void nf_conntrack_init_end(void)
index 853b23206bb7a7c8730e0ca37c2b3e9433af448f..3f586ba23d925fe120f917d58f88f9842b48793c 100644
@@ -610,7 +610,6 @@ static int exp_seq_show(struct seq_file *s, void *v)
                   expect->tuple.src.l3num,
                   expect->tuple.dst.protonum);
        print_tuple(s, &expect->tuple,
-                   __nf_ct_l3proto_find(expect->tuple.src.l3num),
                    __nf_ct_l4proto_find(expect->tuple.src.l3num,
                                       expect->tuple.dst.protonum));
 
index a75b11c393128d79107fc447c5109b7d0a786ea5..d557a425289d6e39263dd997836ec3166c73edb8 100644
@@ -24,7 +24,6 @@
 #include <linux/rtnetlink.h>
 
 #include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_conntrack_l3proto.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_core.h>
@@ -193,8 +192,7 @@ void nf_conntrack_helper_put(struct nf_conntrack_helper *helper)
 EXPORT_SYMBOL_GPL(nf_conntrack_helper_put);
 
 struct nf_conn_help *
-nf_ct_helper_ext_add(struct nf_conn *ct,
-                    struct nf_conntrack_helper *helper, gfp_t gfp)
+nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp)
 {
        struct nf_conn_help *help;
 
@@ -263,7 +261,7 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
        }
 
        if (help == NULL) {
-               help = nf_ct_helper_ext_add(ct, helper, flags);
+               help = nf_ct_helper_ext_add(ct, flags);
                if (help == NULL)
                        return -ENOMEM;
        } else {
diff --git a/net/netfilter/nf_conntrack_l3proto_generic.c b/net/netfilter/nf_conntrack_l3proto_generic.c
deleted file mode 100644
index 397e691..0000000
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
- *
- * Based largely upon the original ip_conntrack code which
- * had the following copyright information:
- *
- * (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Author:
- *     Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- */
-
-#include <linux/types.h>
-#include <linux/ip.h>
-#include <linux/netfilter.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/icmp.h>
-#include <linux/sysctl.h>
-#include <net/ip.h>
-
-#include <linux/netfilter_ipv4.h>
-#include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_conntrack_l4proto.h>
-#include <net/netfilter/nf_conntrack_l3proto.h>
-#include <net/netfilter/nf_conntrack_core.h>
-#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
-
-static bool generic_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
-                                struct nf_conntrack_tuple *tuple)
-{
-       memset(&tuple->src.u3, 0, sizeof(tuple->src.u3));
-       memset(&tuple->dst.u3, 0, sizeof(tuple->dst.u3));
-
-       return true;
-}
-
-static bool generic_invert_tuple(struct nf_conntrack_tuple *tuple,
-                                const struct nf_conntrack_tuple *orig)
-{
-       memset(&tuple->src.u3, 0, sizeof(tuple->src.u3));
-       memset(&tuple->dst.u3, 0, sizeof(tuple->dst.u3));
-
-       return true;
-}
-
-static int generic_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
-                              unsigned int *dataoff, u_int8_t *protonum)
-{
-       /* Never track !!! */
-       return -NF_ACCEPT;
-}
-
-
-struct nf_conntrack_l3proto nf_conntrack_l3proto_generic __read_mostly = {
-       .l3proto         = PF_UNSPEC,
-       .pkt_to_tuple    = generic_pkt_to_tuple,
-       .invert_tuple    = generic_invert_tuple,
-       .get_l4proto     = generic_get_l4proto,
-};
-EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_generic);
index 20a2e37c76d124e31771c9bf96bd13216501202a..f981bfa8db72ebc2e64a3add9457fa52a4e5d184 100644
@@ -38,7 +38,6 @@
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_seqadj.h>
-#include <net/netfilter/nf_conntrack_l3proto.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_tuple.h>
 #include <net/netfilter/nf_conntrack_acct.h>
@@ -81,9 +80,26 @@ nla_put_failure:
        return -1;
 }
 
+static int ipv4_tuple_to_nlattr(struct sk_buff *skb,
+                               const struct nf_conntrack_tuple *tuple)
+{
+       if (nla_put_in_addr(skb, CTA_IP_V4_SRC, tuple->src.u3.ip) ||
+           nla_put_in_addr(skb, CTA_IP_V4_DST, tuple->dst.u3.ip))
+               return -EMSGSIZE;
+       return 0;
+}
+
+static int ipv6_tuple_to_nlattr(struct sk_buff *skb,
+                               const struct nf_conntrack_tuple *tuple)
+{
+       if (nla_put_in6_addr(skb, CTA_IP_V6_SRC, &tuple->src.u3.in6) ||
+           nla_put_in6_addr(skb, CTA_IP_V6_DST, &tuple->dst.u3.in6))
+               return -EMSGSIZE;
+       return 0;
+}
+
 static int ctnetlink_dump_tuples_ip(struct sk_buff *skb,
-                                   const struct nf_conntrack_tuple *tuple,
-                                   const struct nf_conntrack_l3proto *l3proto)
+                                   const struct nf_conntrack_tuple *tuple)
 {
        int ret = 0;
        struct nlattr *nest_parms;
@@ -92,8 +108,14 @@ static int ctnetlink_dump_tuples_ip(struct sk_buff *skb,
        if (!nest_parms)
                goto nla_put_failure;
 
-       if (likely(l3proto->tuple_to_nlattr))
-               ret = l3proto->tuple_to_nlattr(skb, tuple);
+       switch (tuple->src.l3num) {
+       case NFPROTO_IPV4:
+               ret = ipv4_tuple_to_nlattr(skb, tuple);
+               break;
+       case NFPROTO_IPV6:
+               ret = ipv6_tuple_to_nlattr(skb, tuple);
+               break;
+       }
 
        nla_nest_end(skb, nest_parms);
 
@@ -106,13 +128,11 @@ nla_put_failure:
 static int ctnetlink_dump_tuples(struct sk_buff *skb,
                                 const struct nf_conntrack_tuple *tuple)
 {
-       const struct nf_conntrack_l3proto *l3proto;
        const struct nf_conntrack_l4proto *l4proto;
        int ret;
 
        rcu_read_lock();
-       l3proto = __nf_ct_l3proto_find(tuple->src.l3num);
-       ret = ctnetlink_dump_tuples_ip(skb, tuple, l3proto);
+       ret = ctnetlink_dump_tuples_ip(skb, tuple);
 
        if (ret >= 0) {
                l4proto = __nf_ct_l4proto_find(tuple->src.l3num,
@@ -556,15 +576,20 @@ nla_put_failure:
        return -1;
 }
 
+static const struct nla_policy cta_ip_nla_policy[CTA_IP_MAX + 1] = {
+       [CTA_IP_V4_SRC] = { .type = NLA_U32 },
+       [CTA_IP_V4_DST] = { .type = NLA_U32 },
+       [CTA_IP_V6_SRC] = { .len = sizeof(__be32) * 4 },
+       [CTA_IP_V6_DST] = { .len = sizeof(__be32) * 4 },
+};
+
 #if defined(CONFIG_NETFILTER_NETLINK_GLUE_CT) || defined(CONFIG_NF_CONNTRACK_EVENTS)
 static size_t ctnetlink_proto_size(const struct nf_conn *ct)
 {
-       const struct nf_conntrack_l3proto *l3proto;
        const struct nf_conntrack_l4proto *l4proto;
        size_t len, len4 = 0;
 
-       l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
-       len = l3proto->nla_size;
+       len = nla_policy_len(cta_ip_nla_policy, CTA_IP_MAX + 1);
        len *= 3u; /* ORIG, REPLY, MASTER */
 
        l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
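cta_ip_nla_policy doubles as a size estimate here: assuming the usual nla_total_size() rounding (4-byte attribute header plus payload, padded to 4), the two NLA_U32 entries account for 8 bytes each and the two 16-byte IPv6 entries for 20 bytes each, so nla_policy_len() returns 2*8 + 2*20 = 56 bytes per tuple, an upper bound that covers either address family, before the *3 for ORIG/REPLY/MASTER.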
@@ -936,29 +961,54 @@ out:
        return skb->len;
 }
 
+static int ipv4_nlattr_to_tuple(struct nlattr *tb[],
+                               struct nf_conntrack_tuple *t)
+{
+       if (!tb[CTA_IP_V4_SRC] || !tb[CTA_IP_V4_DST])
+               return -EINVAL;
+
+       t->src.u3.ip = nla_get_in_addr(tb[CTA_IP_V4_SRC]);
+       t->dst.u3.ip = nla_get_in_addr(tb[CTA_IP_V4_DST]);
+
+       return 0;
+}
+
+static int ipv6_nlattr_to_tuple(struct nlattr *tb[],
+                               struct nf_conntrack_tuple *t)
+{
+       if (!tb[CTA_IP_V6_SRC] || !tb[CTA_IP_V6_DST])
+               return -EINVAL;
+
+       t->src.u3.in6 = nla_get_in6_addr(tb[CTA_IP_V6_SRC]);
+       t->dst.u3.in6 = nla_get_in6_addr(tb[CTA_IP_V6_DST]);
+
+       return 0;
+}
+
 static int ctnetlink_parse_tuple_ip(struct nlattr *attr,
                                    struct nf_conntrack_tuple *tuple)
 {
        struct nlattr *tb[CTA_IP_MAX+1];
-       struct nf_conntrack_l3proto *l3proto;
        int ret = 0;
 
        ret = nla_parse_nested(tb, CTA_IP_MAX, attr, NULL, NULL);
        if (ret < 0)
                return ret;
 
-       rcu_read_lock();
-       l3proto = __nf_ct_l3proto_find(tuple->src.l3num);
+       ret = nla_validate_nested(attr, CTA_IP_MAX,
+                                 cta_ip_nla_policy, NULL);
+       if (ret)
+               return ret;
 
-       if (likely(l3proto->nlattr_to_tuple)) {
-               ret = nla_validate_nested(attr, CTA_IP_MAX,
-                                         l3proto->nla_policy, NULL);
-               if (ret == 0)
-                       ret = l3proto->nlattr_to_tuple(tb, tuple);
+       switch (tuple->src.l3num) {
+       case NFPROTO_IPV4:
+               ret = ipv4_nlattr_to_tuple(tb, tuple);
+               break;
+       case NFPROTO_IPV6:
+               ret = ipv6_nlattr_to_tuple(tb, tuple);
+               break;
        }
 
-       rcu_read_unlock();
-
        return ret;
 }
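Because the dispatch is now a compile-time switch on tuple->src.l3num rather than a dereference of an rcu-protected l3proto pointer, the rcu_read_lock()/rcu_read_unlock() pair around the old indirect call could be dropped along with it.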
 
@@ -1897,7 +1947,7 @@ ctnetlink_create_conntrack(struct net *net,
                } else {
                        struct nf_conn_help *help;
 
-                       help = nf_ct_helper_ext_add(ct, helper, GFP_ATOMIC);
+                       help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
                        if (help == NULL) {
                                err = -ENOMEM;
                                goto err2;
@@ -2581,7 +2631,6 @@ static int ctnetlink_exp_dump_mask(struct sk_buff *skb,
                                   const struct nf_conntrack_tuple *tuple,
                                   const struct nf_conntrack_tuple_mask *mask)
 {
-       const struct nf_conntrack_l3proto *l3proto;
        const struct nf_conntrack_l4proto *l4proto;
        struct nf_conntrack_tuple m;
        struct nlattr *nest_parms;
@@ -2597,8 +2646,7 @@ static int ctnetlink_exp_dump_mask(struct sk_buff *skb,
                goto nla_put_failure;
 
        rcu_read_lock();
-       l3proto = __nf_ct_l3proto_find(tuple->src.l3num);
-       ret = ctnetlink_dump_tuples_ip(skb, &m, l3proto);
+       ret = ctnetlink_dump_tuples_ip(skb, &m);
        if (ret >= 0) {
                l4proto = __nf_ct_l4proto_find(tuple->src.l3num,
                                               tuple->dst.protonum);
index d88841fbc560fcac4194938f4354907e51ecbcbf..803607a9010240eec2d2ba8d27551a601fcf7179 100644
@@ -1,14 +1,4 @@
-/* L3/L4 protocol support for nf_conntrack. */
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
- * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
- * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
+// SPDX-License-Identifier: GPL-2.0
 
 #include <linux/types.h>
 #include <linux/netfilter.h>
 #include <linux/netdevice.h>
 
 #include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_conntrack_l3proto.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/nf_log.h>
 
+#include <linux/ip.h>
+#include <linux/icmp.h>
+#include <linux/sysctl.h>
+#include <net/route.h>
+#include <net/ip.h>
+
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
+#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
+#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
+#include <net/netfilter/nf_nat_helper.h>
+#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
+#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
+
+#include <linux/ipv6.h>
+#include <linux/in6.h>
+#include <net/ipv6.h>
+#include <net/inet_frag.h>
+
+extern unsigned int nf_conntrack_net_id;
+
 static struct nf_conntrack_l4proto __rcu **nf_ct_protos[NFPROTO_NUMPROTO] __read_mostly;
-struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[NFPROTO_NUMPROTO] __read_mostly;
-EXPORT_SYMBOL_GPL(nf_ct_l3protos);
 
 static DEFINE_MUTEX(nf_ct_proto_mutex);
 
@@ -122,137 +134,6 @@ __nf_ct_l4proto_find(u_int16_t l3proto, u_int8_t l4proto)
 }
 EXPORT_SYMBOL_GPL(__nf_ct_l4proto_find);
 
-/* this is guaranteed to always return a valid protocol helper, since
- * it falls back to generic_protocol */
-const struct nf_conntrack_l3proto *
-nf_ct_l3proto_find_get(u_int16_t l3proto)
-{
-       struct nf_conntrack_l3proto *p;
-
-       rcu_read_lock();
-       p = __nf_ct_l3proto_find(l3proto);
-       if (!try_module_get(p->me))
-               p = &nf_conntrack_l3proto_generic;
-       rcu_read_unlock();
-
-       return p;
-}
-EXPORT_SYMBOL_GPL(nf_ct_l3proto_find_get);
-
-int
-nf_ct_l3proto_try_module_get(unsigned short l3proto)
-{
-       const struct nf_conntrack_l3proto *p;
-       int ret;
-
-retry: p = nf_ct_l3proto_find_get(l3proto);
-       if (p == &nf_conntrack_l3proto_generic) {
-               ret = request_module("nf_conntrack-%d", l3proto);
-               if (!ret)
-                       goto retry;
-
-               return -EPROTOTYPE;
-       }
-
-       return 0;
-}
-EXPORT_SYMBOL_GPL(nf_ct_l3proto_try_module_get);
-
-void nf_ct_l3proto_module_put(unsigned short l3proto)
-{
-       struct nf_conntrack_l3proto *p;
-
-       /* rcu_read_lock not necessary since the caller holds a reference, but
-        * taken anyways to avoid lockdep warnings in __nf_ct_l3proto_find()
-        */
-       rcu_read_lock();
-       p = __nf_ct_l3proto_find(l3proto);
-       module_put(p->me);
-       rcu_read_unlock();
-}
-EXPORT_SYMBOL_GPL(nf_ct_l3proto_module_put);
-
-static int nf_ct_netns_do_get(struct net *net, u8 nfproto)
-{
-       const struct nf_conntrack_l3proto *l3proto;
-       int ret;
-
-       might_sleep();
-
-       ret = nf_ct_l3proto_try_module_get(nfproto);
-       if (ret < 0)
-               return ret;
-
-       /* we already have a reference, can't fail */
-       rcu_read_lock();
-       l3proto = __nf_ct_l3proto_find(nfproto);
-       rcu_read_unlock();
-
-       if (!l3proto->net_ns_get)
-               return 0;
-
-       ret = l3proto->net_ns_get(net);
-       if (ret < 0)
-               nf_ct_l3proto_module_put(nfproto);
-
-       return ret;
-}
-
-int nf_ct_netns_get(struct net *net, u8 nfproto)
-{
-       int err;
-
-       if (nfproto == NFPROTO_INET) {
-               err = nf_ct_netns_do_get(net, NFPROTO_IPV4);
-               if (err < 0)
-                       goto err1;
-               err = nf_ct_netns_do_get(net, NFPROTO_IPV6);
-               if (err < 0)
-                       goto err2;
-       } else {
-               err = nf_ct_netns_do_get(net, nfproto);
-               if (err < 0)
-                       goto err1;
-       }
-       return 0;
-
-err2:
-       nf_ct_netns_put(net, NFPROTO_IPV4);
-err1:
-       return err;
-}
-EXPORT_SYMBOL_GPL(nf_ct_netns_get);
-
-static void nf_ct_netns_do_put(struct net *net, u8 nfproto)
-{
-       const struct nf_conntrack_l3proto *l3proto;
-
-       might_sleep();
-
-       /* same as nf_conntrack_netns_get(), reference assumed */
-       rcu_read_lock();
-       l3proto = __nf_ct_l3proto_find(nfproto);
-       rcu_read_unlock();
-
-       if (WARN_ON(!l3proto))
-               return;
-
-       if (l3proto->net_ns_put)
-               l3proto->net_ns_put(net);
-
-       nf_ct_l3proto_module_put(nfproto);
-}
-
-void nf_ct_netns_put(struct net *net, uint8_t nfproto)
-{
-       if (nfproto == NFPROTO_INET) {
-               nf_ct_netns_do_put(net, NFPROTO_IPV4);
-               nf_ct_netns_do_put(net, NFPROTO_IPV6);
-       } else
-               nf_ct_netns_do_put(net, nfproto);
-}
-EXPORT_SYMBOL_GPL(nf_ct_netns_put);
-
 const struct nf_conntrack_l4proto *
 nf_ct_l4proto_find_get(u_int16_t l3num, u_int8_t l4num)
 {
@@ -274,11 +155,6 @@ void nf_ct_l4proto_put(const struct nf_conntrack_l4proto *p)
 }
 EXPORT_SYMBOL_GPL(nf_ct_l4proto_put);
 
-static int kill_l3proto(struct nf_conn *i, void *data)
-{
-       return nf_ct_l3num(i) == ((const struct nf_conntrack_l3proto *)data)->l3proto;
-}
-
 static int kill_l4proto(struct nf_conn *i, void *data)
 {
        const struct nf_conntrack_l4proto *l4proto;
@@ -287,52 +163,6 @@ static int kill_l4proto(struct nf_conn *i, void *data)
               nf_ct_l3num(i) == l4proto->l3proto;
 }
 
-int nf_ct_l3proto_register(const struct nf_conntrack_l3proto *proto)
-{
-       int ret = 0;
-       struct nf_conntrack_l3proto *old;
-
-       if (proto->l3proto >= NFPROTO_NUMPROTO)
-               return -EBUSY;
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
-       if (proto->tuple_to_nlattr && proto->nla_size == 0)
-               return -EINVAL;
-#endif
-       mutex_lock(&nf_ct_proto_mutex);
-       old = rcu_dereference_protected(nf_ct_l3protos[proto->l3proto],
-                                       lockdep_is_held(&nf_ct_proto_mutex));
-       if (old != &nf_conntrack_l3proto_generic) {
-               ret = -EBUSY;
-               goto out_unlock;
-       }
-
-       rcu_assign_pointer(nf_ct_l3protos[proto->l3proto], proto);
-
-out_unlock:
-       mutex_unlock(&nf_ct_proto_mutex);
-       return ret;
-
-}
-EXPORT_SYMBOL_GPL(nf_ct_l3proto_register);
-
-void nf_ct_l3proto_unregister(const struct nf_conntrack_l3proto *proto)
-{
-       BUG_ON(proto->l3proto >= NFPROTO_NUMPROTO);
-
-       mutex_lock(&nf_ct_proto_mutex);
-       BUG_ON(rcu_dereference_protected(nf_ct_l3protos[proto->l3proto],
-                                        lockdep_is_held(&nf_ct_proto_mutex)
-                                        ) != proto);
-       rcu_assign_pointer(nf_ct_l3protos[proto->l3proto],
-                          &nf_conntrack_l3proto_generic);
-       mutex_unlock(&nf_ct_proto_mutex);
-
-       synchronize_rcu();
-       /* Remove all contrack entries for this protocol */
-       nf_ct_iterate_destroy(kill_l3proto, (void*)proto);
-}
-EXPORT_SYMBOL_GPL(nf_ct_l3proto_unregister);
-
 static struct nf_proto_net *nf_ct_l4proto_net(struct net *net,
                                const struct nf_conntrack_l4proto *l4proto)
 {
@@ -499,8 +329,23 @@ void nf_ct_l4proto_pernet_unregister_one(struct net *net,
 }
 EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister_one);
 
-int nf_ct_l4proto_register(const struct nf_conntrack_l4proto * const l4proto[],
-                          unsigned int num_proto)
+static void
+nf_ct_l4proto_unregister(const struct nf_conntrack_l4proto * const l4proto[],
+                        unsigned int num_proto)
+{
+       mutex_lock(&nf_ct_proto_mutex);
+       while (num_proto-- != 0)
+               __nf_ct_l4proto_unregister_one(l4proto[num_proto]);
+       mutex_unlock(&nf_ct_proto_mutex);
+
+       synchronize_net();
+       /* Remove all conntrack entries for this protocol */
+       nf_ct_iterate_destroy(kill_l4proto, (void *)l4proto);
+}
+
+static int
+nf_ct_l4proto_register(const struct nf_conntrack_l4proto * const l4proto[],
+                      unsigned int num_proto)
 {
        int ret = -EINVAL, ver;
        unsigned int i;
@@ -518,7 +363,6 @@ int nf_ct_l4proto_register(const struct nf_conntrack_l4proto * const l4proto[],
        }
        return ret;
 }
-EXPORT_SYMBOL_GPL(nf_ct_l4proto_register);
 
 int nf_ct_l4proto_pernet_register(struct net *net,
                                  const struct nf_conntrack_l4proto *const l4proto[],
@@ -542,20 +386,6 @@ int nf_ct_l4proto_pernet_register(struct net *net,
 }
 EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register);
 
-void nf_ct_l4proto_unregister(const struct nf_conntrack_l4proto * const l4proto[],
-                             unsigned int num_proto)
-{
-       mutex_lock(&nf_ct_proto_mutex);
-       while (num_proto-- != 0)
-               __nf_ct_l4proto_unregister_one(l4proto[num_proto]);
-       mutex_unlock(&nf_ct_proto_mutex);
-
-       synchronize_net();
-       /* Remove all contrack entries for this protocol */
-       nf_ct_iterate_destroy(kill_l4proto, (void *)l4proto);
-}
-EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister);
-
 void nf_ct_l4proto_pernet_unregister(struct net *net,
                                const struct nf_conntrack_l4proto *const l4proto[],
                                unsigned int num_proto)
@@ -565,6 +395,563 @@ void nf_ct_l4proto_pernet_unregister(struct net *net,
 }
 EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister);
 
+static unsigned int ipv4_helper(void *priv,
+                               struct sk_buff *skb,
+                               const struct nf_hook_state *state)
+{
+       struct nf_conn *ct;
+       enum ip_conntrack_info ctinfo;
+       const struct nf_conn_help *help;
+       const struct nf_conntrack_helper *helper;
+
+       /* This is where we call the helper: as the packet goes out. */
+       ct = nf_ct_get(skb, &ctinfo);
+       if (!ct || ctinfo == IP_CT_RELATED_REPLY)
+               return NF_ACCEPT;
+
+       help = nfct_help(ct);
+       if (!help)
+               return NF_ACCEPT;
+
+       /* rcu_read_lock()ed by nf_hook_thresh */
+       helper = rcu_dereference(help->helper);
+       if (!helper)
+               return NF_ACCEPT;
+
+       return helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb),
+                           ct, ctinfo);
+}
+
+static unsigned int ipv4_confirm(void *priv,
+                                struct sk_buff *skb,
+                                const struct nf_hook_state *state)
+{
+       struct nf_conn *ct;
+       enum ip_conntrack_info ctinfo;
+
+       ct = nf_ct_get(skb, &ctinfo);
+       if (!ct || ctinfo == IP_CT_RELATED_REPLY)
+               goto out;
+
+       /* adjust seqs for loopback traffic only in outgoing direction */
+       if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
+           !nf_is_loopback_packet(skb)) {
+               if (!nf_ct_seq_adjust(skb, ct, ctinfo, ip_hdrlen(skb))) {
+                       NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
+                       return NF_DROP;
+               }
+       }
+out:
+       /* We've seen it coming out the other side: confirm it */
+       return nf_conntrack_confirm(skb);
+}
+
+static unsigned int ipv4_conntrack_in(void *priv,
+                                     struct sk_buff *skb,
+                                     const struct nf_hook_state *state)
+{
+       return nf_conntrack_in(state->net, PF_INET, state->hook, skb);
+}
+
+static unsigned int ipv4_conntrack_local(void *priv,
+                                        struct sk_buff *skb,
+                                        const struct nf_hook_state *state)
+{
+       if (ip_is_fragment(ip_hdr(skb))) { /* IP_NODEFRAG setsockopt set */
+               enum ip_conntrack_info ctinfo;
+               struct nf_conn *tmpl;
+
+               tmpl = nf_ct_get(skb, &ctinfo);
+               if (tmpl && nf_ct_is_template(tmpl)) {
+                       /* when skipping ct, clear templates to avoid fooling
+                        * later targets/matches
+                        */
+                       skb->_nfct = 0;
+                       nf_ct_put(tmpl);
+               }
+               return NF_ACCEPT;
+       }
+
+       return nf_conntrack_in(state->net, PF_INET, state->hook, skb);
+}
+
+/* Connection tracking may drop packets, but never alters them, so
+ * make it the first hook.
+ */
+static const struct nf_hook_ops ipv4_conntrack_ops[] = {
+       {
+               .hook           = ipv4_conntrack_in,
+               .pf             = NFPROTO_IPV4,
+               .hooknum        = NF_INET_PRE_ROUTING,
+               .priority       = NF_IP_PRI_CONNTRACK,
+       },
+       {
+               .hook           = ipv4_conntrack_local,
+               .pf             = NFPROTO_IPV4,
+               .hooknum        = NF_INET_LOCAL_OUT,
+               .priority       = NF_IP_PRI_CONNTRACK,
+       },
+       {
+               .hook           = ipv4_helper,
+               .pf             = NFPROTO_IPV4,
+               .hooknum        = NF_INET_POST_ROUTING,
+               .priority       = NF_IP_PRI_CONNTRACK_HELPER,
+       },
+       {
+               .hook           = ipv4_confirm,
+               .pf             = NFPROTO_IPV4,
+               .hooknum        = NF_INET_POST_ROUTING,
+               .priority       = NF_IP_PRI_CONNTRACK_CONFIRM,
+       },
+       {
+               .hook           = ipv4_helper,
+               .pf             = NFPROTO_IPV4,
+               .hooknum        = NF_INET_LOCAL_IN,
+               .priority       = NF_IP_PRI_CONNTRACK_HELPER,
+       },
+       {
+               .hook           = ipv4_confirm,
+               .pf             = NFPROTO_IPV4,
+               .hooknum        = NF_INET_LOCAL_IN,
+               .priority       = NF_IP_PRI_CONNTRACK_CONFIRM,
+       },
+};
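The priority constants keep the documented ordering: NF_IP_PRI_CONNTRACK (-200) runs the tracker before NAT and mangle, NF_IP_PRI_CONNTRACK_HELPER (300) runs helpers after NAT, and NF_IP_PRI_CONNTRACK_CONFIRM (INT_MAX) makes confirmation the very last step on the way out.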
+
+/* Fast function for those who don't want to parse /proc (and I don't
+ * blame them).
+ * Reversing the socket's dst/src point of view gives us the reply
+ * mapping.
+ */
+static int
+getorigdst(struct sock *sk, int optval, void __user *user, int *len)
+{
+       const struct inet_sock *inet = inet_sk(sk);
+       const struct nf_conntrack_tuple_hash *h;
+       struct nf_conntrack_tuple tuple;
+
+       memset(&tuple, 0, sizeof(tuple));
+
+       lock_sock(sk);
+       tuple.src.u3.ip = inet->inet_rcv_saddr;
+       tuple.src.u.tcp.port = inet->inet_sport;
+       tuple.dst.u3.ip = inet->inet_daddr;
+       tuple.dst.u.tcp.port = inet->inet_dport;
+       tuple.src.l3num = PF_INET;
+       tuple.dst.protonum = sk->sk_protocol;
+       release_sock(sk);
+
+       /* We only do TCP and SCTP at the moment: is there a better way? */
+       if (tuple.dst.protonum != IPPROTO_TCP &&
+           tuple.dst.protonum != IPPROTO_SCTP) {
+               pr_debug("SO_ORIGINAL_DST: Not a TCP/SCTP socket\n");
+               return -ENOPROTOOPT;
+       }
+
+       if ((unsigned int)*len < sizeof(struct sockaddr_in)) {
+               pr_debug("SO_ORIGINAL_DST: len %d not %zu\n",
+                        *len, sizeof(struct sockaddr_in));
+               return -EINVAL;
+       }
+
+       h = nf_conntrack_find_get(sock_net(sk), &nf_ct_zone_dflt, &tuple);
+       if (h) {
+               struct sockaddr_in sin;
+               struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
+
+               sin.sin_family = AF_INET;
+               sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL]
+                       .tuple.dst.u.tcp.port;
+               sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL]
+                       .tuple.dst.u3.ip;
+               memset(sin.sin_zero, 0, sizeof(sin.sin_zero));
+
+               pr_debug("SO_ORIGINAL_DST: %pI4 %u\n",
+                        &sin.sin_addr.s_addr, ntohs(sin.sin_port));
+               nf_ct_put(ct);
+               if (copy_to_user(user, &sin, sizeof(sin)) != 0)
+                       return -EFAULT;
+               else
+                       return 0;
+       }
+       pr_debug("SO_ORIGINAL_DST: Can't find %pI4/%u-%pI4/%u.\n",
+                &tuple.src.u3.ip, ntohs(tuple.src.u.tcp.port),
+                &tuple.dst.u3.ip, ntohs(tuple.dst.u.tcp.port));
+       return -ENOENT;
+}
+
+static struct nf_sockopt_ops so_getorigdst = {
+       .pf             = PF_INET,
+       .get_optmin     = SO_ORIGINAL_DST,
+       .get_optmax     = SO_ORIGINAL_DST + 1,
+       .get            = getorigdst,
+       .owner          = THIS_MODULE,
+};
+
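For reference, this sockopt is what userspace transparent proxies use to recover the pre-NAT destination of a REDIRECTed connection. A hypothetical caller, assuming an accepted TCP socket and the uapi header that defines SO_ORIGINAL_DST:

    #include <stdio.h>
    #include <sys/socket.h>
    #include <arpa/inet.h>
    #include <netinet/in.h>
    #include <linux/netfilter_ipv4.h>   /* SO_ORIGINAL_DST */

    /* Print the original (pre-NAT) destination of an accepted socket. */
    static int print_original_dst(int fd)
    {
            struct sockaddr_in orig;
            socklen_t len = sizeof(orig);
            char buf[INET_ADDRSTRLEN];

            if (getsockopt(fd, SOL_IP, SO_ORIGINAL_DST, &orig, &len) < 0)
                    return -1;          /* e.g. no conntrack entry (ENOENT) */

            inet_ntop(AF_INET, &orig.sin_addr, buf, sizeof(buf));
            printf("original destination: %s:%u\n",
                   buf, ntohs(orig.sin_port));
            return 0;
    }

The IPv6 handler below answers the analogous IP6T_SO_ORIGINAL_DST request at the SOL_IPV6 level.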
+#if IS_ENABLED(CONFIG_IPV6)
+static int
+ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len)
+{
+       struct nf_conntrack_tuple tuple = { .src.l3num = NFPROTO_IPV6 };
+       const struct ipv6_pinfo *inet6 = inet6_sk(sk);
+       const struct inet_sock *inet = inet_sk(sk);
+       const struct nf_conntrack_tuple_hash *h;
+       struct sockaddr_in6 sin6;
+       struct nf_conn *ct;
+       __be32 flow_label;
+       int bound_dev_if;
+
+       lock_sock(sk);
+       tuple.src.u3.in6 = sk->sk_v6_rcv_saddr;
+       tuple.src.u.tcp.port = inet->inet_sport;
+       tuple.dst.u3.in6 = sk->sk_v6_daddr;
+       tuple.dst.u.tcp.port = inet->inet_dport;
+       tuple.dst.protonum = sk->sk_protocol;
+       bound_dev_if = sk->sk_bound_dev_if;
+       flow_label = inet6->flow_label;
+       release_sock(sk);
+
+       if (tuple.dst.protonum != IPPROTO_TCP &&
+           tuple.dst.protonum != IPPROTO_SCTP)
+               return -ENOPROTOOPT;
+
+       if (*len < 0 || (unsigned int)*len < sizeof(sin6))
+               return -EINVAL;
+
+       h = nf_conntrack_find_get(sock_net(sk), &nf_ct_zone_dflt, &tuple);
+       if (!h) {
+               pr_debug("IP6T_SO_ORIGINAL_DST: Can't find %pI6c/%u-%pI6c/%u.\n",
+                        &tuple.src.u3.ip6, ntohs(tuple.src.u.tcp.port),
+                        &tuple.dst.u3.ip6, ntohs(tuple.dst.u.tcp.port));
+               return -ENOENT;
+       }
+
+       ct = nf_ct_tuplehash_to_ctrack(h);
+
+       sin6.sin6_family = AF_INET6;
+       sin6.sin6_port = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port;
+       sin6.sin6_flowinfo = flow_label & IPV6_FLOWINFO_MASK;
+       memcpy(&sin6.sin6_addr,
+              &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6,
+              sizeof(sin6.sin6_addr));
+
+       nf_ct_put(ct);
+       sin6.sin6_scope_id = ipv6_iface_scope_id(&sin6.sin6_addr, bound_dev_if);
+       return copy_to_user(user, &sin6, sizeof(sin6)) ? -EFAULT : 0;
+}
+
+static struct nf_sockopt_ops so_getorigdst6 = {
+       .pf             = NFPROTO_IPV6,
+       .get_optmin     = IP6T_SO_ORIGINAL_DST,
+       .get_optmax     = IP6T_SO_ORIGINAL_DST + 1,
+       .get            = ipv6_getorigdst,
+       .owner          = THIS_MODULE,
+};
+
+static unsigned int ipv6_confirm(void *priv,
+                                struct sk_buff *skb,
+                                const struct nf_hook_state *state)
+{
+       struct nf_conn *ct;
+       enum ip_conntrack_info ctinfo;
+       unsigned char pnum = ipv6_hdr(skb)->nexthdr;
+       int protoff;
+       __be16 frag_off;
+
+       ct = nf_ct_get(skb, &ctinfo);
+       if (!ct || ctinfo == IP_CT_RELATED_REPLY)
+               goto out;
+
+       protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum,
+                                  &frag_off);
+       if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
+               pr_debug("proto header not found\n");
+               goto out;
+       }
+
+       /* adjust seqs for loopback traffic only in outgoing direction */
+       if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
+           !nf_is_loopback_packet(skb)) {
+               if (!nf_ct_seq_adjust(skb, ct, ctinfo, protoff)) {
+                       NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
+                       return NF_DROP;
+               }
+       }
+out:
+       /* We've seen it coming out the other side: confirm it */
+       return nf_conntrack_confirm(skb);
+}
+
+static unsigned int ipv6_conntrack_in(void *priv,
+                                     struct sk_buff *skb,
+                                     const struct nf_hook_state *state)
+{
+       return nf_conntrack_in(state->net, PF_INET6, state->hook, skb);
+}
+
+static unsigned int ipv6_conntrack_local(void *priv,
+                                        struct sk_buff *skb,
+                                        const struct nf_hook_state *state)
+{
+       return nf_conntrack_in(state->net, PF_INET6, state->hook, skb);
+}
+
+static unsigned int ipv6_helper(void *priv,
+                               struct sk_buff *skb,
+                               const struct nf_hook_state *state)
+{
+       struct nf_conn *ct;
+       const struct nf_conn_help *help;
+       const struct nf_conntrack_helper *helper;
+       enum ip_conntrack_info ctinfo;
+       __be16 frag_off;
+       int protoff;
+       u8 nexthdr;
+
+       /* This is where we call the helper: as the packet goes out. */
+       ct = nf_ct_get(skb, &ctinfo);
+       if (!ct || ctinfo == IP_CT_RELATED_REPLY)
+               return NF_ACCEPT;
+
+       help = nfct_help(ct);
+       if (!help)
+               return NF_ACCEPT;
+       /* rcu_read_lock()ed by nf_hook_thresh */
+       helper = rcu_dereference(help->helper);
+       if (!helper)
+               return NF_ACCEPT;
+
+       nexthdr = ipv6_hdr(skb)->nexthdr;
+       protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
+                                  &frag_off);
+       if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
+               pr_debug("proto header not found\n");
+               return NF_ACCEPT;
+       }
+
+       return helper->help(skb, protoff, ct, ctinfo);
+}
+
+static const struct nf_hook_ops ipv6_conntrack_ops[] = {
+       {
+               .hook           = ipv6_conntrack_in,
+               .pf             = NFPROTO_IPV6,
+               .hooknum        = NF_INET_PRE_ROUTING,
+               .priority       = NF_IP6_PRI_CONNTRACK,
+       },
+       {
+               .hook           = ipv6_conntrack_local,
+               .pf             = NFPROTO_IPV6,
+               .hooknum        = NF_INET_LOCAL_OUT,
+               .priority       = NF_IP6_PRI_CONNTRACK,
+       },
+       {
+               .hook           = ipv6_helper,
+               .pf             = NFPROTO_IPV6,
+               .hooknum        = NF_INET_POST_ROUTING,
+               .priority       = NF_IP6_PRI_CONNTRACK_HELPER,
+       },
+       {
+               .hook           = ipv6_confirm,
+               .pf             = NFPROTO_IPV6,
+               .hooknum        = NF_INET_POST_ROUTING,
+               .priority       = NF_IP6_PRI_LAST,
+       },
+       {
+               .hook           = ipv6_helper,
+               .pf             = NFPROTO_IPV6,
+               .hooknum        = NF_INET_LOCAL_IN,
+               .priority       = NF_IP6_PRI_CONNTRACK_HELPER,
+       },
+       {
+               .hook           = ipv6_confirm,
+               .pf             = NFPROTO_IPV6,
+               .hooknum        = NF_INET_LOCAL_IN,
+               .priority       = NF_IP6_PRI_LAST - 1,
+       },
+};
+#endif
+
+static int nf_ct_netns_do_get(struct net *net, u8 nfproto)
+{
+       struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+       int err = 0;
+
+       mutex_lock(&nf_ct_proto_mutex);
+
+       switch (nfproto) {
+       case NFPROTO_IPV4:
+               cnet->users4++;
+               if (cnet->users4 > 1)
+                       goto out_unlock;
+               err = nf_defrag_ipv4_enable(net);
+               if (err) {
+                       cnet->users4 = 0;
+                       goto out_unlock;
+               }
+
+               err = nf_register_net_hooks(net, ipv4_conntrack_ops,
+                                           ARRAY_SIZE(ipv4_conntrack_ops));
+               if (err)
+                       cnet->users4 = 0;
+               break;
+#if IS_ENABLED(CONFIG_IPV6)
+       case NFPROTO_IPV6:
+               cnet->users6++;
+               if (cnet->users6 > 1)
+                       goto out_unlock;
+               err = nf_defrag_ipv6_enable(net);
+               if (err < 0) {
+                       cnet->users6 = 0;
+                       goto out_unlock;
+               }
+
+               err = nf_register_net_hooks(net, ipv6_conntrack_ops,
+                                           ARRAY_SIZE(ipv6_conntrack_ops));
+               if (err)
+                       cnet->users6 = 0;
+               break;
+#endif
+       default:
+               err = -EPROTO;
+               break;
+       }
+ out_unlock:
+       mutex_unlock(&nf_ct_proto_mutex);
+       return err;
+}
+
+static void nf_ct_netns_do_put(struct net *net, u8 nfproto)
+{
+       struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+
+       mutex_lock(&nf_ct_proto_mutex);
+       switch (nfproto) {
+       case NFPROTO_IPV4:
+               if (cnet->users4 && (--cnet->users4 == 0))
+                       nf_unregister_net_hooks(net, ipv4_conntrack_ops,
+                                               ARRAY_SIZE(ipv4_conntrack_ops));
+               break;
+#if IS_ENABLED(CONFIG_IPV6)
+       case NFPROTO_IPV6:
+               if (cnet->users6 && (--cnet->users6 == 0))
+                       nf_unregister_net_hooks(net, ipv6_conntrack_ops,
+                                               ARRAY_SIZE(ipv6_conntrack_ops));
+               break;
+#endif
+       }
+
+       mutex_unlock(&nf_ct_proto_mutex);
+}
+
+int nf_ct_netns_get(struct net *net, u8 nfproto)
+{
+       int err;
+
+       if (nfproto == NFPROTO_INET) {
+               err = nf_ct_netns_do_get(net, NFPROTO_IPV4);
+               if (err < 0)
+                       goto err1;
+               err = nf_ct_netns_do_get(net, NFPROTO_IPV6);
+               if (err < 0)
+                       goto err2;
+       } else {
+               err = nf_ct_netns_do_get(net, nfproto);
+               if (err < 0)
+                       goto err1;
+       }
+       return 0;
+
+err2:
+       nf_ct_netns_put(net, NFPROTO_IPV4);
+err1:
+       return err;
+}
+EXPORT_SYMBOL_GPL(nf_ct_netns_get);
+
+void nf_ct_netns_put(struct net *net, uint8_t nfproto)
+{
+       if (nfproto == NFPROTO_INET) {
+               nf_ct_netns_do_put(net, NFPROTO_IPV4);
+               nf_ct_netns_do_put(net, NFPROTO_IPV6);
+       } else {
+               nf_ct_netns_do_put(net, nfproto);
+       }
+}
+EXPORT_SYMBOL_GPL(nf_ct_netns_put);
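The external contract of nf_ct_netns_get()/nf_ct_netns_put() is unchanged: users such as xtables matches still take a per-family reference when a rule is added and drop it when the rule goes away. A sketch of that consumer pattern, with hypothetical names (this mirrors what xt_conntrack does):

    #include <linux/netfilter/x_tables.h>
    /* nf_ct_netns_get()/nf_ct_netns_put() come from the conntrack headers */

    static int example_mt_check(const struct xt_mtchk_param *par)
    {
            int ret = nf_ct_netns_get(par->net, par->family);

            if (ret < 0)
                    pr_info("cannot load conntrack support for proto=%u\n",
                            par->family);
            return ret;
    }

    static void example_mt_destroy(const struct xt_mtdtor_param *par)
    {
            nf_ct_netns_put(par->net, par->family);
    }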
+
+static const struct nf_conntrack_l4proto * const builtin_l4proto[] = {
+       &nf_conntrack_l4proto_tcp4,
+       &nf_conntrack_l4proto_udp4,
+       &nf_conntrack_l4proto_icmp,
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+       &nf_conntrack_l4proto_dccp4,
+#endif
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+       &nf_conntrack_l4proto_sctp4,
+#endif
+#ifdef CONFIG_NF_CT_PROTO_UDPLITE
+       &nf_conntrack_l4proto_udplite4,
+#endif
+#if IS_ENABLED(CONFIG_IPV6)
+       &nf_conntrack_l4proto_tcp6,
+       &nf_conntrack_l4proto_udp6,
+       &nf_conntrack_l4proto_icmpv6,
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+       &nf_conntrack_l4proto_dccp6,
+#endif
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+       &nf_conntrack_l4proto_sctp6,
+#endif
+#ifdef CONFIG_NF_CT_PROTO_UDPLITE
+       &nf_conntrack_l4proto_udplite6,
+#endif
+#endif /* CONFIG_IPV6 */
+};
+
+int nf_conntrack_proto_init(void)
+{
+       int ret = 0;
+
+       ret = nf_register_sockopt(&so_getorigdst);
+       if (ret < 0)
+               return ret;
+
+#if IS_ENABLED(CONFIG_IPV6)
+       ret = nf_register_sockopt(&so_getorigdst6);
+       if (ret < 0)
+               goto cleanup_sockopt;
+#endif
+       ret = nf_ct_l4proto_register(builtin_l4proto,
+                                    ARRAY_SIZE(builtin_l4proto));
+       if (ret < 0)
+               goto cleanup_sockopt2;
+
+       return ret;
+cleanup_sockopt2:
+       nf_unregister_sockopt(&so_getorigdst);
+#if IS_ENABLED(CONFIG_IPV6)
+cleanup_sockopt:
+       nf_unregister_sockopt(&so_getorigdst6);
+#endif
+       return ret;
+}
+
+void nf_conntrack_proto_fini(void)
+{
+       unsigned int i;
+
+       nf_ct_l4proto_unregister(builtin_l4proto,
+                                ARRAY_SIZE(builtin_l4proto));
+       nf_unregister_sockopt(&so_getorigdst);
+#if IS_ENABLED(CONFIG_IPV6)
+       nf_unregister_sockopt(&so_getorigdst6);
+#endif
+
+       /* free l3proto protocol tables */
+       for (i = 0; i < ARRAY_SIZE(nf_ct_protos); i++)
+               kfree(nf_ct_protos[i]);
+}
+
 int nf_conntrack_proto_pernet_init(struct net *net)
 {
        int err;
@@ -581,6 +968,14 @@ int nf_conntrack_proto_pernet_init(struct net *net)
        if (err < 0)
                return err;
 
+       err = nf_ct_l4proto_pernet_register(net, builtin_l4proto,
+                                           ARRAY_SIZE(builtin_l4proto));
+       if (err < 0) {
+               nf_ct_l4proto_unregister_sysctl(net, pn,
+                                               &nf_conntrack_l4proto_generic);
+               return err;
+       }
+
        pn->users++;
        return 0;
 }
@@ -590,25 +985,19 @@ void nf_conntrack_proto_pernet_fini(struct net *net)
        struct nf_proto_net *pn = nf_ct_l4proto_net(net,
                                        &nf_conntrack_l4proto_generic);
 
+       nf_ct_l4proto_pernet_unregister(net, builtin_l4proto,
+                                       ARRAY_SIZE(builtin_l4proto));
        pn->users--;
        nf_ct_l4proto_unregister_sysctl(net,
                                        pn,
                                        &nf_conntrack_l4proto_generic);
 }
 
-int nf_conntrack_proto_init(void)
-{
-       unsigned int i;
-       for (i = 0; i < NFPROTO_NUMPROTO; i++)
-               rcu_assign_pointer(nf_ct_l3protos[i],
-                                  &nf_conntrack_l3proto_generic);
-       return 0;
-}
 
-void nf_conntrack_proto_fini(void)
-{
-       unsigned int i;
-       /* free l3proto protocol tables */
-       for (i = 0; i < ARRAY_SIZE(nf_ct_protos); i++)
-               kfree(nf_ct_protos[i]);
-}
+module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
+                 &nf_conntrack_htable_size, 0600);
+
+MODULE_ALIAS("ip_conntrack");
+MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET));
+MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET6));
+MODULE_LICENSE("GPL");
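With the module glue now living here, the hashsize parameter behaves as before: mode 0600 exposes it read-write, so the table can be resized at runtime via nf_conntrack_set_hashsize() (for example by writing a new size to /sys/module/nf_conntrack/parameters/hashsize), and the nf_conntrack-2/-10 aliases let request_module() calls keyed on the address family pull in the combined module.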
index 9ce6336d1e559459235f755be7db4a7adb9ebfc1..8c58f96b59e701422cd60303dfc6a2f3db8c1b04 100644
@@ -23,6 +23,7 @@
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_timeout.h>
 #include <net/netfilter/nf_log.h>
 
 /* Timeouts are based on values from RFC4340:
@@ -388,31 +389,8 @@ static inline struct nf_dccp_net *dccp_pernet(struct net *net)
        return &net->ct.nf_ct_proto.dccp;
 }
 
-static bool dccp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
-                             struct net *net, struct nf_conntrack_tuple *tuple)
-{
-       struct dccp_hdr _hdr, *dh;
-
-       /* Actually only need first 4 bytes to get ports. */
-       dh = skb_header_pointer(skb, dataoff, 4, &_hdr);
-       if (dh == NULL)
-               return false;
-
-       tuple->src.u.dccp.port = dh->dccph_sport;
-       tuple->dst.u.dccp.port = dh->dccph_dport;
-       return true;
-}
-
-static bool dccp_invert_tuple(struct nf_conntrack_tuple *inv,
-                             const struct nf_conntrack_tuple *tuple)
-{
-       inv->src.u.dccp.port = tuple->dst.u.dccp.port;
-       inv->dst.u.dccp.port = tuple->src.u.dccp.port;
-       return true;
-}
-
 static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
-                    unsigned int dataoff, unsigned int *timeouts)
+                    unsigned int dataoff)
 {
        struct net *net = nf_ct_net(ct);
        struct nf_dccp_net *dn;
@@ -460,19 +438,14 @@ static u64 dccp_ack_seq(const struct dccp_hdr *dh)
                     ntohl(dhack->dccph_ack_nr_low);
 }
 
-static unsigned int *dccp_get_timeouts(struct net *net)
-{
-       return dccp_pernet(net)->dccp_timeout;
-}
-
 static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
-                      unsigned int dataoff, enum ip_conntrack_info ctinfo,
-                      unsigned int *timeouts)
+                      unsigned int dataoff, enum ip_conntrack_info ctinfo)
 {
        enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
        struct dccp_hdr _dh, *dh;
        u_int8_t type, old_state, new_state;
        enum ct_dccp_roles role;
+       unsigned int *timeouts;
 
        dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh);
        BUG_ON(dh == NULL);
@@ -546,6 +519,9 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
        if (new_state != old_state)
                nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
 
+       timeouts = nf_ct_timeout_lookup(ct);
+       if (!timeouts)
+               timeouts = dccp_pernet(nf_ct_net(ct))->dccp_timeout;
        nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]);
 
        return NF_ACCEPT;
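This is the pattern the whole series moves to: each tracker now calls nf_ct_timeout_lookup() itself to find a timeout policy attached to the conntrack and falls back to the per-netns defaults, which is what makes the get_timeouts() callback (and the timeouts argument to ->packet()/->new()) removable.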
@@ -864,11 +840,8 @@ static struct nf_proto_net *dccp_get_net_proto(struct net *net)
 const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 = {
        .l3proto                = AF_INET,
        .l4proto                = IPPROTO_DCCP,
-       .pkt_to_tuple           = dccp_pkt_to_tuple,
-       .invert_tuple           = dccp_invert_tuple,
        .new                    = dccp_new,
        .packet                 = dccp_packet,
-       .get_timeouts           = dccp_get_timeouts,
        .error                  = dccp_error,
        .can_early_drop         = dccp_can_early_drop,
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
@@ -900,11 +873,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_dccp4);
 const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 = {
        .l3proto                = AF_INET6,
        .l4proto                = IPPROTO_DCCP,
-       .pkt_to_tuple           = dccp_pkt_to_tuple,
-       .invert_tuple           = dccp_invert_tuple,
        .new                    = dccp_new,
        .packet                 = dccp_packet,
-       .get_timeouts           = dccp_get_timeouts,
        .error                  = dccp_error,
        .can_early_drop         = dccp_can_early_drop,
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
index 6c6896d21cd738f8ea680f9db4c0c7225354e711..ac4a0b296dcda7c7bbc51727fd0d57974813f37a 100644
@@ -11,6 +11,7 @@
 #include <linux/timer.h>
 #include <linux/netfilter.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_timeout.h>
 
 static const unsigned int nf_ct_generic_timeout = 600*HZ;
 
@@ -41,34 +42,24 @@ static bool generic_pkt_to_tuple(const struct sk_buff *skb,
        return true;
 }
 
-static bool generic_invert_tuple(struct nf_conntrack_tuple *tuple,
-                                const struct nf_conntrack_tuple *orig)
-{
-       tuple->src.u.all = 0;
-       tuple->dst.u.all = 0;
-
-       return true;
-}
-
-static unsigned int *generic_get_timeouts(struct net *net)
-{
-       return &(generic_pernet(net)->timeout);
-}
-
 /* Returns verdict for packet, or -1 for invalid. */
 static int generic_packet(struct nf_conn *ct,
                          const struct sk_buff *skb,
                          unsigned int dataoff,
-                         enum ip_conntrack_info ctinfo,
-                         unsigned int *timeout)
+                         enum ip_conntrack_info ctinfo)
 {
+       const unsigned int *timeout = nf_ct_timeout_lookup(ct);
+
+       if (!timeout)
+               timeout = &generic_pernet(nf_ct_net(ct))->timeout;
+
        nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
        return NF_ACCEPT;
 }
 
 /* Called when a new connection for this protocol found. */
 static bool generic_new(struct nf_conn *ct, const struct sk_buff *skb,
-                       unsigned int dataoff, unsigned int *timeouts)
+                       unsigned int dataoff)
 {
        bool ret;
 
@@ -87,8 +78,11 @@ static bool generic_new(struct nf_conn *ct, const struct sk_buff *skb,
 static int generic_timeout_nlattr_to_obj(struct nlattr *tb[],
                                         struct net *net, void *data)
 {
-       unsigned int *timeout = data;
        struct nf_generic_net *gn = generic_pernet(net);
+       unsigned int *timeout = data;
+
+       if (!timeout)
+               timeout = &gn->timeout;
 
        if (tb[CTA_TIMEOUT_GENERIC_TIMEOUT])
                *timeout =
@@ -168,9 +162,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic =
        .l3proto                = PF_UNSPEC,
        .l4proto                = 255,
        .pkt_to_tuple           = generic_pkt_to_tuple,
-       .invert_tuple           = generic_invert_tuple,
        .packet                 = generic_packet,
-       .get_timeouts           = generic_get_timeouts,
        .new                    = generic_new,
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
        .ctnl_timeout           = {
index d049ea5a3770df595f49511cd4ad96eb1195ca00..d1632252bf5b65611e0a5afc7c87c96607fe6a71 100644
@@ -39,6 +39,7 @@
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_timeout.h>
 #include <linux/netfilter/nf_conntrack_proto_gre.h>
 #include <linux/netfilter/nf_conntrack_pptp.h>
 
@@ -179,15 +180,6 @@ EXPORT_SYMBOL_GPL(nf_ct_gre_keymap_destroy);
 
 /* PUBLIC CONNTRACK PROTO HELPER FUNCTIONS */
 
-/* invert gre part of tuple */
-static bool gre_invert_tuple(struct nf_conntrack_tuple *tuple,
-                            const struct nf_conntrack_tuple *orig)
-{
-       tuple->dst.u.gre.key = orig->src.u.gre.key;
-       tuple->src.u.gre.key = orig->dst.u.gre.key;
-       return true;
-}
-
 /* gre hdr info to tuple */
 static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
                             struct net *net, struct nf_conntrack_tuple *tuple)
@@ -243,8 +235,7 @@ static unsigned int *gre_get_timeouts(struct net *net)
 static int gre_packet(struct nf_conn *ct,
                      const struct sk_buff *skb,
                      unsigned int dataoff,
-                     enum ip_conntrack_info ctinfo,
-                     unsigned int *timeouts)
+                     enum ip_conntrack_info ctinfo)
 {
        /* If we've seen traffic both ways, this is a GRE connection.
         * Extend timeout. */
@@ -263,8 +254,13 @@ static int gre_packet(struct nf_conn *ct,
 
 /* Called when a new connection for this protocol found. */
 static bool gre_new(struct nf_conn *ct, const struct sk_buff *skb,
-                   unsigned int dataoff, unsigned int *timeouts)
+                   unsigned int dataoff)
 {
+       unsigned int *timeouts = nf_ct_timeout_lookup(ct);
+
+       if (!timeouts)
+               timeouts = gre_get_timeouts(nf_ct_net(ct));
+
        pr_debug(": ");
        nf_ct_dump_tuple(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
 
@@ -300,6 +296,8 @@ static int gre_timeout_nlattr_to_obj(struct nlattr *tb[],
        unsigned int *timeouts = data;
        struct netns_proto_gre *net_gre = gre_pernet(net);
 
+       if (!timeouts)
+               timeouts = gre_get_timeouts(net);
        /* set default timeouts for GRE. */
        timeouts[GRE_CT_UNREPLIED] = net_gre->gre_timeouts[GRE_CT_UNREPLIED];
        timeouts[GRE_CT_REPLIED] = net_gre->gre_timeouts[GRE_CT_REPLIED];
@@ -356,11 +354,9 @@ static const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 = {
        .l3proto         = AF_INET,
        .l4proto         = IPPROTO_GRE,
        .pkt_to_tuple    = gre_pkt_to_tuple,
-       .invert_tuple    = gre_invert_tuple,
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
        .print_conntrack = gre_print_conntrack,
 #endif
-       .get_timeouts    = gre_get_timeouts,
        .packet          = gre_packet,
        .new             = gre_new,
        .destroy         = gre_destroy,
diff --git a/net/netfilter/nf_conntrack_proto_icmp.c b/net/netfilter/nf_conntrack_proto_icmp.c
new file mode 100644
index 0000000..036670b
--- /dev/null
@@ -0,0 +1,388 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2006-2010 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/netfilter.h>
+#include <linux/in.h>
+#include <linux/icmp.h>
+#include <linux/seq_file.h>
+#include <net/ip.h>
+#include <net/checksum.h>
+#include <linux/netfilter_ipv4.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_timeout.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_log.h>
+
+static const unsigned int nf_ct_icmp_timeout = 30*HZ;
+
+static inline struct nf_icmp_net *icmp_pernet(struct net *net)
+{
+       return &net->ct.nf_ct_proto.icmp;
+}
+
+static bool icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
+                             struct net *net, struct nf_conntrack_tuple *tuple)
+{
+       const struct icmphdr *hp;
+       struct icmphdr _hdr;
+
+       hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
+       if (hp == NULL)
+               return false;
+
+       tuple->dst.u.icmp.type = hp->type;
+       tuple->src.u.icmp.id = hp->un.echo.id;
+       tuple->dst.u.icmp.code = hp->code;
+
+       return true;
+}
+
+/* Add 1; spaces filled with 0. */
+static const u_int8_t invmap[] = {
+       [ICMP_ECHO] = ICMP_ECHOREPLY + 1,
+       [ICMP_ECHOREPLY] = ICMP_ECHO + 1,
+       [ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1,
+       [ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1,
+       [ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1,
+       [ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1,
+       [ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1,
+       [ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1
+};
+
+static bool icmp_invert_tuple(struct nf_conntrack_tuple *tuple,
+                             const struct nf_conntrack_tuple *orig)
+{
+       if (orig->dst.u.icmp.type >= sizeof(invmap) ||
+           !invmap[orig->dst.u.icmp.type])
+               return false;
+
+       tuple->src.u.icmp.id = orig->src.u.icmp.id;
+       tuple->dst.u.icmp.type = invmap[orig->dst.u.icmp.type] - 1;
+       tuple->dst.u.icmp.code = orig->dst.u.icmp.code;
+       return true;
+}
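Worked through: ICMP_ECHO is type 8 and ICMP_ECHOREPLY is type 0, so invmap[8] holds 0 + 1 and the inversion yields 1 - 1 = ICMP_ECHOREPLY; storing type-plus-one is what lets a zero entry mean "no inverse", which both the bounds check and the !invmap[] test above rely on.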
+
+static unsigned int *icmp_get_timeouts(struct net *net)
+{
+       return &icmp_pernet(net)->timeout;
+}
+
+/* Returns verdict for packet, or -1 for invalid. */
+static int icmp_packet(struct nf_conn *ct,
+                      const struct sk_buff *skb,
+                      unsigned int dataoff,
+                      enum ip_conntrack_info ctinfo)
+{
+       /* Do not immediately delete the connection after the first
+          successful reply to avoid excessive conntrackd traffic
+          and also to handle correctly ICMP echo reply duplicates. */
+       unsigned int *timeout = nf_ct_timeout_lookup(ct);
+
+       if (!timeout)
+               timeout = icmp_get_timeouts(nf_ct_net(ct));
+
+       nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
+
+       return NF_ACCEPT;
+}
+
+/* Called when a new connection for this protocol found. */
+static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb,
+                    unsigned int dataoff)
+{
+       static const u_int8_t valid_new[] = {
+               [ICMP_ECHO] = 1,
+               [ICMP_TIMESTAMP] = 1,
+               [ICMP_INFO_REQUEST] = 1,
+               [ICMP_ADDRESS] = 1
+       };
+
+       if (ct->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new) ||
+           !valid_new[ct->tuplehash[0].tuple.dst.u.icmp.type]) {
+               /* Can't create a new ICMP `conn' with this. */
+               pr_debug("icmp: can't create new conn with type %u\n",
+                        ct->tuplehash[0].tuple.dst.u.icmp.type);
+               nf_ct_dump_tuple_ip(&ct->tuplehash[0].tuple);
+               return false;
+       }
+       return true;
+}
+
+/* Returns conntrack if it dealt with ICMP, and filled in skb fields */
+static int
+icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
+                unsigned int hooknum)
+{
+       struct nf_conntrack_tuple innertuple, origtuple;
+       const struct nf_conntrack_l4proto *innerproto;
+       const struct nf_conntrack_tuple_hash *h;
+       const struct nf_conntrack_zone *zone;
+       enum ip_conntrack_info ctinfo;
+       struct nf_conntrack_zone tmp;
+
+       WARN_ON(skb_nfct(skb));
+       zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
+
+       /* Are they talking about one of our connections? */
+       if (!nf_ct_get_tuplepr(skb,
+                              skb_network_offset(skb) + ip_hdrlen(skb)
+                                                      + sizeof(struct icmphdr),
+                              PF_INET, net, &origtuple)) {
+               pr_debug("icmp_error_message: failed to get tuple\n");
+               return -NF_ACCEPT;
+       }
+
+       /* rcu_read_lock()ed by nf_hook_thresh */
+       innerproto = __nf_ct_l4proto_find(PF_INET, origtuple.dst.protonum);
+
+       /* Ordinarily, we'd expect the inverted tupleproto, but it's
+          been preserved inside the ICMP. */
+       if (!nf_ct_invert_tuple(&innertuple, &origtuple, innerproto)) {
+               pr_debug("icmp_error_message: no match\n");
+               return -NF_ACCEPT;
+       }
+
+       ctinfo = IP_CT_RELATED;
+
+       h = nf_conntrack_find_get(net, zone, &innertuple);
+       if (!h) {
+               pr_debug("icmp_error_message: no match\n");
+               return -NF_ACCEPT;
+       }
+
+       if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY)
+               ctinfo += IP_CT_IS_REPLY;
+
+       /* Update skb to refer to this connection */
+       nf_ct_set(skb, nf_ct_tuplehash_to_ctrack(h), ctinfo);
+       return NF_ACCEPT;
+}
+
+static void icmp_error_log(const struct sk_buff *skb, struct net *net,
+                          u8 pf, const char *msg)
+{
+       nf_l4proto_log_invalid(skb, net, pf, IPPROTO_ICMP, "%s", msg);
+}
+
+/* Small and modified version of icmp_rcv */
+static int
+icmp_error(struct net *net, struct nf_conn *tmpl,
+          struct sk_buff *skb, unsigned int dataoff,
+          u8 pf, unsigned int hooknum)
+{
+       const struct icmphdr *icmph;
+       struct icmphdr _ih;
+
+       /* Not enough header? */
+       icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih);
+       if (icmph == NULL) {
+               icmp_error_log(skb, net, pf, "short packet");
+               return -NF_ACCEPT;
+       }
+
+       /* See ip_conntrack_proto_tcp.c */
+       if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
+           nf_ip_checksum(skb, hooknum, dataoff, 0)) {
+               icmp_error_log(skb, net, pf, "bad hw icmp checksum");
+               return -NF_ACCEPT;
+       }
+
+       /*
+        *      18 is the highest 'known' ICMP type. Anything else is a mystery.
+        *
+        *      RFC 1122: 3.2.2  Unknown ICMP message types MUST be silently
+        *                discarded.
+        */
+       if (icmph->type > NR_ICMP_TYPES) {
+               icmp_error_log(skb, net, pf, "invalid icmp type");
+               return -NF_ACCEPT;
+       }
+
+       /* Need to track icmp error message? */
+       if (icmph->type != ICMP_DEST_UNREACH &&
+           icmph->type != ICMP_SOURCE_QUENCH &&
+           icmph->type != ICMP_TIME_EXCEEDED &&
+           icmph->type != ICMP_PARAMETERPROB &&
+           icmph->type != ICMP_REDIRECT)
+               return NF_ACCEPT;
+
+       return icmp_error_message(net, tmpl, skb, hooknum);
+}
+
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+
+static int icmp_tuple_to_nlattr(struct sk_buff *skb,
+                               const struct nf_conntrack_tuple *t)
+{
+       if (nla_put_be16(skb, CTA_PROTO_ICMP_ID, t->src.u.icmp.id) ||
+           nla_put_u8(skb, CTA_PROTO_ICMP_TYPE, t->dst.u.icmp.type) ||
+           nla_put_u8(skb, CTA_PROTO_ICMP_CODE, t->dst.u.icmp.code))
+               goto nla_put_failure;
+       return 0;
+
+nla_put_failure:
+       return -1;
+}
+
+static const struct nla_policy icmp_nla_policy[CTA_PROTO_MAX+1] = {
+       [CTA_PROTO_ICMP_TYPE]   = { .type = NLA_U8 },
+       [CTA_PROTO_ICMP_CODE]   = { .type = NLA_U8 },
+       [CTA_PROTO_ICMP_ID]     = { .type = NLA_U16 },
+};
+
+static int icmp_nlattr_to_tuple(struct nlattr *tb[],
+                               struct nf_conntrack_tuple *tuple)
+{
+       if (!tb[CTA_PROTO_ICMP_TYPE] ||
+           !tb[CTA_PROTO_ICMP_CODE] ||
+           !tb[CTA_PROTO_ICMP_ID])
+               return -EINVAL;
+
+       tuple->dst.u.icmp.type = nla_get_u8(tb[CTA_PROTO_ICMP_TYPE]);
+       tuple->dst.u.icmp.code = nla_get_u8(tb[CTA_PROTO_ICMP_CODE]);
+       tuple->src.u.icmp.id = nla_get_be16(tb[CTA_PROTO_ICMP_ID]);
+
+       if (tuple->dst.u.icmp.type >= sizeof(invmap) ||
+           !invmap[tuple->dst.u.icmp.type])
+               return -EINVAL;
+
+       return 0;
+}
+
+static unsigned int icmp_nlattr_tuple_size(void)
+{
+       static unsigned int size __read_mostly;
+
+       if (!size)
+               size = nla_policy_len(icmp_nla_policy, CTA_PROTO_MAX + 1);
+
+       return size;
+}
+#endif
+
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_cttimeout.h>
+
+static int icmp_timeout_nlattr_to_obj(struct nlattr *tb[],
+                                     struct net *net, void *data)
+{
+       unsigned int *timeout = data;
+       struct nf_icmp_net *in = icmp_pernet(net);
+
+       if (tb[CTA_TIMEOUT_ICMP_TIMEOUT]) {
+               if (!timeout)
+                       timeout = &in->timeout;
+               *timeout =
+                       ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMP_TIMEOUT])) * HZ;
+       } else if (timeout) {
+               /* Set default ICMP timeout. */
+               *timeout = in->timeout;
+       }
+       return 0;
+}
+
+static int
+icmp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
+{
+       const unsigned int *timeout = data;
+
+       if (nla_put_be32(skb, CTA_TIMEOUT_ICMP_TIMEOUT, htonl(*timeout / HZ)))
+               goto nla_put_failure;
+       return 0;
+
+nla_put_failure:
+       return -ENOSPC;
+}
+
+static const struct nla_policy
+icmp_timeout_nla_policy[CTA_TIMEOUT_ICMP_MAX+1] = {
+       [CTA_TIMEOUT_ICMP_TIMEOUT]      = { .type = NLA_U32 },
+};
+#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table icmp_sysctl_table[] = {
+       {
+               .procname       = "nf_conntrack_icmp_timeout",
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_jiffies,
+       },
+       { }
+};
+#endif /* CONFIG_SYSCTL */
+
+static int icmp_kmemdup_sysctl_table(struct nf_proto_net *pn,
+                                    struct nf_icmp_net *in)
+{
+#ifdef CONFIG_SYSCTL
+       pn->ctl_table = kmemdup(icmp_sysctl_table,
+                               sizeof(icmp_sysctl_table),
+                               GFP_KERNEL);
+       if (!pn->ctl_table)
+               return -ENOMEM;
+
+       pn->ctl_table[0].data = &in->timeout;
+#endif
+       return 0;
+}
+
+static int icmp_init_net(struct net *net, u_int16_t proto)
+{
+       struct nf_icmp_net *in = icmp_pernet(net);
+       struct nf_proto_net *pn = &in->pn;
+
+       in->timeout = nf_ct_icmp_timeout;
+
+       return icmp_kmemdup_sysctl_table(pn, in);
+}
+
+static struct nf_proto_net *icmp_get_net_proto(struct net *net)
+{
+       return &net->ct.nf_ct_proto.icmp.pn;
+}
+
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp =
+{
+       .l3proto                = PF_INET,
+       .l4proto                = IPPROTO_ICMP,
+       .pkt_to_tuple           = icmp_pkt_to_tuple,
+       .invert_tuple           = icmp_invert_tuple,
+       .packet                 = icmp_packet,
+       .new                    = icmp_new,
+       .error                  = icmp_error,
+       .destroy                = NULL,
+       .me                     = NULL,
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+       .tuple_to_nlattr        = icmp_tuple_to_nlattr,
+       .nlattr_tuple_size      = icmp_nlattr_tuple_size,
+       .nlattr_to_tuple        = icmp_nlattr_to_tuple,
+       .nla_policy             = icmp_nla_policy,
+#endif
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
+       .ctnl_timeout           = {
+               .nlattr_to_obj  = icmp_timeout_nlattr_to_obj,
+               .obj_to_nlattr  = icmp_timeout_obj_to_nlattr,
+               .nlattr_max     = CTA_TIMEOUT_ICMP_MAX,
+               .obj_size       = sizeof(unsigned int),
+               .nla_policy     = icmp_timeout_nla_policy,
+       },
+#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
+       .init_net               = icmp_init_net,
+       .get_net_proto          = icmp_get_net_proto,
+};
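
The invmap table at the top of this file stores each inverse ICMP type plus one, so that a zero entry can mean "no inverse" even though ICMP_ECHOREPLY is itself type 0; icmp_invert_tuple() subtracts the 1 back out after the bounds check. Below is a minimal userspace sketch of the same encoding, assuming only the type constants from <linux/icmp.h>; the helper and the demo main() are illustrative, not kernel code:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Type values as in <linux/icmp.h>. */
    #define ICMP_ECHOREPLY        0
    #define ICMP_ECHO             8
    #define ICMP_TIMESTAMP        13
    #define ICMP_TIMESTAMPREPLY   14

    /* Store inverse + 1 so that 0 means "no inverse"; ICMP_ECHOREPLY is type 0. */
    static const uint8_t invmap[] = {
            [ICMP_ECHO]           = ICMP_ECHOREPLY + 1,
            [ICMP_ECHOREPLY]      = ICMP_ECHO + 1,
            [ICMP_TIMESTAMP]      = ICMP_TIMESTAMPREPLY + 1,
            [ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1,
    };

    /* Mirrors icmp_invert_tuple(): bounds check, zero check, then undo the +1. */
    static bool invert_type(uint8_t type, uint8_t *inverse)
    {
            if (type >= sizeof(invmap) || !invmap[type])
                    return false;
            *inverse = invmap[type] - 1;
            return true;
    }

    int main(void)
    {
            uint8_t inv;

            if (invert_type(ICMP_ECHO, &inv))
                    printf("echo (8) inverts to %u\n", inv);    /* prints 0 */
            if (!invert_type(3, &inv))
                    printf("type 3 (dest unreachable) has no inverse\n");
            return 0;
    }
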
diff --git a/net/netfilter/nf_conntrack_proto_icmpv6.c b/net/netfilter/nf_conntrack_proto_icmpv6.c
new file mode 100644 (file)
index 0000000..bed07b9
--- /dev/null
@@ -0,0 +1,387 @@
+/*
+ * Copyright (C)2003,2004 USAGI/WIDE Project
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Author:
+ *     Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
+ */
+
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/in6.h>
+#include <linux/icmpv6.h>
+#include <linux/ipv6.h>
+#include <net/ipv6.h>
+#include <net/ip6_checksum.h>
+#include <linux/seq_file.h>
+#include <linux/netfilter_ipv6.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_timeout.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/ipv6/nf_conntrack_icmpv6.h>
+#include <net/netfilter/nf_log.h>
+
+static const unsigned int nf_ct_icmpv6_timeout = 30*HZ;
+
+static inline struct nf_icmp_net *icmpv6_pernet(struct net *net)
+{
+       return &net->ct.nf_ct_proto.icmpv6;
+}
+
+static bool icmpv6_pkt_to_tuple(const struct sk_buff *skb,
+                               unsigned int dataoff,
+                               struct net *net,
+                               struct nf_conntrack_tuple *tuple)
+{
+       const struct icmp6hdr *hp;
+       struct icmp6hdr _hdr;
+
+       hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
+       if (hp == NULL)
+               return false;
+       tuple->dst.u.icmp.type = hp->icmp6_type;
+       tuple->src.u.icmp.id = hp->icmp6_identifier;
+       tuple->dst.u.icmp.code = hp->icmp6_code;
+
+       return true;
+}
+
+/* Add 1; spaces filled with 0. */
+static const u_int8_t invmap[] = {
+       [ICMPV6_ECHO_REQUEST - 128]     = ICMPV6_ECHO_REPLY + 1,
+       [ICMPV6_ECHO_REPLY - 128]       = ICMPV6_ECHO_REQUEST + 1,
+       [ICMPV6_NI_QUERY - 128]         = ICMPV6_NI_REPLY + 1,
+       [ICMPV6_NI_REPLY - 128]         = ICMPV6_NI_QUERY + 1
+};
+
+static const u_int8_t noct_valid_new[] = {
+       [ICMPV6_MGM_QUERY - 130] = 1,
+       [ICMPV6_MGM_REPORT - 130] = 1,
+       [ICMPV6_MGM_REDUCTION - 130] = 1,
+       [NDISC_ROUTER_SOLICITATION - 130] = 1,
+       [NDISC_ROUTER_ADVERTISEMENT - 130] = 1,
+       [NDISC_NEIGHBOUR_SOLICITATION - 130] = 1,
+       [NDISC_NEIGHBOUR_ADVERTISEMENT - 130] = 1,
+       [ICMPV6_MLD2_REPORT - 130] = 1
+};
+
+static bool icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple,
+                               const struct nf_conntrack_tuple *orig)
+{
+       int type = orig->dst.u.icmp.type - 128;
+       if (type < 0 || type >= sizeof(invmap) || !invmap[type])
+               return false;
+
+       tuple->src.u.icmp.id   = orig->src.u.icmp.id;
+       tuple->dst.u.icmp.type = invmap[type] - 1;
+       tuple->dst.u.icmp.code = orig->dst.u.icmp.code;
+       return true;
+}
+
+static unsigned int *icmpv6_get_timeouts(struct net *net)
+{
+       return &icmpv6_pernet(net)->timeout;
+}
+
+/* Returns verdict for packet, or -NF_ACCEPT for invalid. */
+static int icmpv6_packet(struct nf_conn *ct,
+                      const struct sk_buff *skb,
+                      unsigned int dataoff,
+                      enum ip_conntrack_info ctinfo)
+{
+       unsigned int *timeout = nf_ct_timeout_lookup(ct);
+
+       if (!timeout)
+               timeout = icmpv6_get_timeouts(nf_ct_net(ct));
+
+       /* Do not immediately delete the connection after the first
+          successful reply, to avoid excessive conntrackd traffic
+          and also to correctly handle ICMP echo reply duplicates. */
+       nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
+
+       return NF_ACCEPT;
+}
+
+/* Called when a new connection for this protocol is found. */
+static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb,
+                      unsigned int dataoff)
+{
+       static const u_int8_t valid_new[] = {
+               [ICMPV6_ECHO_REQUEST - 128] = 1,
+               [ICMPV6_NI_QUERY - 128] = 1
+       };
+       int type = ct->tuplehash[0].tuple.dst.u.icmp.type - 128;
+
+       if (type < 0 || type >= sizeof(valid_new) || !valid_new[type]) {
+               /* Can't create a new ICMPv6 `conn' with this. */
+               pr_debug("icmpv6: can't create new conn with type %u\n",
+                        type + 128);
+               nf_ct_dump_tuple_ipv6(&ct->tuplehash[0].tuple);
+               return false;
+       }
+       return true;
+}
+
+static int
+icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
+                    struct sk_buff *skb,
+                    unsigned int icmp6off)
+{
+       struct nf_conntrack_tuple intuple, origtuple;
+       const struct nf_conntrack_tuple_hash *h;
+       const struct nf_conntrack_l4proto *inproto;
+       enum ip_conntrack_info ctinfo;
+       struct nf_conntrack_zone tmp;
+
+       WARN_ON(skb_nfct(skb));
+
+       /* Are they talking about one of our connections? */
+       if (!nf_ct_get_tuplepr(skb,
+                              skb_network_offset(skb)
+                               + sizeof(struct ipv6hdr)
+                               + sizeof(struct icmp6hdr),
+                              PF_INET6, net, &origtuple)) {
+               pr_debug("icmpv6_error: Can't get tuple\n");
+               return -NF_ACCEPT;
+       }
+
+       /* rcu_read_lock()ed by nf_hook_thresh */
+       inproto = __nf_ct_l4proto_find(PF_INET6, origtuple.dst.protonum);
+
+       /* Ordinarily, we'd expect the inverted tupleproto, but it's
+          been preserved inside the ICMP. */
+       if (!nf_ct_invert_tuple(&intuple, &origtuple, inproto)) {
+               pr_debug("icmpv6_error: Can't invert tuple\n");
+               return -NF_ACCEPT;
+       }
+
+       ctinfo = IP_CT_RELATED;
+
+       h = nf_conntrack_find_get(net, nf_ct_zone_tmpl(tmpl, skb, &tmp),
+                                 &intuple);
+       if (!h) {
+               pr_debug("icmpv6_error: no match\n");
+               return -NF_ACCEPT;
+       } else {
+               if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY)
+                       ctinfo += IP_CT_IS_REPLY;
+       }
+
+       /* Update skb to refer to this connection */
+       nf_ct_set(skb, nf_ct_tuplehash_to_ctrack(h), ctinfo);
+       return NF_ACCEPT;
+}
+
+static void icmpv6_error_log(const struct sk_buff *skb, struct net *net,
+                            u8 pf, const char *msg)
+{
+       nf_l4proto_log_invalid(skb, net, pf, IPPROTO_ICMPV6, "%s", msg);
+}
+
+static int
+icmpv6_error(struct net *net, struct nf_conn *tmpl,
+            struct sk_buff *skb, unsigned int dataoff,
+            u8 pf, unsigned int hooknum)
+{
+       const struct icmp6hdr *icmp6h;
+       struct icmp6hdr _ih;
+       int type;
+
+       icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih);
+       if (icmp6h == NULL) {
+               icmpv6_error_log(skb, net, pf, "short packet");
+               return -NF_ACCEPT;
+       }
+
+       if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
+           nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) {
+               icmpv6_error_log(skb, net, pf, "ICMPv6 checksum failed");
+               return -NF_ACCEPT;
+       }
+
+       type = icmp6h->icmp6_type - 130;
+       if (type >= 0 && type < sizeof(noct_valid_new) &&
+           noct_valid_new[type]) {
+               nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
+               return NF_ACCEPT;
+       }
+
+       /* Not an error message? */
+       if (icmp6h->icmp6_type >= 128)
+               return NF_ACCEPT;
+
+       return icmpv6_error_message(net, tmpl, skb, dataoff);
+}
+
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+static int icmpv6_tuple_to_nlattr(struct sk_buff *skb,
+                                 const struct nf_conntrack_tuple *t)
+{
+       if (nla_put_be16(skb, CTA_PROTO_ICMPV6_ID, t->src.u.icmp.id) ||
+           nla_put_u8(skb, CTA_PROTO_ICMPV6_TYPE, t->dst.u.icmp.type) ||
+           nla_put_u8(skb, CTA_PROTO_ICMPV6_CODE, t->dst.u.icmp.code))
+               goto nla_put_failure;
+       return 0;
+
+nla_put_failure:
+       return -1;
+}
+
+static const struct nla_policy icmpv6_nla_policy[CTA_PROTO_MAX+1] = {
+       [CTA_PROTO_ICMPV6_TYPE] = { .type = NLA_U8 },
+       [CTA_PROTO_ICMPV6_CODE] = { .type = NLA_U8 },
+       [CTA_PROTO_ICMPV6_ID]   = { .type = NLA_U16 },
+};
+
+static int icmpv6_nlattr_to_tuple(struct nlattr *tb[],
+                               struct nf_conntrack_tuple *tuple)
+{
+       if (!tb[CTA_PROTO_ICMPV6_TYPE] ||
+           !tb[CTA_PROTO_ICMPV6_CODE] ||
+           !tb[CTA_PROTO_ICMPV6_ID])
+               return -EINVAL;
+
+       tuple->dst.u.icmp.type = nla_get_u8(tb[CTA_PROTO_ICMPV6_TYPE]);
+       tuple->dst.u.icmp.code = nla_get_u8(tb[CTA_PROTO_ICMPV6_CODE]);
+       tuple->src.u.icmp.id = nla_get_be16(tb[CTA_PROTO_ICMPV6_ID]);
+
+       if (tuple->dst.u.icmp.type < 128 ||
+           tuple->dst.u.icmp.type - 128 >= sizeof(invmap) ||
+           !invmap[tuple->dst.u.icmp.type - 128])
+               return -EINVAL;
+
+       return 0;
+}
+
+static unsigned int icmpv6_nlattr_tuple_size(void)
+{
+       static unsigned int size __read_mostly;
+
+       if (!size)
+               size = nla_policy_len(icmpv6_nla_policy, CTA_PROTO_MAX + 1);
+
+       return size;
+}
+#endif
+
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_cttimeout.h>
+
+static int icmpv6_timeout_nlattr_to_obj(struct nlattr *tb[],
+                                       struct net *net, void *data)
+{
+       unsigned int *timeout = data;
+       struct nf_icmp_net *in = icmpv6_pernet(net);
+
+       if (!timeout)
+               timeout = icmpv6_get_timeouts(net);
+       if (tb[CTA_TIMEOUT_ICMPV6_TIMEOUT]) {
+               *timeout =
+                   ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMPV6_TIMEOUT])) * HZ;
+       } else {
+               /* Set default ICMPv6 timeout. */
+               *timeout = in->timeout;
+       }
+       return 0;
+}
+
+static int
+icmpv6_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
+{
+       const unsigned int *timeout = data;
+
+       if (nla_put_be32(skb, CTA_TIMEOUT_ICMPV6_TIMEOUT, htonl(*timeout / HZ)))
+               goto nla_put_failure;
+       return 0;
+
+nla_put_failure:
+       return -ENOSPC;
+}
+
+static const struct nla_policy
+icmpv6_timeout_nla_policy[CTA_TIMEOUT_ICMPV6_MAX+1] = {
+       [CTA_TIMEOUT_ICMPV6_TIMEOUT]    = { .type = NLA_U32 },
+};
+#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table icmpv6_sysctl_table[] = {
+       {
+               .procname       = "nf_conntrack_icmpv6_timeout",
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_jiffies,
+       },
+       { }
+};
+#endif /* CONFIG_SYSCTL */
+
+static int icmpv6_kmemdup_sysctl_table(struct nf_proto_net *pn,
+                                      struct nf_icmp_net *in)
+{
+#ifdef CONFIG_SYSCTL
+       pn->ctl_table = kmemdup(icmpv6_sysctl_table,
+                               sizeof(icmpv6_sysctl_table),
+                               GFP_KERNEL);
+       if (!pn->ctl_table)
+               return -ENOMEM;
+
+       pn->ctl_table[0].data = &in->timeout;
+#endif
+       return 0;
+}
+
+static int icmpv6_init_net(struct net *net, u_int16_t proto)
+{
+       struct nf_icmp_net *in = icmpv6_pernet(net);
+       struct nf_proto_net *pn = &in->pn;
+
+       in->timeout = nf_ct_icmpv6_timeout;
+
+       return icmpv6_kmemdup_sysctl_table(pn, in);
+}
+
+static struct nf_proto_net *icmpv6_get_net_proto(struct net *net)
+{
+       return &net->ct.nf_ct_proto.icmpv6.pn;
+}
+
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 =
+{
+       .l3proto                = PF_INET6,
+       .l4proto                = IPPROTO_ICMPV6,
+       .pkt_to_tuple           = icmpv6_pkt_to_tuple,
+       .invert_tuple           = icmpv6_invert_tuple,
+       .packet                 = icmpv6_packet,
+       .new                    = icmpv6_new,
+       .error                  = icmpv6_error,
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+       .tuple_to_nlattr        = icmpv6_tuple_to_nlattr,
+       .nlattr_tuple_size      = icmpv6_nlattr_tuple_size,
+       .nlattr_to_tuple        = icmpv6_nlattr_to_tuple,
+       .nla_policy             = icmpv6_nla_policy,
+#endif
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
+       .ctnl_timeout           = {
+               .nlattr_to_obj  = icmpv6_timeout_nlattr_to_obj,
+               .obj_to_nlattr  = icmpv6_timeout_obj_to_nlattr,
+               .nlattr_max     = CTA_TIMEOUT_ICMP_MAX,
+               .obj_size       = sizeof(unsigned int),
+               .nla_policy     = icmpv6_timeout_nla_policy,
+       },
+#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
+       .init_net               = icmpv6_init_net,
+       .get_net_proto          = icmpv6_get_net_proto,
+};
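
ICMPv6 informational types start at 128, so the invmap and noct_valid_new tables above rebase their indices (by 128 and 130 respectively) to keep the arrays small. A standalone model of the rebased, bounds-checked lookup that icmpv6_error() uses to mark neighbour-discovery and MLD traffic as untracked, assuming the constants from <linux/icmpv6.h> and <net/ndisc.h> (the helper itself is hypothetical):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Values as in <linux/icmpv6.h> / <net/ndisc.h>. */
    #define ICMPV6_MGM_QUERY              130
    #define ICMPV6_MGM_REPORT             131
    #define ICMPV6_MGM_REDUCTION          132
    #define NDISC_ROUTER_SOLICITATION     133
    #define NDISC_ROUTER_ADVERTISEMENT    134
    #define NDISC_NEIGHBOUR_SOLICITATION  135
    #define NDISC_NEIGHBOUR_ADVERTISEMENT 136
    #define ICMPV6_MLD2_REPORT            143

    /* Rebased at 130 so the array spans only types 130..143. */
    static const uint8_t noct_valid_new[] = {
            [ICMPV6_MGM_QUERY - 130]              = 1,
            [ICMPV6_MGM_REPORT - 130]             = 1,
            [ICMPV6_MGM_REDUCTION - 130]          = 1,
            [NDISC_ROUTER_SOLICITATION - 130]     = 1,
            [NDISC_ROUTER_ADVERTISEMENT - 130]    = 1,
            [NDISC_NEIGHBOUR_SOLICITATION - 130]  = 1,
            [NDISC_NEIGHBOUR_ADVERTISEMENT - 130] = 1,
            [ICMPV6_MLD2_REPORT - 130]            = 1,
    };

    /* Mirrors the check in icmpv6_error(): out-of-range types fall through. */
    static bool is_untracked_type(uint8_t icmp6_type)
    {
            int type = icmp6_type - 130;

            return type >= 0 && type < (int)sizeof(noct_valid_new) &&
                   noct_valid_new[type];
    }

    int main(void)
    {
            printf("router advert (134): %d\n", is_untracked_type(134)); /* 1 */
            printf("echo request (128):  %d\n", is_untracked_type(128)); /* 0 */
            return 0;
    }
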
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index fb9a35d1606996897c527be85afae52481edc16b..8d1e085fc14a4a3776e8d67ca06b7cc82519fcb0 100644 (file)
@@ -28,6 +28,7 @@
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_timeout.h>
 
 /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
    closely.  They're more complex. --RR
@@ -150,30 +151,6 @@ static inline struct nf_sctp_net *sctp_pernet(struct net *net)
        return &net->ct.nf_ct_proto.sctp;
 }
 
-static bool sctp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
-                             struct net *net, struct nf_conntrack_tuple *tuple)
-{
-       const struct sctphdr *hp;
-       struct sctphdr _hdr;
-
-       /* Actually only need first 4 bytes to get ports. */
-       hp = skb_header_pointer(skb, dataoff, 4, &_hdr);
-       if (hp == NULL)
-               return false;
-
-       tuple->src.u.sctp.port = hp->source;
-       tuple->dst.u.sctp.port = hp->dest;
-       return true;
-}
-
-static bool sctp_invert_tuple(struct nf_conntrack_tuple *tuple,
-                             const struct nf_conntrack_tuple *orig)
-{
-       tuple->src.u.sctp.port = orig->dst.u.sctp.port;
-       tuple->dst.u.sctp.port = orig->src.u.sctp.port;
-       return true;
-}
-
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
 /* Print out the private part of the conntrack. */
 static void sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
@@ -296,17 +273,11 @@ static int sctp_new_state(enum ip_conntrack_dir dir,
        return sctp_conntracks[dir][i][cur_state];
 }
 
-static unsigned int *sctp_get_timeouts(struct net *net)
-{
-       return sctp_pernet(net)->timeouts;
-}
-
 /* Returns verdict for packet, or -NF_ACCEPT for invalid. */
 static int sctp_packet(struct nf_conn *ct,
                       const struct sk_buff *skb,
                       unsigned int dataoff,
-                      enum ip_conntrack_info ctinfo,
-                      unsigned int *timeouts)
+                      enum ip_conntrack_info ctinfo)
 {
        enum sctp_conntrack new_state, old_state;
        enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
@@ -315,6 +286,7 @@ static int sctp_packet(struct nf_conn *ct,
        const struct sctp_chunkhdr *sch;
        struct sctp_chunkhdr _sch;
        u_int32_t offset, count;
+       unsigned int *timeouts;
        unsigned long map[256 / sizeof(unsigned long)] = { 0 };
 
        sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph);
@@ -403,6 +375,10 @@ static int sctp_packet(struct nf_conn *ct,
        }
        spin_unlock_bh(&ct->lock);
 
+       timeouts = nf_ct_timeout_lookup(ct);
+       if (!timeouts)
+               timeouts = sctp_pernet(nf_ct_net(ct))->timeouts;
+
        nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]);
 
        if (old_state == SCTP_CONNTRACK_COOKIE_ECHOED &&
@@ -423,7 +399,7 @@ out:
 
 /* Called when a new connection for this protocol is found. */
 static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
-                    unsigned int dataoff, unsigned int *timeouts)
+                    unsigned int dataoff)
 {
        enum sctp_conntrack new_state;
        const struct sctphdr *sh;
@@ -780,13 +756,10 @@ static struct nf_proto_net *sctp_get_net_proto(struct net *net)
 const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 = {
        .l3proto                = PF_INET,
        .l4proto                = IPPROTO_SCTP,
-       .pkt_to_tuple           = sctp_pkt_to_tuple,
-       .invert_tuple           = sctp_invert_tuple,
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
        .print_conntrack        = sctp_print_conntrack,
 #endif
        .packet                 = sctp_packet,
-       .get_timeouts           = sctp_get_timeouts,
        .new                    = sctp_new,
        .error                  = sctp_error,
        .can_early_drop         = sctp_can_early_drop,
@@ -817,13 +790,10 @@ EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_sctp4);
 const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 = {
        .l3proto                = PF_INET6,
        .l4proto                = IPPROTO_SCTP,
-       .pkt_to_tuple           = sctp_pkt_to_tuple,
-       .invert_tuple           = sctp_invert_tuple,
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
        .print_conntrack        = sctp_print_conntrack,
 #endif
        .packet                 = sctp_packet,
-       .get_timeouts           = sctp_get_timeouts,
        .new                    = sctp_new,
        .error                  = sctp_error,
        .can_early_drop         = sctp_can_early_drop,
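
The SCTP hunks above drop the get_timeouts() callback and make sctp_packet() resolve timeouts itself: nf_ct_timeout_lookup() yields a per-flow policy if one was attached via ctnetlink, otherwise the per-netns defaults apply. A userspace model of that lookup-with-fallback pattern (all names below are illustrative, not kernel API):

    #include <stdio.h>

    #define SCTP_CT_STATE_MAX 10    /* illustrative state count */

    struct pernet_defaults {
            unsigned int timeouts[SCTP_CT_STATE_MAX];
    };

    struct conn {
            /* NULL unless a cttimeout policy was attached to this flow. */
            const unsigned int *timeout_ext;
            const struct pernet_defaults *net;
    };

    /* Models: timeouts = nf_ct_timeout_lookup(ct); if (!timeouts) use defaults. */
    static const unsigned int *effective_timeouts(const struct conn *ct)
    {
            return ct->timeout_ext ? ct->timeout_ext : ct->net->timeouts;
    }

    int main(void)
    {
            struct pernet_defaults net = { .timeouts = { [2] = 300 } };
            unsigned int policy[SCTP_CT_STATE_MAX] = { [2] = 60 };
            struct conn plain = { NULL, &net };
            struct conn tuned = { policy, &net };

            printf("default state-2 timeout: %u\n", effective_timeouts(&plain)[2]);
            printf("policy state-2 timeout:  %u\n", effective_timeouts(&tuned)[2]);
            return 0;
    }
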
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 8e67910185a05717628837d34c7e4fe829ca7424..d80d322b9d8ba0fcaec08a87a5e7ddcb6c533fa4 100644 (file)
@@ -29,6 +29,7 @@
 #include <net/netfilter/nf_conntrack_ecache.h>
 #include <net/netfilter/nf_conntrack_seqadj.h>
 #include <net/netfilter/nf_conntrack_synproxy.h>
+#include <net/netfilter/nf_conntrack_timeout.h>
 #include <net/netfilter/nf_log.h>
 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
@@ -276,31 +277,6 @@ static inline struct nf_tcp_net *tcp_pernet(struct net *net)
        return &net->ct.nf_ct_proto.tcp;
 }
 
-static bool tcp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
-                            struct net *net, struct nf_conntrack_tuple *tuple)
-{
-       const struct tcphdr *hp;
-       struct tcphdr _hdr;
-
-       /* Actually only need first 4 bytes to get ports. */
-       hp = skb_header_pointer(skb, dataoff, 4, &_hdr);
-       if (hp == NULL)
-               return false;
-
-       tuple->src.u.tcp.port = hp->source;
-       tuple->dst.u.tcp.port = hp->dest;
-
-       return true;
-}
-
-static bool tcp_invert_tuple(struct nf_conntrack_tuple *tuple,
-                            const struct nf_conntrack_tuple *orig)
-{
-       tuple->src.u.tcp.port = orig->dst.u.tcp.port;
-       tuple->dst.u.tcp.port = orig->src.u.tcp.port;
-       return true;
-}
-
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
 /* Print out the private part of the conntrack. */
 static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
@@ -793,27 +769,21 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl,
        return NF_ACCEPT;
 }
 
-static unsigned int *tcp_get_timeouts(struct net *net)
-{
-       return tcp_pernet(net)->timeouts;
-}
-
 /* Returns verdict for packet, or -NF_ACCEPT for invalid. */
 static int tcp_packet(struct nf_conn *ct,
                      const struct sk_buff *skb,
                      unsigned int dataoff,
-                     enum ip_conntrack_info ctinfo,
-                     unsigned int *timeouts)
+                     enum ip_conntrack_info ctinfo)
 {
        struct net *net = nf_ct_net(ct);
        struct nf_tcp_net *tn = tcp_pernet(net);
        struct nf_conntrack_tuple *tuple;
        enum tcp_conntrack new_state, old_state;
+       unsigned int index, *timeouts;
        enum ip_conntrack_dir dir;
        const struct tcphdr *th;
        struct tcphdr _tcph;
        unsigned long timeout;
-       unsigned int index;
 
        th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
        BUG_ON(th == NULL);
@@ -1046,6 +1016,10 @@ static int tcp_packet(struct nf_conn *ct,
            && new_state == TCP_CONNTRACK_FIN_WAIT)
                ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
 
+       timeouts = nf_ct_timeout_lookup(ct);
+       if (!timeouts)
+               timeouts = tn->timeouts;
+
        if (ct->proto.tcp.retrans >= tn->tcp_max_retrans &&
            timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS])
                timeout = timeouts[TCP_CONNTRACK_RETRANS];
@@ -1095,7 +1069,7 @@ static int tcp_packet(struct nf_conn *ct,
 
 /* Called when a new connection for this protocol is found. */
 static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
-                   unsigned int dataoff, unsigned int *timeouts)
+                   unsigned int dataoff)
 {
        enum tcp_conntrack new_state;
        const struct tcphdr *th;
@@ -1313,10 +1287,12 @@ static unsigned int tcp_nlattr_tuple_size(void)
 static int tcp_timeout_nlattr_to_obj(struct nlattr *tb[],
                                     struct net *net, void *data)
 {
-       unsigned int *timeouts = data;
        struct nf_tcp_net *tn = tcp_pernet(net);
+       unsigned int *timeouts = data;
        int i;
 
+       if (!timeouts)
+               timeouts = tn->timeouts;
        /* set default TCP timeouts. */
        for (i=0; i<TCP_CONNTRACK_TIMEOUT_MAX; i++)
                timeouts[i] = tn->timeouts[i];
@@ -1559,13 +1535,10 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 =
 {
        .l3proto                = PF_INET,
        .l4proto                = IPPROTO_TCP,
-       .pkt_to_tuple           = tcp_pkt_to_tuple,
-       .invert_tuple           = tcp_invert_tuple,
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
        .print_conntrack        = tcp_print_conntrack,
 #endif
        .packet                 = tcp_packet,
-       .get_timeouts           = tcp_get_timeouts,
        .new                    = tcp_new,
        .error                  = tcp_error,
        .can_early_drop         = tcp_can_early_drop,
@@ -1597,13 +1570,10 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 =
 {
        .l3proto                = PF_INET6,
        .l4proto                = IPPROTO_TCP,
-       .pkt_to_tuple           = tcp_pkt_to_tuple,
-       .invert_tuple           = tcp_invert_tuple,
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
        .print_conntrack        = tcp_print_conntrack,
 #endif
        .packet                 = tcp_packet,
-       .get_timeouts           = tcp_get_timeouts,
        .new                    = tcp_new,
        .error                  = tcp_error,
        .can_early_drop         = tcp_can_early_drop,
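
tcp_packet() follows the same per-flow lookup scheme and additionally clamps the refreshed timeout once a flow keeps retransmitting: after tcp_max_retrans retransmissions, a per-state timeout longer than the RETRANS timeout is capped to it. A compact standalone model of that clamp (state names and values are made up for the example):

    #include <stdio.h>

    enum { CT_ESTABLISHED, CT_RETRANS, CT_STATE_MAX };

    /* Models the clamp in tcp_packet(): once a flow has retransmitted
     * max_retrans times, a long per-state timeout is capped to the
     * (much shorter) RETRANS timeout. */
    static unsigned int pick_timeout(unsigned int retrans,
                                     unsigned int max_retrans,
                                     const unsigned int *timeouts,
                                     int new_state)
    {
            if (retrans >= max_retrans &&
                timeouts[new_state] > timeouts[CT_RETRANS])
                    return timeouts[CT_RETRANS];
            return timeouts[new_state];
    }

    int main(void)
    {
            const unsigned int timeouts[CT_STATE_MAX] = {
                    [CT_ESTABLISHED] = 5 * 24 * 3600,       /* 5 days */
                    [CT_RETRANS]     = 5 * 60,              /* 5 minutes */
            };

            printf("healthy flow:   %u s\n",
                   pick_timeout(0, 3, timeouts, CT_ESTABLISHED));
            printf("retransmitting: %u s\n",
                   pick_timeout(3, 3, timeouts, CT_ESTABLISHED));
            return 0;
    }
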
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index fe7243970aa454c88c68c6e16eaeea09ec012f81..7a1b8988a931ab15145d6371288549da51222076 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/netfilter_ipv6.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_timeout.h>
 #include <net/netfilter/nf_log.h>
 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
@@ -36,33 +37,6 @@ static inline struct nf_udp_net *udp_pernet(struct net *net)
        return &net->ct.nf_ct_proto.udp;
 }
 
-static bool udp_pkt_to_tuple(const struct sk_buff *skb,
-                            unsigned int dataoff,
-                            struct net *net,
-                            struct nf_conntrack_tuple *tuple)
-{
-       const struct udphdr *hp;
-       struct udphdr _hdr;
-
-       /* Actually only need first 4 bytes to get ports. */
-       hp = skb_header_pointer(skb, dataoff, 4, &_hdr);
-       if (hp == NULL)
-               return false;
-
-       tuple->src.u.udp.port = hp->source;
-       tuple->dst.u.udp.port = hp->dest;
-
-       return true;
-}
-
-static bool udp_invert_tuple(struct nf_conntrack_tuple *tuple,
-                            const struct nf_conntrack_tuple *orig)
-{
-       tuple->src.u.udp.port = orig->dst.u.udp.port;
-       tuple->dst.u.udp.port = orig->src.u.udp.port;
-       return true;
-}
-
 static unsigned int *udp_get_timeouts(struct net *net)
 {
        return udp_pernet(net)->timeouts;
@@ -72,9 +46,14 @@ static unsigned int *udp_get_timeouts(struct net *net)
 static int udp_packet(struct nf_conn *ct,
                      const struct sk_buff *skb,
                      unsigned int dataoff,
-                     enum ip_conntrack_info ctinfo,
-                     unsigned int *timeouts)
+                     enum ip_conntrack_info ctinfo)
 {
+       unsigned int *timeouts;
+
+       timeouts = nf_ct_timeout_lookup(ct);
+       if (!timeouts)
+               timeouts = udp_get_timeouts(nf_ct_net(ct));
+
        /* If we've seen traffic both ways, this is some kind of UDP
           stream.  Extend timeout. */
        if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
@@ -92,7 +71,7 @@ static int udp_packet(struct nf_conn *ct,
 
 /* Called when a new connection for this protocol is found. */
 static bool udp_new(struct nf_conn *ct, const struct sk_buff *skb,
-                   unsigned int dataoff, unsigned int *timeouts)
+                   unsigned int dataoff)
 {
        return true;
 }
@@ -203,6 +182,9 @@ static int udp_timeout_nlattr_to_obj(struct nlattr *tb[],
        unsigned int *timeouts = data;
        struct nf_udp_net *un = udp_pernet(net);
 
+       if (!timeouts)
+               timeouts = un->timeouts;
+
        /* set default timeouts for UDP. */
        timeouts[UDP_CT_UNREPLIED] = un->timeouts[UDP_CT_UNREPLIED];
        timeouts[UDP_CT_REPLIED] = un->timeouts[UDP_CT_REPLIED];
@@ -301,10 +283,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 =
        .l3proto                = PF_INET,
        .l4proto                = IPPROTO_UDP,
        .allow_clash            = true,
-       .pkt_to_tuple           = udp_pkt_to_tuple,
-       .invert_tuple           = udp_invert_tuple,
        .packet                 = udp_packet,
-       .get_timeouts           = udp_get_timeouts,
        .new                    = udp_new,
        .error                  = udp_error,
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
@@ -333,10 +312,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 =
        .l3proto                = PF_INET,
        .l4proto                = IPPROTO_UDPLITE,
        .allow_clash            = true,
-       .pkt_to_tuple           = udp_pkt_to_tuple,
-       .invert_tuple           = udp_invert_tuple,
        .packet                 = udp_packet,
-       .get_timeouts           = udp_get_timeouts,
        .new                    = udp_new,
        .error                  = udplite_error,
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
@@ -365,10 +341,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 =
        .l3proto                = PF_INET6,
        .l4proto                = IPPROTO_UDP,
        .allow_clash            = true,
-       .pkt_to_tuple           = udp_pkt_to_tuple,
-       .invert_tuple           = udp_invert_tuple,
        .packet                 = udp_packet,
-       .get_timeouts           = udp_get_timeouts,
        .new                    = udp_new,
        .error                  = udp_error,
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
@@ -397,10 +370,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 =
        .l3proto                = PF_INET6,
        .l4proto                = IPPROTO_UDPLITE,
        .allow_clash            = true,
-       .pkt_to_tuple           = udp_pkt_to_tuple,
-       .invert_tuple           = udp_invert_tuple,
        .packet                 = udp_packet,
-       .get_timeouts           = udp_get_timeouts,
        .new                    = udp_new,
        .error                  = udplite_error,
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
@@ -423,3 +393,4 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 =
 };
 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udplite6);
 #endif
+#include <net/netfilter/nf_conntrack_timeout.h>
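
udp_packet() resolves its timeout pair the same way and then distinguishes streams from one-shot traffic: once replies have been seen (IPS_SEEN_REPLY_BIT), the longer "replied" timeout applies. A minimal model, assuming the two-slot timeout layout of nf_udp_net (values are illustrative):

    #include <stdbool.h>
    #include <stdio.h>

    enum { UDP_CT_UNREPLIED, UDP_CT_REPLIED, UDP_CT_MAX };

    /* Both directions seen: treat it as a stream and extend the timeout. */
    static unsigned int udp_refresh_timeout(bool seen_reply,
                                            const unsigned int *timeouts)
    {
            return timeouts[seen_reply ? UDP_CT_REPLIED : UDP_CT_UNREPLIED];
    }

    int main(void)
    {
            const unsigned int timeouts[UDP_CT_MAX] = {
                    [UDP_CT_UNREPLIED] = 30,        /* seconds */
                    [UDP_CT_REPLIED]   = 180,
            };

            printf("one-way: %u s\n", udp_refresh_timeout(false, timeouts));
            printf("stream:  %u s\n", udp_refresh_timeout(true, timeouts));
            return 0;
    }
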
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index b642c0b2495c810e1b2abe04e00f2b1280e42bf5..13279f683da9786f3b6fea88b96ba96bc07f62c5 100644 (file)
@@ -1,12 +1,4 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- * (C) 2005-2012 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/types.h>
 #include <linux/netfilter.h>
 #include <linux/slab.h>
@@ -24,7 +16,6 @@
 
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_core.h>
-#include <net/netfilter/nf_conntrack_l3proto.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_timestamp.h>
 #include <linux/rculist_nulls.h>
 
-MODULE_LICENSE("GPL");
+unsigned int nf_conntrack_net_id __read_mostly;
 
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
 void
 print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
-            const struct nf_conntrack_l3proto *l3proto,
             const struct nf_conntrack_l4proto *l4proto)
 {
-       switch (l3proto->l3proto) {
+       switch (tuple->src.l3num) {
        case NFPROTO_IPV4:
                seq_printf(s, "src=%pI4 dst=%pI4 ",
                           &tuple->src.u3.ip, &tuple->dst.u3.ip);
@@ -282,7 +272,6 @@ static int ct_seq_show(struct seq_file *s, void *v)
 {
        struct nf_conntrack_tuple_hash *hash = v;
        struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash);
-       const struct nf_conntrack_l3proto *l3proto;
        const struct nf_conntrack_l4proto *l4proto;
        struct net *net = seq_file_net(s);
        int ret = 0;
@@ -303,14 +292,12 @@ static int ct_seq_show(struct seq_file *s, void *v)
        if (!net_eq(nf_ct_net(ct), net))
                goto release;
 
-       l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
-       WARN_ON(!l3proto);
        l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
        WARN_ON(!l4proto);
 
        ret = -ENOSPC;
        seq_printf(s, "%-8s %u %-8s %u ",
-                  l3proto_name(l3proto->l3proto), nf_ct_l3num(ct),
+                  l3proto_name(nf_ct_l3num(ct)), nf_ct_l3num(ct),
                   l4proto_name(l4proto->l4proto), nf_ct_protonum(ct));
 
        if (!test_bit(IPS_OFFLOAD_BIT, &ct->status))
@@ -320,7 +307,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
                l4proto->print_conntrack(s, ct);
 
        print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
-                   l3proto, l4proto);
+                   l4proto);
 
        ct_show_zone(s, ct, NF_CT_ZONE_DIR_ORIG);
 
@@ -333,8 +320,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
        if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status)))
                seq_puts(s, "[UNREPLIED] ");
 
-       print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
-                   l3proto, l4proto);
+       print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, l4proto);
 
        ct_show_zone(s, ct, NF_CT_ZONE_DIR_REPL);
 
@@ -680,6 +666,8 @@ static void nf_conntrack_pernet_exit(struct list_head *net_exit_list)
 static struct pernet_operations nf_conntrack_net_ops = {
        .init           = nf_conntrack_pernet_init,
        .exit_batch     = nf_conntrack_pernet_exit,
+       .id             = &nf_conntrack_net_id,
+       .size           = sizeof(struct nf_conntrack_net),
 };
 
 static int __init nf_conntrack_standalone_init(void)
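
With struct nf_conntrack_l3proto gone, the /proc code above dispatches on the l3num carried in the tuple itself. A userspace analogue of the new print_tuple() switch, substituting inet_ntop() for seq_printf()'s %pI4/%pI6 (the struct is a stand-in, not the kernel's):

    #include <arpa/inet.h>
    #include <stdio.h>

    /* Stand-in for the interesting part of nf_conntrack_tuple. */
    struct mini_tuple {
            unsigned char l3num;        /* AF_INET or AF_INET6 */
            unsigned char addr[16];     /* big enough for either family */
    };

    static void print_src(const struct mini_tuple *t)
    {
            char buf[INET6_ADDRSTRLEN];

            /* Dispatch on the tuple's own l3num, as the patched code does. */
            switch (t->l3num) {
            case AF_INET:
                    printf("src=%s ", inet_ntop(AF_INET, t->addr, buf, sizeof(buf)));
                    break;
            case AF_INET6:
                    printf("src=%s ", inet_ntop(AF_INET6, t->addr, buf, sizeof(buf)));
                    break;
            }
    }

    int main(void)
    {
            struct mini_tuple t4 = { .l3num = AF_INET, .addr = { 192, 0, 2, 1 } };

            print_src(&t4);
            printf("\n");
            return 0;
    }
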
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index eb0d1658ac0559d8d74444ac3bb7714662cbe506..d8125616edc79dd311c12dd1806c97dd4e24f4e4 100644 (file)
@@ -107,11 +107,12 @@ static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
        tcp->seen[1].td_maxwin = 0;
 }
 
+#define NF_FLOWTABLE_TCP_PICKUP_TIMEOUT        (120 * HZ)
+#define NF_FLOWTABLE_UDP_PICKUP_TIMEOUT        (30 * HZ)
+
 static void flow_offload_fixup_ct_state(struct nf_conn *ct)
 {
        const struct nf_conntrack_l4proto *l4proto;
-       struct net *net = nf_ct_net(ct);
-       unsigned int *timeouts;
        unsigned int timeout;
        int l4num;
 
@@ -123,14 +124,10 @@ static void flow_offload_fixup_ct_state(struct nf_conn *ct)
        if (!l4proto)
                return;
 
-       timeouts = l4proto->get_timeouts(net);
-       if (!timeouts)
-               return;
-
        if (l4num == IPPROTO_TCP)
-               timeout = timeouts[TCP_CONNTRACK_ESTABLISHED];
+               timeout = NF_FLOWTABLE_TCP_PICKUP_TIMEOUT;
        else if (l4num == IPPROTO_UDP)
-               timeout = timeouts[UDP_CT_REPLIED];
+               timeout = NF_FLOWTABLE_UDP_PICKUP_TIMEOUT;
        else
                return;
 
diff --git a/net/netfilter/nf_log_common.c b/net/netfilter/nf_log_common.c
index dc61399e30beb8a40e76dc81bde0f63f39740486..a8c5c846aec104df36dd6810b6877253ce89fef9 100644 (file)
@@ -132,9 +132,10 @@ int nf_log_dump_tcp_header(struct nf_log_buf *m, const struct sk_buff *skb,
 }
 EXPORT_SYMBOL_GPL(nf_log_dump_tcp_header);
 
-void nf_log_dump_sk_uid_gid(struct nf_log_buf *m, struct sock *sk)
+void nf_log_dump_sk_uid_gid(struct net *net, struct nf_log_buf *m,
+                           struct sock *sk)
 {
-       if (!sk || !sk_fullsock(sk))
+       if (!sk || !sk_fullsock(sk) || !net_eq(net, sock_net(sk)))
                return;
 
        read_lock_bh(&sk->sk_callback_lock);
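
The added net_eq() test stops the logger from reporting UID/GID for a socket owned by a different network namespace. A toy model of the patched precondition (struct layouts are invented for illustration):

    #include <stdbool.h>
    #include <stdio.h>

    struct net { int id; };
    struct sock { struct net *net; bool fullsock; };

    /* Models the patched precondition in nf_log_dump_sk_uid_gid(). */
    static bool may_log_sk_creds(const struct net *net, const struct sock *sk)
    {
            return sk && sk->fullsock && sk->net == net;
    }

    int main(void)
    {
            struct net a = { 1 }, b = { 2 };
            struct sock sk = { .net = &b, .fullsock = true };

            printf("same netns:  %d\n", may_log_sk_creds(&b, &sk));  /* 1 */
            printf("other netns: %d\n", may_log_sk_creds(&a, &sk));  /* 0 */
            return 0;
    }
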
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 46f9df99d276c3be7ff5839ba41273df38e59a72..6366f0c0b8c1e8df3c7505132b0de90bebc0045b 100644 (file)
@@ -28,7 +28,6 @@
 #include <net/netfilter/nf_nat_helper.h>
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_seqadj.h>
-#include <net/netfilter/nf_conntrack_l3proto.h>
 #include <net/netfilter/nf_conntrack_zones.h>
 #include <linux/netfilter/nf_nat.h>
 
@@ -108,6 +107,7 @@ int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family)
        struct flowi fl;
        unsigned int hh_len;
        struct dst_entry *dst;
+       struct sock *sk = skb->sk;
        int err;
 
        err = xfrm_decode_session(skb, &fl, family);
@@ -119,7 +119,10 @@ int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family)
                dst = ((struct xfrm_dst *)dst)->route;
        dst_hold(dst);
 
-       dst = xfrm_lookup(net, dst, &fl, skb->sk, 0);
+       if (sk && !net_eq(net, sock_net(sk)))
+               sk = NULL;
+
+       dst = xfrm_lookup(net, dst, &fl, sk, 0);
        if (IS_ERR(dst))
                return PTR_ERR(dst);
 
@@ -739,12 +742,6 @@ EXPORT_SYMBOL_GPL(nf_nat_l4proto_unregister);
 
 int nf_nat_l3proto_register(const struct nf_nat_l3proto *l3proto)
 {
-       int err;
-
-       err = nf_ct_l3proto_try_module_get(l3proto->l3proto);
-       if (err < 0)
-               return err;
-
        mutex_lock(&nf_nat_proto_mutex);
        RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_TCP],
                         &nf_nat_l4proto_tcp);
@@ -777,7 +774,6 @@ void nf_nat_l3proto_unregister(const struct nf_nat_l3proto *l3proto)
        synchronize_rcu();
 
        nf_nat_l3proto_clean(l3proto->l3proto);
-       nf_ct_l3proto_module_put(l3proto->l3proto);
 }
 EXPORT_SYMBOL_GPL(nf_nat_l3proto_unregister);
 
diff --git a/net/netfilter/nf_osf.c b/net/netfilter/nf_osf.c
index 5ba5c7bef2f96354152f5e90031e206641b695c0..b44d62d5d9a920e2cc7238d1a3ba1eb66cb2a5b5 100644 (file)
 #include <linux/netfilter/nf_osf.h>
 
 static inline int nf_osf_ttl(const struct sk_buff *skb,
-                            const struct nf_osf_info *info,
-                            unsigned char f_ttl)
+                            int ttl_check, unsigned char f_ttl)
 {
        const struct iphdr *ip = ip_hdr(skb);
 
-       if (info->flags & NF_OSF_TTL) {
-               if (info->ttl == NF_OSF_TTL_TRUE)
+       if (ttl_check != -1) {
+               if (ttl_check == NF_OSF_TTL_TRUE)
                        return ip->ttl == f_ttl;
-               if (info->ttl == NF_OSF_TTL_NOCHECK)
+               if (ttl_check == NF_OSF_TTL_NOCHECK)
                        return 1;
                else if (ip->ttl <= f_ttl)
                        return 1;
@@ -52,140 +51,175 @@ static inline int nf_osf_ttl(const struct sk_buff *skb,
        return ip->ttl == f_ttl;
 }
 
-bool
-nf_osf_match(const struct sk_buff *skb, u_int8_t family,
-            int hooknum, struct net_device *in, struct net_device *out,
-            const struct nf_osf_info *info, struct net *net,
-            const struct list_head *nf_osf_fingers)
+struct nf_osf_hdr_ctx {
+       bool                    df;
+       u16                     window;
+       u16                     totlen;
+       const unsigned char     *optp;
+       unsigned int            optsize;
+};
+
+static bool nf_osf_match_one(const struct sk_buff *skb,
+                            const struct nf_osf_user_finger *f,
+                            int ttl_check,
+                            struct nf_osf_hdr_ctx *ctx)
 {
-       const unsigned char *optp = NULL, *_optp = NULL;
-       unsigned int optsize = 0, check_WSS = 0;
-       int fmatch = FMATCH_WRONG, fcount = 0;
-       const struct iphdr *ip = ip_hdr(skb);
-       const struct nf_osf_user_finger *f;
-       unsigned char opts[MAX_IPOPTLEN];
-       const struct nf_osf_finger *kf;
-       u16 window, totlen, mss = 0;
-       const struct tcphdr *tcp;
-       struct tcphdr _tcph;
-       bool df;
+       unsigned int check_WSS = 0;
+       int fmatch = FMATCH_WRONG;
+       int foptsize, optnum;
+       u16 mss = 0;
 
-       tcp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(struct tcphdr), &_tcph);
-       if (!tcp)
+       if (ctx->totlen != f->ss || !nf_osf_ttl(skb, ttl_check, f->ttl))
                return false;
 
-       if (!tcp->syn)
+       /*
+        * Should not happen if the userspace parser was written correctly.
+        */
+       if (f->wss.wc >= OSF_WSS_MAX)
                return false;
 
-       totlen = ntohs(ip->tot_len);
-       df = ntohs(ip->frag_off) & IP_DF;
-       window = ntohs(tcp->window);
+       /* Check options */
 
-       if (tcp->doff * 4 > sizeof(struct tcphdr)) {
-               optsize = tcp->doff * 4 - sizeof(struct tcphdr);
+       foptsize = 0;
+       for (optnum = 0; optnum < f->opt_num; ++optnum)
+               foptsize += f->opt[optnum].length;
 
-               _optp = optp = skb_header_pointer(skb, ip_hdrlen(skb) +
-                               sizeof(struct tcphdr), optsize, opts);
-       }
+       if (foptsize > MAX_IPOPTLEN ||
+           ctx->optsize > MAX_IPOPTLEN ||
+           ctx->optsize != foptsize)
+               return false;
 
-       list_for_each_entry_rcu(kf, &nf_osf_fingers[df], finger_entry) {
-               int foptsize, optnum;
+       check_WSS = f->wss.wc;
 
-               f = &kf->finger;
+       for (optnum = 0; optnum < f->opt_num; ++optnum) {
+               if (f->opt[optnum].kind == *ctx->optp) {
+                       __u32 len = f->opt[optnum].length;
+                       const __u8 *optend = ctx->optp + len;
 
-               if (!(info->flags & NF_OSF_LOG) && strcmp(info->genre, f->genre))
-                       continue;
+                       fmatch = FMATCH_OK;
+
+                       switch (*ctx->optp) {
+                       case OSFOPT_MSS:
+                               mss = ctx->optp[3];
+                               mss <<= 8;
+                               mss |= ctx->optp[2];
+
+                               mss = ntohs((__force __be16)mss);
+                               break;
+                       case OSFOPT_TS:
+                               break;
+                       }
+
+                       ctx->optp = optend;
+               } else
+                       fmatch = FMATCH_OPT_WRONG;
+
+               if (fmatch != FMATCH_OK)
+                       break;
+       }
 
-               optp = _optp;
+       if (fmatch != FMATCH_OPT_WRONG) {
                fmatch = FMATCH_WRONG;
 
-               if (totlen != f->ss || !nf_osf_ttl(skb, info, f->ttl))
-                       continue;
+               switch (check_WSS) {
+               case OSF_WSS_PLAIN:
+                       if (f->wss.val == 0 || ctx->window == f->wss.val)
+                               fmatch = FMATCH_OK;
+                       break;
+               case OSF_WSS_MSS:
+                       /*
+                        * Some smart modems decrease/mangle the MSS to
+                        * SMART_MSS_2, so we check the standard and
+                        * decreased values as well as the one provided
+                        * in the fingerprint.
+                        */
+#define SMART_MSS_1    1460
+#define SMART_MSS_2    1448
+                       if (ctx->window == f->wss.val * mss ||
+                           ctx->window == f->wss.val * SMART_MSS_1 ||
+                           ctx->window == f->wss.val * SMART_MSS_2)
+                               fmatch = FMATCH_OK;
+                       break;
+               case OSF_WSS_MTU:
+                       if (ctx->window == f->wss.val * (mss + 40) ||
+                           ctx->window == f->wss.val * (SMART_MSS_1 + 40) ||
+                           ctx->window == f->wss.val * (SMART_MSS_2 + 40))
+                               fmatch = FMATCH_OK;
+                       break;
+               case OSF_WSS_MODULO:
+                       if ((ctx->window % f->wss.val) == 0)
+                               fmatch = FMATCH_OK;
+                       break;
+               }
+       }
 
-               /*
-                * Should not happen if userspace parser was written correctly.
-                */
-               if (f->wss.wc >= OSF_WSS_MAX)
-                       continue;
+       return fmatch == FMATCH_OK;
+}
 
-               /* Check options */
+static const struct tcphdr *nf_osf_hdr_ctx_init(struct nf_osf_hdr_ctx *ctx,
+                                               const struct sk_buff *skb,
+                                               const struct iphdr *ip,
+                                               unsigned char *opts)
+{
+       const struct tcphdr *tcp;
+       struct tcphdr _tcph;
 
-               foptsize = 0;
-               for (optnum = 0; optnum < f->opt_num; ++optnum)
-                       foptsize += f->opt[optnum].length;
+       tcp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(struct tcphdr), &_tcph);
+       if (!tcp)
+               return NULL;
 
-               if (foptsize > MAX_IPOPTLEN ||
-                   optsize > MAX_IPOPTLEN ||
-                   optsize != foptsize)
-                       continue;
+       if (!tcp->syn)
+               return NULL;
 
-               check_WSS = f->wss.wc;
+       ctx->totlen = ntohs(ip->tot_len);
+       ctx->df = ntohs(ip->frag_off) & IP_DF;
+       ctx->window = ntohs(tcp->window);
 
-               for (optnum = 0; optnum < f->opt_num; ++optnum) {
-                       if (f->opt[optnum].kind == (*optp)) {
-                               __u32 len = f->opt[optnum].length;
-                               const __u8 *optend = optp + len;
+       if (tcp->doff * 4 > sizeof(struct tcphdr)) {
+               ctx->optsize = tcp->doff * 4 - sizeof(struct tcphdr);
 
-                               fmatch = FMATCH_OK;
+               ctx->optp = skb_header_pointer(skb, ip_hdrlen(skb) +
+                               sizeof(struct tcphdr), ctx->optsize, opts);
+       }
 
-                               switch (*optp) {
-                               case OSFOPT_MSS:
-                                       mss = optp[3];
-                                       mss <<= 8;
-                                       mss |= optp[2];
+       return tcp;
+}
 
-                                       mss = ntohs((__force __be16)mss);
-                                       break;
-                               case OSFOPT_TS:
-                                       break;
-                               }
+bool
+nf_osf_match(const struct sk_buff *skb, u_int8_t family,
+            int hooknum, struct net_device *in, struct net_device *out,
+            const struct nf_osf_info *info, struct net *net,
+            const struct list_head *nf_osf_fingers)
+{
+       const struct iphdr *ip = ip_hdr(skb);
+       const struct nf_osf_user_finger *f;
+       unsigned char opts[MAX_IPOPTLEN];
+       const struct nf_osf_finger *kf;
+       int fcount = 0, ttl_check;
+       int fmatch = FMATCH_WRONG;
+       struct nf_osf_hdr_ctx ctx;
+       const struct tcphdr *tcp;
 
-                               optp = optend;
-                       } else
-                               fmatch = FMATCH_OPT_WRONG;
+       memset(&ctx, 0, sizeof(ctx));
 
-                       if (fmatch != FMATCH_OK)
-                               break;
-               }
+       tcp = nf_osf_hdr_ctx_init(&ctx, skb, ip, opts);
+       if (!tcp)
+               return false;
 
-               if (fmatch != FMATCH_OPT_WRONG) {
-                       fmatch = FMATCH_WRONG;
+       ttl_check = (info->flags & NF_OSF_TTL) ? info->ttl : -1;
 
-                       switch (check_WSS) {
-                       case OSF_WSS_PLAIN:
-                               if (f->wss.val == 0 || window == f->wss.val)
-                                       fmatch = FMATCH_OK;
-                               break;
-                       case OSF_WSS_MSS:
-                               /*
-                                * Some smart modems decrease mangle MSS to
-                                * SMART_MSS_2, so we check standard, decreased
-                                * and the one provided in the fingerprint MSS
-                                * values.
-                                */
-#define SMART_MSS_1    1460
-#define SMART_MSS_2    1448
-                               if (window == f->wss.val * mss ||
-                                   window == f->wss.val * SMART_MSS_1 ||
-                                   window == f->wss.val * SMART_MSS_2)
-                                       fmatch = FMATCH_OK;
-                               break;
-                       case OSF_WSS_MTU:
-                               if (window == f->wss.val * (mss + 40) ||
-                                   window == f->wss.val * (SMART_MSS_1 + 40) ||
-                                   window == f->wss.val * (SMART_MSS_2 + 40))
-                                       fmatch = FMATCH_OK;
-                               break;
-                       case OSF_WSS_MODULO:
-                               if ((window % f->wss.val) == 0)
-                                       fmatch = FMATCH_OK;
-                               break;
-                       }
-               }
+       list_for_each_entry_rcu(kf, &nf_osf_fingers[ctx.df], finger_entry) {
 
-               if (fmatch != FMATCH_OK)
+               f = &kf->finger;
+
+               if (!(info->flags & NF_OSF_LOG) && strcmp(info->genre, f->genre))
+                       continue;
+
+               if (!nf_osf_match_one(skb, f, ttl_check, &ctx))
                        continue;
 
+               fmatch = FMATCH_OK;
+
                fcount++;
 
                if (info->flags & NF_OSF_LOG)
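
The nf_osf rewrite above replaces the nf_osf_info pointer in nf_osf_ttl() with a precomputed ttl_check value: info->ttl when NF_OSF_TTL is set, and -1 otherwise. A standalone model of the resulting TTL comparison, assuming the NF_OSF_TTL_* values from <linux/netfilter/nf_osf.h>:

    #include <stdio.h>

    /* Assumed values of the NF_OSF_TTL_* constants. */
    #define NF_OSF_TTL_TRUE    0    /* TTL must match the fingerprint exactly */
    #define NF_OSF_TTL_LESS    1    /* packet TTL may be <= fingerprint TTL */
    #define NF_OSF_TTL_NOCHECK 2    /* ignore TTL entirely */

    static int osf_ttl_match(int ttl_check, unsigned char pkt_ttl,
                             unsigned char f_ttl)
    {
            if (ttl_check != -1) {
                    if (ttl_check == NF_OSF_TTL_TRUE)
                            return pkt_ttl == f_ttl;
                    if (ttl_check == NF_OSF_TTL_NOCHECK)
                            return 1;
                    if (pkt_ttl <= f_ttl)   /* NF_OSF_TTL_LESS */
                            return 1;
                    return 0;
            }
            /* NF_OSF_TTL flag not set: require an exact match. */
            return pkt_ttl == f_ttl;
    }

    int main(void)
    {
            printf("exact, 64 vs 64: %d\n", osf_ttl_match(NF_OSF_TTL_TRUE, 64, 64));
            printf("less,  57 vs 64: %d\n", osf_ttl_match(NF_OSF_TTL_LESS, 57, 64));
            printf("unset, 57 vs 64: %d\n", osf_ttl_match(-1, 57, 64));
            return 0;
    }
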
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index f5745e4c6513e7a6bc8d1814e6efb3f497f76870..f180856398072cae590615c5352dc92378cae331 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/skbuff.h>
 #include <linux/netlink.h>
 #include <linux/vmalloc.h>
+#include <linux/rhashtable.h>
 #include <linux/netfilter.h>
 #include <linux/netfilter/nfnetlink.h>
 #include <linux/netfilter/nf_tables.h>
@@ -455,20 +456,59 @@ __nf_tables_chain_type_lookup(const struct nlattr *nla, u8 family)
        return NULL;
 }
 
+/*
+ * Loading a module requires dropping the mutex that guards the
+ * transaction.
+ * We first need to abort any pending transactions, as once the
+ * mutex is unlocked a different client could start a new
+ * transaction. It must not see any 'future generation'
+ * changes, as these changes will never happen.
+ */
+#ifdef CONFIG_MODULES
+static int __nf_tables_abort(struct net *net);
+
+static void nft_request_module(struct net *net, const char *fmt, ...)
+{
+       char module_name[MODULE_NAME_LEN];
+       va_list args;
+       int ret;
+
+       __nf_tables_abort(net);
+
+       va_start(args, fmt);
+       ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args);
+       va_end(args);
+       if (WARN(ret >= MODULE_NAME_LEN, "truncated: '%s' (len %d)", module_name, ret))
+               return;
+
+       mutex_unlock(&net->nft.commit_mutex);
+       request_module("%s", module_name);
+       mutex_lock(&net->nft.commit_mutex);
+}
+#endif
+
+static void lockdep_nfnl_nft_mutex_not_held(void)
+{
+#ifdef CONFIG_PROVE_LOCKING
+       WARN_ON_ONCE(lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES));
+#endif
+}
+
 static const struct nft_chain_type *
-nf_tables_chain_type_lookup(const struct nlattr *nla, u8 family, bool autoload)
+nf_tables_chain_type_lookup(struct net *net, const struct nlattr *nla,
+                           u8 family, bool autoload)
 {
        const struct nft_chain_type *type;
 
        type = __nf_tables_chain_type_lookup(nla, family);
        if (type != NULL)
                return type;
+
+       lockdep_nfnl_nft_mutex_not_held();
 #ifdef CONFIG_MODULES
        if (autoload) {
-               nfnl_unlock(NFNL_SUBSYS_NFTABLES);
-               request_module("nft-chain-%u-%.*s", family,
-                              nla_len(nla), (const char *)nla_data(nla));
-               nfnl_lock(NFNL_SUBSYS_NFTABLES);
+               nft_request_module(net, "nft-chain-%u-%.*s", family,
+                                  nla_len(nla), (const char *)nla_data(nla));
                type = __nf_tables_chain_type_lookup(nla, family);
                if (type != NULL)
                        return ERR_PTR(-EAGAIN);
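The pattern introduced above recurs at every autoload site touched by this series: abort pending transactions, drop the commit mutex, load the module, retake the mutex, then return -EAGAIN so nfnetlink replays the whole batch. A minimal userspace analog of that dance (all names here are stand-ins, not kernel API):

#include <errno.h>
#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t commit_mutex = PTHREAD_MUTEX_INITIALIZER;
static bool type_registered;            /* stands in for the type registry */

static void slow_load(void)             /* stands in for request_module() */
{
        type_registered = true;
}

/* Called with commit_mutex held, like nf_tables_chain_type_lookup(). */
static int lookup_autoload(void)
{
        if (type_registered)
                return 0;               /* fast path, lock never dropped */

        /* pending transactions were aborted before this point; once the
         * mutex drops, another client may commit a new generation */
        pthread_mutex_unlock(&commit_mutex);
        slow_load();                    /* may sleep for a long time */
        pthread_mutex_lock(&commit_mutex);

        /* -EAGAIN asks the caller to replay the batch against the
         * now-registered type instead of trusting stale state */
        return type_registered ? -EAGAIN : -ENOENT;
}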
@@ -772,6 +812,7 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
        struct nft_ctx ctx;
        int err;
 
+       lockdep_assert_held(&net->nft.commit_mutex);
        attr = nla[NFTA_TABLE_NAME];
        table = nft_table_lookup(net, attr, family, genmask);
        if (IS_ERR(table)) {
@@ -1012,7 +1053,17 @@ nft_chain_lookup_byhandle(const struct nft_table *table, u64 handle, u8 genmask)
        return ERR_PTR(-ENOENT);
 }
 
-static struct nft_chain *nft_chain_lookup(struct nft_table *table,
+static bool lockdep_commit_lock_is_held(struct net *net)
+{
+#ifdef CONFIG_PROVE_LOCKING
+       return lockdep_is_held(&net->nft.commit_mutex);
+#else
+       return true;
+#endif
+}
+
+static struct nft_chain *nft_chain_lookup(struct net *net,
+                                         struct nft_table *table,
                                          const struct nlattr *nla, u8 genmask)
 {
        char search[NFT_CHAIN_MAXNAMELEN + 1];
@@ -1025,7 +1076,7 @@ static struct nft_chain *nft_chain_lookup(struct nft_table *table,
        nla_strlcpy(search, nla, sizeof(search));
 
        WARN_ON(!rcu_read_lock_held() &&
-               !lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES));
+               !lockdep_commit_lock_is_held(net));
 
        chain = ERR_PTR(-ENOENT);
        rcu_read_lock();
@@ -1265,7 +1316,7 @@ static int nf_tables_getchain(struct net *net, struct sock *nlsk,
                return PTR_ERR(table);
        }
 
-       chain = nft_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask);
+       chain = nft_chain_lookup(net, table, nla[NFTA_CHAIN_NAME], genmask);
        if (IS_ERR(chain)) {
                NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_NAME]);
                return PTR_ERR(chain);
@@ -1398,6 +1449,9 @@ static int nft_chain_parse_hook(struct net *net,
        struct net_device *dev;
        int err;
 
+       lockdep_assert_held(&net->nft.commit_mutex);
+       lockdep_nfnl_nft_mutex_not_held();
+
        err = nla_parse_nested(ha, NFTA_HOOK_MAX, nla[NFTA_CHAIN_HOOK],
                               nft_hook_policy, NULL);
        if (err < 0)
@@ -1412,7 +1466,7 @@ static int nft_chain_parse_hook(struct net *net,
 
        type = chain_type[family][NFT_CHAIN_T_DEFAULT];
        if (nla[NFTA_CHAIN_TYPE]) {
-               type = nf_tables_chain_type_lookup(nla[NFTA_CHAIN_TYPE],
+               type = nf_tables_chain_type_lookup(net, nla[NFTA_CHAIN_TYPE],
                                                   family, create);
                if (IS_ERR(type))
                        return PTR_ERR(type);
@@ -1631,7 +1685,8 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
            nla[NFTA_CHAIN_NAME]) {
                struct nft_chain *chain2;
 
-               chain2 = nft_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask);
+               chain2 = nft_chain_lookup(ctx->net, table,
+                                         nla[NFTA_CHAIN_NAME], genmask);
                if (!IS_ERR(chain2))
                        return -EEXIST;
        }
@@ -1710,6 +1765,8 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
 
        create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
 
+       lockdep_assert_held(&net->nft.commit_mutex);
+
        table = nft_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, genmask);
        if (IS_ERR(table)) {
                NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TABLE]);
@@ -1728,7 +1785,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
                }
                attr = nla[NFTA_CHAIN_HANDLE];
        } else {
-               chain = nft_chain_lookup(table, attr, genmask);
+               chain = nft_chain_lookup(net, table, attr, genmask);
                if (IS_ERR(chain)) {
                        if (PTR_ERR(chain) != -ENOENT) {
                                NL_SET_BAD_ATTR(extack, attr);
@@ -1806,7 +1863,7 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk,
                chain = nft_chain_lookup_byhandle(table, handle, genmask);
        } else {
                attr = nla[NFTA_CHAIN_NAME];
-               chain = nft_chain_lookup(table, attr, genmask);
+               chain = nft_chain_lookup(net, table, attr, genmask);
        }
        if (IS_ERR(chain)) {
                NL_SET_BAD_ATTR(extack, attr);
@@ -1891,7 +1948,8 @@ static const struct nft_expr_type *__nft_expr_type_get(u8 family,
        return NULL;
 }
 
-static const struct nft_expr_type *nft_expr_type_get(u8 family,
+static const struct nft_expr_type *nft_expr_type_get(struct net *net,
+                                                    u8 family,
                                                     struct nlattr *nla)
 {
        const struct nft_expr_type *type;
@@ -1903,19 +1961,16 @@ static const struct nft_expr_type *nft_expr_type_get(u8 family,
        if (type != NULL && try_module_get(type->owner))
                return type;
 
+       lockdep_nfnl_nft_mutex_not_held();
 #ifdef CONFIG_MODULES
        if (type == NULL) {
-               nfnl_unlock(NFNL_SUBSYS_NFTABLES);
-               request_module("nft-expr-%u-%.*s", family,
-                              nla_len(nla), (char *)nla_data(nla));
-               nfnl_lock(NFNL_SUBSYS_NFTABLES);
+               nft_request_module(net, "nft-expr-%u-%.*s", family,
+                                  nla_len(nla), (char *)nla_data(nla));
                if (__nft_expr_type_get(family, nla))
                        return ERR_PTR(-EAGAIN);
 
-               nfnl_unlock(NFNL_SUBSYS_NFTABLES);
-               request_module("nft-expr-%.*s",
-                              nla_len(nla), (char *)nla_data(nla));
-               nfnl_lock(NFNL_SUBSYS_NFTABLES);
+               nft_request_module(net, "nft-expr-%.*s",
+                                  nla_len(nla), (char *)nla_data(nla));
                if (__nft_expr_type_get(family, nla))
                        return ERR_PTR(-EAGAIN);
        }
@@ -1984,7 +2039,7 @@ static int nf_tables_expr_parse(const struct nft_ctx *ctx,
        if (err < 0)
                return err;
 
-       type = nft_expr_type_get(ctx->family, tb[NFTA_EXPR_NAME]);
+       type = nft_expr_type_get(ctx->net, ctx->family, tb[NFTA_EXPR_NAME]);
        if (IS_ERR(type))
                return PTR_ERR(type);
 
@@ -2349,7 +2404,7 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk,
                return PTR_ERR(table);
        }
 
-       chain = nft_chain_lookup(table, nla[NFTA_RULE_CHAIN], genmask);
+       chain = nft_chain_lookup(net, table, nla[NFTA_RULE_CHAIN], genmask);
        if (IS_ERR(chain)) {
                NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]);
                return PTR_ERR(chain);
@@ -2383,6 +2438,7 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
 {
        struct nft_expr *expr;
 
+       lockdep_assert_held(&ctx->net->nft.commit_mutex);
        /*
         * Careful: some expressions might not be initialized in case this
         * is called on error from nf_tables_newrule().
@@ -2454,8 +2510,6 @@ static int nft_table_validate(struct net *net, const struct nft_table *table)
 
 #define NFT_RULE_MAXEXPRS      128
 
-static struct nft_expr_info *info;
-
 static int nf_tables_newrule(struct net *net, struct sock *nlsk,
                             struct sk_buff *skb, const struct nlmsghdr *nlh,
                             const struct nlattr * const nla[],
@@ -2463,6 +2517,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
 {
        const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
        u8 genmask = nft_genmask_next(net);
+       struct nft_expr_info *info = NULL;
        int family = nfmsg->nfgen_family;
        struct nft_table *table;
        struct nft_chain *chain;
@@ -2477,6 +2532,8 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
        bool create;
        u64 handle, pos_handle;
 
+       lockdep_assert_held(&net->nft.commit_mutex);
+
        create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
 
        table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask);
@@ -2485,7 +2542,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
                return PTR_ERR(table);
        }
 
-       chain = nft_chain_lookup(table, nla[NFTA_RULE_CHAIN], genmask);
+       chain = nft_chain_lookup(net, table, nla[NFTA_RULE_CHAIN], genmask);
        if (IS_ERR(chain)) {
                NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]);
                return PTR_ERR(chain);
@@ -2533,6 +2590,12 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
        n = 0;
        size = 0;
        if (nla[NFTA_RULE_EXPRESSIONS]) {
+               info = kvmalloc_array(NFT_RULE_MAXEXPRS,
+                                     sizeof(struct nft_expr_info),
+                                     GFP_KERNEL);
+               if (!info)
+                       return -ENOMEM;
+
                nla_for_each_nested(tmp, nla[NFTA_RULE_EXPRESSIONS], rem) {
                        err = -EINVAL;
                        if (nla_type(tmp) != NFTA_LIST_ELEM)
@@ -2625,6 +2688,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
                                list_add_rcu(&rule->list, &chain->rules);
                }
        }
+       kvfree(info);
        chain->use++;
 
        if (net->nft.validate_state == NFT_VALIDATE_DO)
@@ -2638,6 +2702,7 @@ err1:
                if (info[i].ops != NULL)
                        module_put(info[i].ops->type->owner);
        }
+       kvfree(info);
        return err;
 }
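With the static 'info' array gone, each newrule request now owns its scratch array, and both the success path and the err1 path must kvfree() it. A userspace sketch of the overflow check that kvmalloc_array() is assumed to perform (the real helper additionally falls back from kmalloc to vmalloc for large allocations, which is not modelled here):

#include <errno.h>
#include <stdint.h>
#include <stdlib.h>

/* Refuse on n * size overflow instead of allocating a short buffer. */
static void *alloc_array(size_t n, size_t size)
{
        if (size && n > SIZE_MAX / size) {
                errno = ENOMEM;
                return NULL;
        }
        return malloc(n * size);
}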
 
@@ -2677,7 +2742,8 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk,
        }
 
        if (nla[NFTA_RULE_CHAIN]) {
-               chain = nft_chain_lookup(table, nla[NFTA_RULE_CHAIN], genmask);
+               chain = nft_chain_lookup(net, table, nla[NFTA_RULE_CHAIN],
+                                        genmask);
                if (IS_ERR(chain)) {
                        NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]);
                        return PTR_ERR(chain);
@@ -2769,11 +2835,11 @@ nft_select_set_ops(const struct nft_ctx *ctx,
        const struct nft_set_type *type;
        u32 flags = 0;
 
+       lockdep_assert_held(&ctx->net->nft.commit_mutex);
+       lockdep_nfnl_nft_mutex_not_held();
 #ifdef CONFIG_MODULES
        if (list_empty(&nf_tables_set_types)) {
-               nfnl_unlock(NFNL_SUBSYS_NFTABLES);
-               request_module("nft-set");
-               nfnl_lock(NFNL_SUBSYS_NFTABLES);
+               nft_request_module(ctx->net, "nft-set");
                if (!list_empty(&nf_tables_set_types))
                        return ERR_PTR(-EAGAIN);
        }
@@ -4818,7 +4884,8 @@ static const struct nft_object_type *__nft_obj_type_get(u32 objtype)
        return NULL;
 }
 
-static const struct nft_object_type *nft_obj_type_get(u32 objtype)
+static const struct nft_object_type *
+nft_obj_type_get(struct net *net, u32 objtype)
 {
        const struct nft_object_type *type;
 
@@ -4826,11 +4893,10 @@ static const struct nft_object_type *nft_obj_type_get(u32 objtype)
        if (type != NULL && try_module_get(type->owner))
                return type;
 
+       lockdep_nfnl_nft_mutex_not_held();
 #ifdef CONFIG_MODULES
        if (type == NULL) {
-               nfnl_unlock(NFNL_SUBSYS_NFTABLES);
-               request_module("nft-obj-%u", objtype);
-               nfnl_lock(NFNL_SUBSYS_NFTABLES);
+               nft_request_module(net, "nft-obj-%u", objtype);
                if (__nft_obj_type_get(objtype))
                        return ERR_PTR(-EAGAIN);
        }
@@ -4882,7 +4948,7 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk,
 
        nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
 
-       type = nft_obj_type_get(objtype);
+       type = nft_obj_type_get(net, objtype);
        if (IS_ERR(type))
                return PTR_ERR(type);
 
@@ -5372,7 +5438,8 @@ static const struct nf_flowtable_type *__nft_flowtable_type_get(u8 family)
        return NULL;
 }
 
-static const struct nf_flowtable_type *nft_flowtable_type_get(u8 family)
+static const struct nf_flowtable_type *
+nft_flowtable_type_get(struct net *net, u8 family)
 {
        const struct nf_flowtable_type *type;
 
@@ -5380,11 +5447,10 @@ static const struct nf_flowtable_type *nft_flowtable_type_get(u8 family)
        if (type != NULL && try_module_get(type->owner))
                return type;
 
+       lockdep_nfnl_nft_mutex_not_held();
 #ifdef CONFIG_MODULES
        if (type == NULL) {
-               nfnl_unlock(NFNL_SUBSYS_NFTABLES);
-               request_module("nf-flowtable-%u", family);
-               nfnl_lock(NFNL_SUBSYS_NFTABLES);
+               nft_request_module(net, "nf-flowtable-%u", family);
                if (__nft_flowtable_type_get(family))
                        return ERR_PTR(-EAGAIN);
        }
@@ -5464,7 +5530,7 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
                goto err1;
        }
 
-       type = nft_flowtable_type_get(family);
+       type = nft_flowtable_type_get(net, family);
        if (IS_ERR(type)) {
                err = PTR_ERR(type);
                goto err2;
@@ -6232,9 +6298,9 @@ static void nf_tables_commit_chain_active(struct net *net, struct nft_chain *cha
        next_genbit = nft_gencursor_next(net);
 
        g0 = rcu_dereference_protected(chain->rules_gen_0,
-                                      lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES));
+                                      lockdep_commit_lock_is_held(net));
        g1 = rcu_dereference_protected(chain->rules_gen_1,
-                                      lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES));
+                                      lockdep_commit_lock_is_held(net));
 
        /* No changes to this chain? */
        if (chain->rules_next == NULL) {
@@ -6444,6 +6510,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
 
        nf_tables_commit_release(net);
        nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN);
+       mutex_unlock(&net->nft.commit_mutex);
 
        return 0;
 }
@@ -6595,12 +6662,25 @@ static void nf_tables_cleanup(struct net *net)
 
 static int nf_tables_abort(struct net *net, struct sk_buff *skb)
 {
-       return __nf_tables_abort(net);
+       int ret = __nf_tables_abort(net);
+
+       mutex_unlock(&net->nft.commit_mutex);
+
+       return ret;
 }
 
 static bool nf_tables_valid_genid(struct net *net, u32 genid)
 {
-       return net->nft.base_seq == genid;
+       bool genid_ok;
+
+       mutex_lock(&net->nft.commit_mutex);
+
+       genid_ok = genid == 0 || net->nft.base_seq == genid;
+       if (!genid_ok)
+               mutex_unlock(&net->nft.commit_mutex);
+
+       /* else, commit mutex has to be released by commit or abort function */
+       return genid_ok;
 }
 
 static const struct nfnetlink_subsystem nf_tables_subsys = {
@@ -6612,6 +6692,7 @@ static const struct nfnetlink_subsystem nf_tables_subsys = {
        .abort          = nf_tables_abort,
        .cleanup        = nf_tables_cleanup,
        .valid_genid    = nf_tables_valid_genid,
+       .owner          = THIS_MODULE,
 };
 
 int nft_chain_validate_dependency(const struct nft_chain *chain,
@@ -6931,8 +7012,8 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
        case NFT_GOTO:
                if (!tb[NFTA_VERDICT_CHAIN])
                        return -EINVAL;
-               chain = nft_chain_lookup(ctx->table, tb[NFTA_VERDICT_CHAIN],
-                                        genmask);
+               chain = nft_chain_lookup(ctx->net, ctx->table,
+                                        tb[NFTA_VERDICT_CHAIN], genmask);
                if (IS_ERR(chain))
                        return PTR_ERR(chain);
                if (nft_is_base_chain(chain))
@@ -7177,6 +7258,7 @@ static int __net_init nf_tables_init_net(struct net *net)
 {
        INIT_LIST_HEAD(&net->nft.tables);
        INIT_LIST_HEAD(&net->nft.commit_list);
+       mutex_init(&net->nft.commit_mutex);
        net->nft.base_seq = 1;
        net->nft.validate_state = NFT_VALIDATE_SKIP;
 
@@ -7185,11 +7267,11 @@ static int __net_init nf_tables_init_net(struct net *net)
 
 static void __net_exit nf_tables_exit_net(struct net *net)
 {
-       nfnl_lock(NFNL_SUBSYS_NFTABLES);
+       mutex_lock(&net->nft.commit_mutex);
        if (!list_empty(&net->nft.commit_list))
                __nf_tables_abort(net);
        __nft_release_tables(net);
-       nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+       mutex_unlock(&net->nft.commit_mutex);
        WARN_ON_ONCE(!list_empty(&net->nft.tables));
 }
 
@@ -7204,29 +7286,19 @@ static int __init nf_tables_module_init(void)
 
        nft_chain_filter_init();
 
-       info = kmalloc_array(NFT_RULE_MAXEXPRS, sizeof(struct nft_expr_info),
-                            GFP_KERNEL);
-       if (info == NULL) {
-               err = -ENOMEM;
-               goto err1;
-       }
-
        err = nf_tables_core_module_init();
        if (err < 0)
-               goto err2;
+               return err;
 
        err = nfnetlink_subsys_register(&nf_tables_subsys);
        if (err < 0)
-               goto err3;
+               goto err;
 
        register_netdevice_notifier(&nf_tables_flowtable_notifier);
 
        return register_pernet_subsys(&nf_tables_net_ops);
-err3:
+err:
        nf_tables_core_module_exit();
-err2:
-       kfree(info);
-err1:
        return err;
 }
 
@@ -7238,7 +7310,6 @@ static void __exit nf_tables_module_exit(void)
        unregister_pernet_subsys(&nf_tables_net_ops);
        rcu_barrier();
        nf_tables_core_module_exit();
-       kfree(info);
 }
 
 module_init(nf_tables_module_init);
index e1b6be29848d2c00590ce98458f84b4243a5b45b..916913454624f2740212c62d1c0ce61bc8ae6f73 100644 (file)
@@ -331,18 +331,27 @@ replay:
                }
        }
 
-       if (!ss->commit || !ss->abort) {
+       if (!ss->valid_genid || !ss->commit || !ss->abort) {
                nfnl_unlock(subsys_id);
                netlink_ack(oskb, nlh, -EOPNOTSUPP, NULL);
                return kfree_skb(skb);
        }
 
-       if (genid && ss->valid_genid && !ss->valid_genid(net, genid)) {
+       if (!try_module_get(ss->owner)) {
+               nfnl_unlock(subsys_id);
+               netlink_ack(oskb, nlh, -EOPNOTSUPP, NULL);
+               return kfree_skb(skb);
+       }
+
+       if (!ss->valid_genid(net, genid)) {
+               module_put(ss->owner);
                nfnl_unlock(subsys_id);
                netlink_ack(oskb, nlh, -ERESTART, NULL);
                return kfree_skb(skb);
        }
 
+       nfnl_unlock(subsys_id);
+
        while (skb->len >= nlmsg_total_size(0)) {
                int msglen, type;
 
@@ -464,14 +473,10 @@ ack:
        }
 done:
        if (status & NFNL_BATCH_REPLAY) {
-               const struct nfnetlink_subsystem *ss2;
-
-               ss2 = nfnl_dereference_protected(subsys_id);
-               if (ss2 == ss)
-                       ss->abort(net, oskb);
+               ss->abort(net, oskb);
                nfnl_err_reset(&err_list);
-               nfnl_unlock(subsys_id);
                kfree_skb(skb);
+               module_put(ss->owner);
                goto replay;
        } else if (status == NFNL_BATCH_DONE) {
                err = ss->commit(net, oskb);
@@ -489,8 +494,8 @@ done:
                ss->cleanup(net);
 
        nfnl_err_deliver(&err_list, oskb);
-       nfnl_unlock(subsys_id);
        kfree_skb(skb);
+       module_put(ss->owner);
 }
 
 static const struct nla_policy nfnl_batch_policy[NFNL_BATCH_MAX + 1] = {
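Taken together, the hunks above change the batch contract: valid_genid() is now mandatory and takes the per-netns commit mutex, commit() or abort() must release it, and try_module_get()/module_put() pin the subsystem module so it cannot unload once nfnl_lock is dropped. A condensed model of that control flow (hypothetical types, error codes simplified to literals since ERESTART is kernel-internal):

#include <stdbool.h>
#include <stddef.h>

/* Hypothetical stand-in for struct nfnetlink_subsystem. */
struct subsys {
        bool (*valid_genid)(unsigned int genid); /* locks commit mutex on success */
        int  (*commit)(void);                    /* releases commit mutex */
        int  (*abort)(void);                     /* releases commit mutex */
};

static int run_batch(const struct subsys *ss, unsigned int genid, bool done)
{
        if (!ss->valid_genid || !ss->commit || !ss->abort)
                return -95;             /* EOPNOTSUPP in the kernel */
        /* try_module_get(ss->owner) pins the module at this point */
        if (!ss->valid_genid(genid))
                return -85;             /* ERESTART: caller replays the batch */
        /* ... nfnl_unlock(); parse messages with the subsys lock dropped ... */
        return done ? ss->commit() : ss->abort(); /* either path unlocks */
        /* module_put(ss->owner) follows commit/abort */
}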
index 9ee5fa551fa68c2a04d34881817f0b27ff066783..d9d952fad3e0beae21b02f4152060c696c32ede7 100644 (file)
@@ -26,7 +26,6 @@
 #include <net/sock.h>
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_core.h>
-#include <net/netfilter/nf_conntrack_l3proto.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_tuple.h>
 #include <net/netfilter/nf_conntrack_timeout.h>
@@ -47,7 +46,7 @@ static const struct nla_policy cttimeout_nla_policy[CTA_TIMEOUT_MAX+1] = {
 };
 
 static int
-ctnl_timeout_parse_policy(void *timeouts,
+ctnl_timeout_parse_policy(void *timeout,
                          const struct nf_conntrack_l4proto *l4proto,
                          struct net *net, const struct nlattr *attr)
 {
@@ -68,7 +67,7 @@ ctnl_timeout_parse_policy(void *timeouts,
        if (ret < 0)
                goto err;
 
-       ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, net, timeouts);
+       ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, net, timeout);
 
 err:
        kfree(tb);
@@ -373,7 +372,6 @@ static int cttimeout_default_set(struct net *net, struct sock *ctnl,
                                 struct netlink_ext_ack *extack)
 {
        const struct nf_conntrack_l4proto *l4proto;
-       unsigned int *timeouts;
        __u16 l3num;
        __u8 l4num;
        int ret;
@@ -393,9 +391,7 @@ static int cttimeout_default_set(struct net *net, struct sock *ctnl,
                goto err;
        }
 
-       timeouts = l4proto->get_timeouts(net);
-
-       ret = ctnl_timeout_parse_policy(timeouts, l4proto, net,
+       ret = ctnl_timeout_parse_policy(NULL, l4proto, net,
                                        cda[CTA_TIMEOUT_DATA]);
        if (ret < 0)
                goto err;
@@ -432,7 +428,6 @@ cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid,
 
        if (likely(l4proto->ctnl_timeout.obj_to_nlattr)) {
                struct nlattr *nest_parms;
-               unsigned int *timeouts = l4proto->get_timeouts(net);
                int ret;
 
                nest_parms = nla_nest_start(skb,
@@ -440,7 +435,7 @@ cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid,
                if (!nest_parms)
                        goto nla_put_failure;
 
-               ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, timeouts);
+               ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, NULL);
                if (ret < 0)
                        goto nla_put_failure;
 
index d21834bed805b789c72d79640d8e98b2a6cf247c..ea5b7c4944f69442c5811fb9124de53b8a19dc10 100644 (file)
@@ -322,7 +322,7 @@ static int nf_tables_netdev_event(struct notifier_block *this,
        if (!ctx.net)
                return NOTIFY_DONE;
 
-       nfnl_lock(NFNL_SUBSYS_NFTABLES);
+       mutex_lock(&ctx.net->nft.commit_mutex);
        list_for_each_entry(table, &ctx.net->nft.tables, list) {
                if (table->family != NFPROTO_NETDEV)
                        continue;
@@ -337,7 +337,7 @@ static int nf_tables_netdev_event(struct notifier_block *this,
                        nft_netdev_event(event, dev, &ctx);
                }
        }
-       nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+       mutex_unlock(&ctx.net->nft.commit_mutex);
        put_net(ctx.net);
 
        return NOTIFY_DONE;
index a832c59f0a9cbeb30cd1d261c2be81fc8b5d7027..b90d96ba4a12933a1832c0836a01cc406530edb3 100644 (file)
 #include <net/netfilter/nf_conntrack_zones.h>
 
 struct nft_connlimit {
-       spinlock_t              lock;
-       struct hlist_head       hhead;
-       u32                     limit;
-       bool                    invert;
+       struct nf_conncount_list        list;
+       u32                             limit;
+       bool                            invert;
 };
 
 static inline void nft_connlimit_do_eval(struct nft_connlimit *priv,
@@ -45,21 +44,19 @@ static inline void nft_connlimit_do_eval(struct nft_connlimit *priv,
                return;
        }
 
-       spin_lock_bh(&priv->lock);
-       count = nf_conncount_lookup(nft_net(pkt), &priv->hhead, tuple_ptr, zone,
-                                   &addit);
+       nf_conncount_lookup(nft_net(pkt), &priv->list, tuple_ptr, zone,
+                           &addit);
+       count = priv->list.count;
 
        if (!addit)
                goto out;
 
-       if (!nf_conncount_add(&priv->hhead, tuple_ptr, zone)) {
+       if (nf_conncount_add(&priv->list, tuple_ptr, zone) == NF_CONNCOUNT_ERR) {
                regs->verdict.code = NF_DROP;
-               spin_unlock_bh(&priv->lock);
                return;
        }
        count++;
 out:
-       spin_unlock_bh(&priv->lock);
 
        if ((count > priv->limit) ^ priv->invert) {
                regs->verdict.code = NFT_BREAK;
@@ -87,8 +84,7 @@ static int nft_connlimit_do_init(const struct nft_ctx *ctx,
                        invert = true;
        }
 
-       spin_lock_init(&priv->lock);
-       INIT_HLIST_HEAD(&priv->hhead);
+       nf_conncount_list_init(&priv->list);
        priv->limit     = limit;
        priv->invert    = invert;
 
@@ -99,7 +95,7 @@ static void nft_connlimit_do_destroy(const struct nft_ctx *ctx,
                                     struct nft_connlimit *priv)
 {
        nf_ct_netns_put(ctx->net, ctx->family);
-       nf_conncount_cache_free(&priv->hhead);
+       nf_conncount_cache_free(&priv->list);
 }
 
 static int nft_connlimit_do_dump(struct sk_buff *skb,
@@ -212,8 +208,7 @@ static int nft_connlimit_clone(struct nft_expr *dst, const struct nft_expr *src)
        struct nft_connlimit *priv_dst = nft_expr_priv(dst);
        struct nft_connlimit *priv_src = nft_expr_priv(src);
 
-       spin_lock_init(&priv_dst->lock);
-       INIT_HLIST_HEAD(&priv_dst->hhead);
+       nf_conncount_list_init(&priv_dst->list);
        priv_dst->limit  = priv_src->limit;
        priv_dst->invert = priv_src->invert;
 
@@ -225,21 +220,14 @@ static void nft_connlimit_destroy_clone(const struct nft_ctx *ctx,
 {
        struct nft_connlimit *priv = nft_expr_priv(expr);
 
-       nf_conncount_cache_free(&priv->hhead);
+       nf_conncount_cache_free(&priv->list);
 }
 
 static bool nft_connlimit_gc(struct net *net, const struct nft_expr *expr)
 {
        struct nft_connlimit *priv = nft_expr_priv(expr);
-       bool addit, ret;
 
-       spin_lock_bh(&priv->lock);
-       nf_conncount_lookup(net, &priv->hhead, NULL, &nf_ct_zone_dflt, &addit);
-
-       ret = hlist_empty(&priv->hhead);
-       spin_unlock_bh(&priv->lock);
-
-       return ret;
+       return nf_conncount_gc_list(net, &priv->list);
 }
 
 static struct nft_expr_type nft_connlimit_type;
index 1435ffc5f57ed4f652de56b1063208717c28bab6..3bc82ee5464d164d69c03f25e43f0833a5cae3f4 100644 (file)
@@ -870,7 +870,7 @@ static void nft_ct_helper_obj_eval(struct nft_object *obj,
        if (test_bit(IPS_HELPER_BIT, &ct->status))
                return;
 
-       help = nf_ct_helper_ext_add(ct, to_assign, GFP_ATOMIC);
+       help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
        if (help) {
                rcu_assign_pointer(help->helper, to_assign);
                set_bit(IPS_HELPER_BIT, &ct->status);
index 27d7e4598ab63c982b034136a3115f710b545679..81184c244d1a941c659382348b121e7f6e495687 100644 (file)
@@ -118,6 +118,8 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
        u64 timeout;
        int err;
 
+       lockdep_assert_held(&ctx->net->nft.commit_mutex);
+
        if (tb[NFTA_DYNSET_SET_NAME] == NULL ||
            tb[NFTA_DYNSET_OP] == NULL ||
            tb[NFTA_DYNSET_SREG_KEY] == NULL)
index 1105a23bda5ec93a260dbce5be9175efa2d96c71..2b94dcc4345656a852e171afd7bb853f252ccaf7 100644 (file)
@@ -107,7 +107,8 @@ static void nft_meta_get_eval(const struct nft_expr *expr,
                break;
        case NFT_META_SKUID:
                sk = skb_to_full_sk(skb);
-               if (!sk || !sk_fullsock(sk))
+               if (!sk || !sk_fullsock(sk) ||
+                   !net_eq(nft_net(pkt), sock_net(sk)))
                        goto err;
 
                read_lock_bh(&sk->sk_callback_lock);
@@ -123,7 +124,8 @@ static void nft_meta_get_eval(const struct nft_expr *expr,
                break;
        case NFT_META_SKGID:
                sk = skb_to_full_sk(skb);
-               if (!sk || !sk_fullsock(sk))
+               if (!sk || !sk_fullsock(sk) ||
+                   !net_eq(nft_net(pkt), sock_net(sk)))
                        goto err;
 
                read_lock_bh(&sk->sk_callback_lock);
@@ -214,7 +216,8 @@ static void nft_meta_get_eval(const struct nft_expr *expr,
 #ifdef CONFIG_CGROUP_NET_CLASSID
        case NFT_META_CGROUP:
                sk = skb_to_full_sk(skb);
-               if (!sk || !sk_fullsock(sk))
+               if (!sk || !sk_fullsock(sk) ||
+                   !net_eq(nft_net(pkt), sock_net(sk)))
                        goto err;
                *dest = sock_cgroup_classid(&sk->sk_cgrp_data);
                break;
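The same guard is added across nft_meta, nft_socket, xt_cgroup, xt_owner, xt_recent and xt_socket in the hunks that follow: socket state is only trusted when the socket actually belongs to the packet's network namespace. The recurring predicate, written out once as a kernel-style sketch (not a standalone program; all four helpers are real kernel API):

/* Only trust sk fields when the full socket lives in the same netns
 * as the packet under evaluation. */
static inline bool pkt_sk_usable(const struct nft_pktinfo *pkt,
                                 const struct sock *sk)
{
        return sk && sk_fullsock(sk) && net_eq(nft_net(pkt), sock_net(sk));
}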
index 74e1b3bd695417daf3afb725d3183658a3a81342..d7f3776dfd719d402ae178890107db8ebd1627f2 100644 (file)
@@ -23,12 +23,15 @@ static void nft_socket_eval(const struct nft_expr *expr,
        struct sock *sk = skb->sk;
        u32 *dest = &regs->data[priv->dreg];
 
+       if (sk && !net_eq(nft_net(pkt), sock_net(sk)))
+               sk = NULL;
+
        if (!sk)
                switch(nft_pf(pkt)) {
                case NFPROTO_IPV4:
                        sk = nf_sk_lookup_slow_v4(nft_net(pkt), skb, nft_in(pkt));
                        break;
-#if IS_ENABLED(CONFIG_NF_SOCKET_IPV6)
+#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
                case NFPROTO_IPV6:
                        sk = nf_sk_lookup_slow_v6(nft_net(pkt), skb, nft_in(pkt));
                        break;
@@ -39,8 +42,8 @@ static void nft_socket_eval(const struct nft_expr *expr,
                        return;
                }
 
-       if(!sk) {
-               nft_reg_store8(dest, 0);
+       if (!sk) {
+               regs->verdict.code = NFT_BREAK;
                return;
        }
 
@@ -51,6 +54,14 @@ static void nft_socket_eval(const struct nft_expr *expr,
        case NFT_SOCKET_TRANSPARENT:
                nft_reg_store8(dest, inet_sk_transparent(sk));
                break;
+       case NFT_SOCKET_MARK:
+               if (sk_fullsock(sk)) {
+                       *dest = sk->sk_mark;
+               } else {
+                       regs->verdict.code = NFT_BREAK;
+                       return;
+               }
+               break;
        default:
                WARN_ON(1);
                regs->verdict.code = NFT_BREAK;
@@ -74,7 +85,7 @@ static int nft_socket_init(const struct nft_ctx *ctx,
 
        switch(ctx->family) {
        case NFPROTO_IPV4:
-#if IS_ENABLED(CONFIG_NF_SOCKET_IPV6)
+#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
        case NFPROTO_IPV6:
 #endif
        case NFPROTO_INET:
@@ -88,6 +99,9 @@ static int nft_socket_init(const struct nft_ctx *ctx,
        case NFT_SOCKET_TRANSPARENT:
                len = sizeof(u8);
                break;
+       case NFT_SOCKET_MARK:
+               len = sizeof(u32);
+               break;
        default:
                return -EOPNOTSUPP;
        }
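NFT_SOCKET_MARK exposes sk->sk_mark (a u32, hence the wider register length) to rule evaluation, and only for full sockets. The mark it reads is the one a process sets with SO_MARK; a minimal userspace counterpart, assuming CAP_NET_ADMIN:

#include <stdio.h>
#include <sys/socket.h>

int main(void)
{
        unsigned int mark = 0x2a;  /* value a hypothetical 'socket mark 0x2a' rule would match */
        int fd = socket(AF_INET, SOCK_DGRAM, 0);

        if (fd < 0 ||
            setsockopt(fd, SOL_SOCKET, SO_MARK, &mark, sizeof(mark)) < 0)
                perror("SO_MARK (needs CAP_NET_ADMIN)");
        return 0;
}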
index 0b660c568156e60978c129efd78cf836c99af2c9..e8da9a9bba73f04bbd016a4c26d9960fe5fc3c26 100644 (file)
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/kernel.h>
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/netfilter_ipv6.h>
 #include <net/netfilter/nf_queue.h>
+#include <net/ip6_checksum.h>
+
+#ifdef CONFIG_INET
+__sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
+                      unsigned int dataoff, u8 protocol)
+{
+       const struct iphdr *iph = ip_hdr(skb);
+       __sum16 csum = 0;
+
+       switch (skb->ip_summed) {
+       case CHECKSUM_COMPLETE:
+               if (hook != NF_INET_PRE_ROUTING && hook != NF_INET_LOCAL_IN)
+                       break;
+               if ((protocol == 0 && !csum_fold(skb->csum)) ||
+                   !csum_tcpudp_magic(iph->saddr, iph->daddr,
+                                      skb->len - dataoff, protocol,
+                                      skb->csum)) {
+                       skb->ip_summed = CHECKSUM_UNNECESSARY;
+                       break;
+               }
+               /* fall through */
+       case CHECKSUM_NONE:
+               if (protocol == 0)
+                       skb->csum = 0;
+               else
+                       skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
+                                                      skb->len - dataoff,
+                                                      protocol, 0);
+               csum = __skb_checksum_complete(skb);
+       }
+       return csum;
+}
+EXPORT_SYMBOL(nf_ip_checksum);
+#endif
+
+static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook,
+                                     unsigned int dataoff, unsigned int len,
+                                     u8 protocol)
+{
+       const struct iphdr *iph = ip_hdr(skb);
+       __sum16 csum = 0;
+
+       switch (skb->ip_summed) {
+       case CHECKSUM_COMPLETE:
+               if (len == skb->len - dataoff)
+                       return nf_ip_checksum(skb, hook, dataoff, protocol);
+               /* fall through */
+       case CHECKSUM_NONE:
+               skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, protocol,
+                                              skb->len - dataoff, 0);
+               skb->ip_summed = CHECKSUM_NONE;
+               return __skb_checksum_complete_head(skb, dataoff + len);
+       }
+       return csum;
+}
+
+__sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook,
+                       unsigned int dataoff, u8 protocol)
+{
+       const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+       __sum16 csum = 0;
+
+       switch (skb->ip_summed) {
+       case CHECKSUM_COMPLETE:
+               if (hook != NF_INET_PRE_ROUTING && hook != NF_INET_LOCAL_IN)
+                       break;
+               if (!csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
+                                    skb->len - dataoff, protocol,
+                                    csum_sub(skb->csum,
+                                             skb_checksum(skb, 0,
+                                                          dataoff, 0)))) {
+                       skb->ip_summed = CHECKSUM_UNNECESSARY;
+                       break;
+               }
+               /* fall through */
+       case CHECKSUM_NONE:
+               skb->csum = ~csum_unfold(
+                               csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
+                                            skb->len - dataoff,
+                                            protocol,
+                                            csum_sub(0,
+                                                     skb_checksum(skb, 0,
+                                                                  dataoff, 0))));
+               csum = __skb_checksum_complete(skb);
+       }
+       return csum;
+}
+EXPORT_SYMBOL(nf_ip6_checksum);
+
+static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook,
+                                      unsigned int dataoff, unsigned int len,
+                                      u8 protocol)
+{
+       const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+       __wsum hsum;
+       __sum16 csum = 0;
+
+       switch (skb->ip_summed) {
+       case CHECKSUM_COMPLETE:
+               if (len == skb->len - dataoff)
+                       return nf_ip6_checksum(skb, hook, dataoff, protocol);
+               /* fall through */
+       case CHECKSUM_NONE:
+               hsum = skb_checksum(skb, 0, dataoff, 0);
+               skb->csum = ~csum_unfold(csum_ipv6_magic(&ip6h->saddr,
+                                                        &ip6h->daddr,
+                                                        skb->len - dataoff,
+                                                        protocol,
+                                                        csum_sub(0, hsum)));
+               skb->ip_summed = CHECKSUM_NONE;
+               return __skb_checksum_complete_head(skb, dataoff + len);
+       }
+       return csum;
+}
 
 __sum16 nf_checksum(struct sk_buff *skb, unsigned int hook,
-                   unsigned int dataoff, u_int8_t protocol,
+                   unsigned int dataoff, u8 protocol,
                    unsigned short family)
 {
-       const struct nf_ipv6_ops *v6ops;
        __sum16 csum = 0;
 
        switch (family) {
@@ -16,9 +130,7 @@ __sum16 nf_checksum(struct sk_buff *skb, unsigned int hook,
                csum = nf_ip_checksum(skb, hook, dataoff, protocol);
                break;
        case AF_INET6:
-               v6ops = rcu_dereference(nf_ipv6_ops);
-               if (v6ops)
-                       csum = v6ops->checksum(skb, hook, dataoff, protocol);
+               csum = nf_ip6_checksum(skb, hook, dataoff, protocol);
                break;
        }
 
@@ -28,9 +140,8 @@ EXPORT_SYMBOL_GPL(nf_checksum);
 
 __sum16 nf_checksum_partial(struct sk_buff *skb, unsigned int hook,
                            unsigned int dataoff, unsigned int len,
-                           u_int8_t protocol, unsigned short family)
+                           u8 protocol, unsigned short family)
 {
-       const struct nf_ipv6_ops *v6ops;
        __sum16 csum = 0;
 
        switch (family) {
@@ -39,10 +150,8 @@ __sum16 nf_checksum_partial(struct sk_buff *skb, unsigned int hook,
                                              protocol);
                break;
        case AF_INET6:
-               v6ops = rcu_dereference(nf_ipv6_ops);
-               if (v6ops)
-                       csum = v6ops->checksum_partial(skb, hook, dataoff, len,
-                                                      protocol);
+               csum = nf_ip6_checksum_partial(skb, hook, dataoff, len,
+                                              protocol);
                break;
        }
 
index 03b9a50ec93bd958d1ef46d59a981f8be1d89aa4..7ba454e9e3fa3a89492e9fad1d41bc21a95eb5c7 100644 (file)
@@ -93,7 +93,7 @@ xt_ct_set_helper(struct nf_conn *ct, const char *helper_name,
                return -ENOENT;
        }
 
-       help = nf_ct_helper_ext_add(ct, helper, GFP_KERNEL);
+       help = nf_ct_helper_ext_add(ct, GFP_KERNEL);
        if (help == NULL) {
                nf_conntrack_helper_put(helper);
                return -ENOMEM;
index 475957cfcf50a36eaecc1e5e825c007ae7d2880b..0d0d68c989df5014aa9794c5d07658b0d26d0a95 100644 (file)
@@ -38,7 +38,7 @@ tee_tg4(struct sk_buff *skb, const struct xt_action_param *par)
        return XT_CONTINUE;
 }
 
-#if IS_ENABLED(CONFIG_IPV6)
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
 static unsigned int
 tee_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 {
@@ -141,7 +141,7 @@ static struct xt_target tee_tg_reg[] __read_mostly = {
                .destroy    = tee_tg_destroy,
                .me         = THIS_MODULE,
        },
-#if IS_ENABLED(CONFIG_IPV6)
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
        {
                .name       = "TEE",
                .revision   = 1,
index d76550a8b642aafd96853332d18db898e43ff587..ad7420cdc4395c0852bf8ea0a215ebb7cbc0c053 100644 (file)
 #include <net/netfilter/nf_tproxy.h>
 #include <linux/netfilter/xt_TPROXY.h>
 
-/* assign a socket to the skb -- consumes sk */
-static void
-nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk)
-{
-       skb_orphan(skb);
-       skb->sk = sk;
-       skb->destructor = sock_edemux;
-}
-
 static unsigned int
 tproxy_tg4(struct net *net, struct sk_buff *skb, __be32 laddr, __be16 lport,
           u_int32_t mark_mask, u_int32_t mark_value)
index 7df2dece57d30f6c4e921cf3eeff40f5319b672a..5d92e178198088b85d040473f909aa9eab78c18e 100644 (file)
@@ -72,8 +72,9 @@ static bool
 cgroup_mt_v0(const struct sk_buff *skb, struct xt_action_param *par)
 {
        const struct xt_cgroup_info_v0 *info = par->matchinfo;
+       struct sock *sk = skb->sk;
 
-       if (skb->sk == NULL || !sk_fullsock(skb->sk))
+       if (!sk || !sk_fullsock(sk) || !net_eq(xt_net(par), sock_net(sk)))
                return false;
 
        return (info->id == sock_cgroup_classid(&skb->sk->sk_cgrp_data)) ^
@@ -85,8 +86,9 @@ static bool cgroup_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
        const struct xt_cgroup_info_v1 *info = par->matchinfo;
        struct sock_cgroup_data *skcd = &skb->sk->sk_cgrp_data;
        struct cgroup *ancestor = info->priv;
+       struct sock *sk = skb->sk;
 
-       if (!skb->sk || !sk_fullsock(skb->sk))
+       if (!sk || !sk_fullsock(sk) || !net_eq(xt_net(par), sock_net(sk)))
                return false;
 
        if (ancestor)
index 3d705c688a27b53afdcb53460ed6509e3e8024f4..46686fb73784bf71c79282e87e3f01f2c0411f5c 100644 (file)
@@ -67,7 +67,7 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par)
        struct sock *sk = skb_to_full_sk(skb);
        struct net *net = xt_net(par);
 
-       if (sk == NULL || sk->sk_socket == NULL)
+       if (!sk || !sk->sk_socket || !net_eq(net, sock_net(sk)))
                return (info->match ^ info->invert) == 0;
        else if (info->match & info->invert & XT_OWNER_SOCKET)
                /*
index 07085c22b19c4d7e0970638b1e361c0f99a2c1dc..f44de4bc2100a811f4c2886668e390a9ac74a82a 100644 (file)
@@ -265,7 +265,8 @@ recent_mt(const struct sk_buff *skb, struct xt_action_param *par)
        }
 
        /* use TTL as seen before forwarding */
-       if (xt_out(par) != NULL && skb->sk == NULL)
+       if (xt_out(par) != NULL &&
+           (!skb->sk || !net_eq(net, sock_net(skb->sk))))
                ttl++;
 
        spin_lock_bh(&recent_lock);
index 5c0779c4fa3cdb1c628ac3c08e9dd1c373cc8e89..0472f34728423ac1a3ba839a72e4aab167df1091 100644 (file)
@@ -56,8 +56,12 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
        struct sk_buff *pskb = (struct sk_buff *)skb;
        struct sock *sk = skb->sk;
 
+       if (sk && !net_eq(xt_net(par), sock_net(sk)))
+               sk = NULL;
+
        if (!sk)
                sk = nf_sk_lookup_slow_v4(xt_net(par), skb, xt_in(par));
+
        if (sk) {
                bool wildcard;
                bool transparent = true;
@@ -113,8 +117,12 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par)
        struct sk_buff *pskb = (struct sk_buff *)skb;
        struct sock *sk = skb->sk;
 
+       if (sk && !net_eq(xt_net(par), sock_net(sk)))
+               sk = NULL;
+
        if (!sk)
                sk = nf_sk_lookup_slow_v6(xt_net(par), skb, xt_in(par));
+
        if (sk) {
                bool wildcard;
                bool transparent = true;
index 56704d95f82d27f5a2bc26714e5001f3868765b2..930d17fa906c9ebf1cf7b6031ce0a22f9f66c0e4 100644 (file)
@@ -2307,7 +2307,6 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
 
        cb = &nlk->cb;
        memset(cb, 0, sizeof(*cb));
-       cb->start = control->start;
        cb->dump = control->dump;
        cb->done = control->done;
        cb->nlh = nlh;
@@ -2316,8 +2315,8 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
        cb->min_dump_alloc = control->min_dump_alloc;
        cb->skb = skb;
 
-       if (cb->start) {
-               ret = cb->start(cb);
+       if (control->start) {
+               ret = control->start(cb);
                if (ret)
                        goto error_put;
        }
index ea0c0c6f187429426f4849347c09b847f0111fff..dd4adf8b1167ed61bfcbdba3df7164b4b98dc98a 100644 (file)
@@ -556,7 +556,7 @@ static __poll_t llcp_sock_poll(struct file *file, struct socket *sock,
 
        pr_debug("%p\n", sk);
 
-       sock_poll_wait(file, sk_sleep(sk), wait);
+       sock_poll_wait(file, wait);
 
        if (sk->sk_state == LLCP_LISTEN)
                return llcp_accept_poll(sk);
index 30a5df27116ec695d08bd4b57d8ee97b93822403..85ae53d8fd098b80e22a4b3ccc26f83be7b110c5 100644 (file)
@@ -1057,6 +1057,28 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
                             clone_flow_key);
 }
 
+/* When 'last' is true, clone() should always consume the 'skb'.
+ * Otherwise, clone() should keep 'skb' intact regardless of what
+ * actions are executed within clone().
+ */
+static int clone(struct datapath *dp, struct sk_buff *skb,
+                struct sw_flow_key *key, const struct nlattr *attr,
+                bool last)
+{
+       struct nlattr *actions;
+       struct nlattr *clone_arg;
+       int rem = nla_len(attr);
+       bool dont_clone_flow_key;
+
+       /* The first action is always 'OVS_CLONE_ATTR_ARG'. */
+       clone_arg = nla_data(attr);
+       dont_clone_flow_key = nla_get_u32(clone_arg);
+       actions = nla_next(clone_arg, &rem);
+
+       return clone_execute(dp, skb, key, 0, actions, rem, last,
+                            !dont_clone_flow_key);
+}
+
 static void execute_hash(struct sk_buff *skb, struct sw_flow_key *key,
                         const struct nlattr *attr)
 {
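The ownership comment above is the whole contract of clone(): consume the skb when it is the last action (so no copy is needed), otherwise leave the caller's skb untouched. A userspace analog of that buffer-ownership rule:

#include <stdbool.h>
#include <stdlib.h>
#include <string.h>

/* When 'last' is true the callee consumes (frees) the caller's buffer,
 * mirroring clone() consuming the skb; otherwise it works on a private
 * copy and the caller's buffer stays intact. */
static int process_clone(char *pkt, bool last)
{
        char *work = pkt;

        if (!last) {
                work = strdup(pkt);     /* keep the caller's copy intact */
                if (!work)
                        return -1;
        }
        /* ... run the nested action list against 'work' ... */
        free(work);                     /* ours if copied, the caller's if last */
        return 0;
}

int main(void)
{
        char *pkt = strdup("packet");

        process_clone(pkt, false);      /* pkt is still valid here */
        process_clone(pkt, true);       /* pkt has been consumed */
        return 0;
}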
@@ -1336,6 +1358,17 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
                                consume_skb(skb);
                                return 0;
                        }
+                       break;
+
+               case OVS_ACTION_ATTR_CLONE: {
+                       bool last = nla_is_last(a, rem);
+
+                       err = clone(dp, skb, key, a, last);
+                       if (last)
+                               return err;
+
+                       break;
+               }
                }
 
                if (unlikely(err)) {
index 284aca2a252df5cf6fabb0afd931a0c7c3d53217..86a75105af1a2726bc52e44e6c3ac691d719999f 100644 (file)
@@ -26,6 +26,7 @@
 #include <net/netfilter/nf_conntrack_seqadj.h>
 #include <net/netfilter/nf_conntrack_zones.h>
 #include <net/netfilter/ipv6/nf_defrag_ipv6.h>
+#include <net/ipv6_frag.h>
 
 #ifdef CONFIG_NF_NAT_NEEDED
 #include <linux/netfilter/nf_nat.h>
@@ -607,23 +608,12 @@ static struct nf_conn *
 ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone,
                     u8 l3num, struct sk_buff *skb, bool natted)
 {
-       const struct nf_conntrack_l3proto *l3proto;
-       const struct nf_conntrack_l4proto *l4proto;
        struct nf_conntrack_tuple tuple;
        struct nf_conntrack_tuple_hash *h;
        struct nf_conn *ct;
-       unsigned int dataoff;
-       u8 protonum;
 
-       l3proto = __nf_ct_l3proto_find(l3num);
-       if (l3proto->get_l4proto(skb, skb_network_offset(skb), &dataoff,
-                                &protonum) <= 0) {
-               pr_debug("ovs_ct_find_existing: Can't get protonum\n");
-               return NULL;
-       }
-       l4proto = __nf_ct_l4proto_find(l3num, protonum);
-       if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num,
-                            protonum, net, &tuple, l3proto, l4proto)) {
+       if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), l3num,
+                              net, &tuple)) {
                pr_debug("ovs_ct_find_existing: Can't get tuple\n");
                return NULL;
        }
@@ -632,7 +622,7 @@ ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone,
        if (natted) {
                struct nf_conntrack_tuple inverse;
 
-               if (!nf_ct_invert_tuple(&inverse, &tuple, l3proto, l4proto)) {
+               if (!nf_ct_invert_tuplepr(&inverse, &tuple)) {
                        pr_debug("ovs_ct_find_existing: Inversion failed!\n");
                        return NULL;
                }
@@ -1314,7 +1304,7 @@ static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name,
                return -EINVAL;
        }
 
-       help = nf_ct_helper_ext_add(info->ct, helper, GFP_KERNEL);
+       help = nf_ct_helper_ext_add(info->ct, GFP_KERNEL);
        if (!help) {
                nf_conntrack_helper_put(helper);
                return -ENOMEM;
index 492ab0c36f7c9e3caf6de7e7d77368028716e09c..a70097ecf33c2bf9e9df7b92c2359ab679ae6d7e 100644 (file)
@@ -2460,6 +2460,40 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
        return 0;
 }
 
+static int validate_and_copy_clone(struct net *net,
+                                  const struct nlattr *attr,
+                                  const struct sw_flow_key *key,
+                                  struct sw_flow_actions **sfa,
+                                  __be16 eth_type, __be16 vlan_tci,
+                                  bool log, bool last)
+{
+       int start, err;
+       u32 exec;
+
+       if (nla_len(attr) && nla_len(attr) < NLA_HDRLEN)
+               return -EINVAL;
+
+       start = add_nested_action_start(sfa, OVS_ACTION_ATTR_CLONE, log);
+       if (start < 0)
+               return start;
+
+       exec = last || !actions_may_change_flow(attr);
+
+       err = ovs_nla_add_action(sfa, OVS_CLONE_ATTR_EXEC, &exec,
+                                sizeof(exec), log);
+       if (err)
+               return err;
+
+       err = __ovs_nla_copy_actions(net, attr, key, sfa,
+                                    eth_type, vlan_tci, log);
+       if (err)
+               return err;
+
+       add_nested_action_end(*sfa, start);
+
+       return 0;
+}
+
 void ovs_match_init(struct sw_flow_match *match,
                    struct sw_flow_key *key,
                    bool reset_key,
@@ -2516,7 +2550,9 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
        struct ovs_tunnel_info *ovs_tun;
        struct nlattr *a;
        int err = 0, start, opts_type;
+       __be16 dst_opt_type;
 
+       dst_opt_type = 0;
        ovs_match_init(&match, &key, true, NULL);
        opts_type = ip_tun_from_nlattr(nla_data(attr), &match, false, log);
        if (opts_type < 0)
@@ -2528,10 +2564,13 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
                        err = validate_geneve_opts(&key);
                        if (err < 0)
                                return err;
+                       dst_opt_type = TUNNEL_GENEVE_OPT;
                        break;
                case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
+                       dst_opt_type = TUNNEL_VXLAN_OPT;
                        break;
                case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS:
+                       dst_opt_type = TUNNEL_ERSPAN_OPT;
                        break;
                }
        }
@@ -2574,7 +2613,7 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
         */
        ip_tunnel_info_opts_set(tun_info,
                                TUN_METADATA_OPTS(&key, key.tun_opts_len),
-                               key.tun_opts_len);
+                               key.tun_opts_len, dst_opt_type);
        add_nested_action_end(*sfa, start);
 
        return err;
@@ -2844,6 +2883,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
                        [OVS_ACTION_ATTR_PUSH_NSH] = (u32)-1,
                        [OVS_ACTION_ATTR_POP_NSH] = 0,
                        [OVS_ACTION_ATTR_METER] = sizeof(u32),
+                       [OVS_ACTION_ATTR_CLONE] = (u32)-1,
                };
                const struct ovs_action_push_vlan *vlan;
                int type = nla_type(a);
@@ -3033,6 +3073,18 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
                        /* Non-existent meters are simply ignored.  */
                        break;
 
+               case OVS_ACTION_ATTR_CLONE: {
+                       bool last = nla_is_last(a, rem);
+
+                       err = validate_and_copy_clone(net, a, key, sfa,
+                                                     eth_type, vlan_tci,
+                                                     log, last);
+                       if (err)
+                               return err;
+                       skip_copy = true;
+                       break;
+               }
+
                default:
                        OVS_NLERR(log, "Unknown Action type %d", type);
                        return -EINVAL;
@@ -3111,6 +3163,26 @@ out:
        return err;
 }
 
+static int clone_action_to_attr(const struct nlattr *attr,
+                               struct sk_buff *skb)
+{
+       struct nlattr *start;
+       int err = 0, rem = nla_len(attr);
+
+       start = nla_nest_start(skb, OVS_ACTION_ATTR_CLONE);
+       if (!start)
+               return -EMSGSIZE;
+
+       err = ovs_nla_put_actions(nla_data(attr), rem, skb);
+
+       if (err)
+               nla_nest_cancel(skb, start);
+       else
+               nla_nest_end(skb, start);
+
+       return err;
+}
+
 static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
 {
        const struct nlattr *ovs_key = nla_data(a);
@@ -3199,6 +3271,12 @@ int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
                                return err;
                        break;
 
+               case OVS_ACTION_ATTR_CLONE:
+                       err = clone_action_to_attr(a, skb);
+                       if (err)
+                               return err;
+                       break;
+
                default:
                        if (nla_put(skb, type, nla_len(a), nla_data(a)))
                                return -EMSGSIZE;
index 9b27d0cd766d560fdb67ee2e3bbfc415963db8c6..345e38058ae58d2297d77743754b7d2adffd065c 100644 (file)
@@ -275,9 +275,10 @@ static bool packet_use_direct_xmit(const struct packet_sock *po)
        return po->xmit == packet_direct_xmit;
 }
 
-static u16 __packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb)
+static u16 __packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb,
+                                 struct net_device *sb_dev)
 {
-       return (u16) raw_smp_processor_id() % dev->real_num_tx_queues;
+       return dev_pick_tx_cpu_id(dev, skb, sb_dev, NULL);
 }
 
 static u16 packet_pick_tx_queue(struct sk_buff *skb)
@@ -291,7 +292,7 @@ static u16 packet_pick_tx_queue(struct sk_buff *skb)
                                                    __packet_pick_tx_queue);
                queue_index = netdev_cap_txqueue(dev, queue_index);
        } else {
-               queue_index = __packet_pick_tx_queue(dev, skb);
+               queue_index = __packet_pick_tx_queue(dev, skb, NULL);
        }
 
        return queue_index;
@@ -1581,7 +1582,7 @@ static int fanout_set_data(struct packet_sock *po, char __user *data,
                return fanout_set_data_ebpf(po, data, len);
        default:
                return -EINVAL;
-       };
+       }
 }
 
 static void fanout_release_data(struct packet_fanout *f)
@@ -1590,7 +1591,7 @@ static void fanout_release_data(struct packet_fanout *f)
        case PACKET_FANOUT_CBPF:
        case PACKET_FANOUT_EBPF:
                __fanout_set_data_bpf(f, NULL);
-       };
+       }
 }
 
 static bool __fanout_id_is_free(struct sock *sk, u16 candidate_id)
@@ -1951,7 +1952,7 @@ retry:
                goto out_unlock;
        }
 
-       sockc.tsflags = sk->sk_tsflags;
+       sockcm_init(&sockc, sk);
        if (msg->msg_controllen) {
                err = sock_cmsg_send(sk, msg, &sockc);
                if (unlikely(err))
@@ -1962,6 +1963,7 @@ retry:
        skb->dev = dev;
        skb->priority = sk->sk_priority;
        skb->mark = sk->sk_mark;
+       skb->tstamp = sockc.transmit_time;
 
        sock_tx_timestamp(sk, sockc.tsflags, &skb_shinfo(skb)->tx_flags);
 
@@ -2457,6 +2459,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
        skb->dev = dev;
        skb->priority = po->sk.sk_priority;
        skb->mark = po->sk.sk_mark;
+       skb->tstamp = sockc->transmit_time;
        sock_tx_timestamp(&po->sk, sockc->tsflags, &skb_shinfo(skb)->tx_flags);
        skb_shinfo(skb)->destructor_arg = ph.raw;
 
@@ -2633,7 +2636,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
        if (unlikely(!(dev->flags & IFF_UP)))
                goto out_put;
 
-       sockc.tsflags = po->sk.sk_tsflags;
+       sockcm_init(&sockc, &po->sk);
        if (msg->msg_controllen) {
                err = sock_cmsg_send(&po->sk, msg, &sockc);
                if (unlikely(err))
@@ -2829,7 +2832,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
        if (unlikely(!(dev->flags & IFF_UP)))
                goto out_unlock;
 
-       sockc.tsflags = sk->sk_tsflags;
+       sockcm_init(&sockc, sk);
        sockc.mark = sk->sk_mark;
        if (msg->msg_controllen) {
                err = sock_cmsg_send(sk, msg, &sockc);
@@ -2905,6 +2908,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
        skb->dev = dev;
        skb->priority = sk->sk_priority;
        skb->mark = sockc.mark;
+       skb->tstamp = sockc.transmit_time;
 
        if (has_vnet_hdr) {
                err = virtio_net_hdr_to_skb(skb, &vnet_hdr, vio_le());
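All three transmit paths above now copy sockc.transmit_time into skb->tstamp;
sockcm_init() seeds the sockcm_cookie from the socket and sock_cmsg_send()
overrides it from control messages. A hedged userspace sketch of attaching a
transmit time to sendmsg() via an SCM_TXTIME cmsg (this assumes headers that
expose SCM_TXTIME and a socket with SO_TXTIME already enabled; the fallback
define is an assumption):

    #include <stdint.h>
    #include <string.h>
    #include <sys/types.h>
    #include <sys/socket.h>
    #include <sys/uio.h>

    #ifndef SCM_TXTIME
    #define SCM_TXTIME 61          /* assumption: equals SO_TXTIME on Linux */
    #endif

    /* Queue 'data' with an absolute transmit time in nanoseconds attached
     * as an SCM_TXTIME cmsg; the kernel stores it in sockc.transmit_time. */
    static ssize_t send_at(int fd, const void *data, size_t len,
                           const struct sockaddr *dst, socklen_t dstlen,
                           uint64_t txtime_ns)
    {
        union {
            char buf[CMSG_SPACE(sizeof(uint64_t))];
            struct cmsghdr align;  /* keep the control buffer aligned */
        } u;
        struct iovec iov = { .iov_base = (void *)data, .iov_len = len };
        struct msghdr msg = {
            .msg_name = (void *)dst, .msg_namelen = dstlen,
            .msg_iov = &iov, .msg_iovlen = 1,
            .msg_control = u.buf, .msg_controllen = sizeof(u.buf),
        };
        struct cmsghdr *cm = CMSG_FIRSTHDR(&msg);

        cm->cmsg_level = SOL_SOCKET;
        cm->cmsg_type = SCM_TXTIME;
        cm->cmsg_len = CMSG_LEN(sizeof(txtime_ns));
        memcpy(CMSG_DATA(cm), &txtime_ns, sizeof(txtime_ns));

        return sendmsg(fd, &msg, 0);
    }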
index bffde4b46c5d2058027da6158c6d780edde78950..41f75563b54b1f02cb7e591b46096dd7721a44ae 100644 (file)
@@ -24,4 +24,3 @@ config RDS_DEBUG
         bool "RDS debugging messages"
        depends on RDS
         default n
-
index b5d568bd479cb7c7e034ef3eabc1bda1605e556c..e647f9de104a67b06708eee61599ad833b91e2b1 100644 (file)
@@ -15,4 +15,3 @@ rds_tcp-y :=          tcp.o tcp_connect.o tcp_listen.o tcp_recv.o \
                        tcp_send.o tcp_stats.o
 
 ccflags-$(CONFIG_RDS_DEBUG)    :=      -DRDS_DEBUG
-
index ab751a150f707ccf4b6b4b5f0a378325882d95b2..65387e1e6964806d0fccfdb1ada1667cddcffc43 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 Oracle.  All rights reserved.
+ * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -35,6 +35,7 @@
 #include <linux/kernel.h>
 #include <linux/gfp.h>
 #include <linux/in.h>
+#include <linux/ipv6.h>
 #include <linux/poll.h>
 #include <net/sock.h>
 
@@ -113,26 +114,82 @@ void rds_wake_sk_sleep(struct rds_sock *rs)
 static int rds_getname(struct socket *sock, struct sockaddr *uaddr,
                       int peer)
 {
-       struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
        struct rds_sock *rs = rds_sk_to_rs(sock->sk);
-
-       memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
+       struct sockaddr_in6 *sin6;
+       struct sockaddr_in *sin;
+       int uaddr_len;
 
        /* racy, don't care */
        if (peer) {
-               if (!rs->rs_conn_addr)
+               if (ipv6_addr_any(&rs->rs_conn_addr))
                        return -ENOTCONN;
 
-               sin->sin_port = rs->rs_conn_port;
-               sin->sin_addr.s_addr = rs->rs_conn_addr;
+               if (ipv6_addr_v4mapped(&rs->rs_conn_addr)) {
+                       sin = (struct sockaddr_in *)uaddr;
+                       memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
+                       sin->sin_family = AF_INET;
+                       sin->sin_port = rs->rs_conn_port;
+                       sin->sin_addr.s_addr = rs->rs_conn_addr_v4;
+                       uaddr_len = sizeof(*sin);
+               } else {
+                       sin6 = (struct sockaddr_in6 *)uaddr;
+                       sin6->sin6_family = AF_INET6;
+                       sin6->sin6_port = rs->rs_conn_port;
+                       sin6->sin6_addr = rs->rs_conn_addr;
+                       sin6->sin6_flowinfo = 0;
+                       /* scope_id is the same as in the bound address. */
+                       sin6->sin6_scope_id = rs->rs_bound_scope_id;
+                       uaddr_len = sizeof(*sin6);
+               }
        } else {
-               sin->sin_port = rs->rs_bound_port;
-               sin->sin_addr.s_addr = rs->rs_bound_addr;
+               /* If the socket is not yet bound but is connected, report
+                * the family of the connected address with a zero address
+                * value.  If it is not connected either, report AF_UNSPEC
+                * (value 0) with an IPv4 address size.
+                */
+               if (ipv6_addr_any(&rs->rs_bound_addr)) {
+                       if (ipv6_addr_any(&rs->rs_conn_addr)) {
+                               sin = (struct sockaddr_in *)uaddr;
+                               memset(sin, 0, sizeof(*sin));
+                               sin->sin_family = AF_UNSPEC;
+                               return sizeof(*sin);
+                       }
+
+#if IS_ENABLED(CONFIG_IPV6)
+                       if (!(ipv6_addr_type(&rs->rs_conn_addr) &
+                             IPV6_ADDR_MAPPED)) {
+                               sin6 = (struct sockaddr_in6 *)uaddr;
+                               memset(sin6, 0, sizeof(*sin6));
+                               sin6->sin6_family = AF_INET6;
+                               return sizeof(*sin6);
+                       }
+#endif
+
+                       sin = (struct sockaddr_in *)uaddr;
+                       memset(sin, 0, sizeof(*sin));
+                       sin->sin_family = AF_INET;
+                       return sizeof(*sin);
+               }
+               if (ipv6_addr_v4mapped(&rs->rs_bound_addr)) {
+                       sin = (struct sockaddr_in *)uaddr;
+                       memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
+                       sin->sin_family = AF_INET;
+                       sin->sin_port = rs->rs_bound_port;
+                       sin->sin_addr.s_addr = rs->rs_bound_addr_v4;
+                       uaddr_len = sizeof(*sin);
+               } else {
+                       sin6 = (struct sockaddr_in6 *)uaddr;
+                       sin6->sin6_family = AF_INET6;
+                       sin6->sin6_port = rs->rs_bound_port;
+                       sin6->sin6_addr = rs->rs_bound_addr;
+                       sin6->sin6_flowinfo = 0;
+                       sin6->sin6_scope_id = rs->rs_bound_scope_id;
+                       uaddr_len = sizeof(*sin6);
+               }
        }
 
-       sin->sin_family = AF_INET;
-
-       return sizeof(*sin);
+       return uaddr_len;
 }
 
 /*
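From userspace, the dual-family getname above means callers should hand
getsockname() a struct sockaddr_storage and dispatch on the returned family,
including the AF_UNSPEC case for a socket that is neither bound nor
connected. A minimal sketch (the AF_RDS fallback define is an assumption for
older headers):

    #include <stdio.h>
    #include <string.h>
    #include <sys/socket.h>
    #include <netinet/in.h>
    #include <arpa/inet.h>

    #ifndef AF_RDS
    #define AF_RDS 21              /* assumption: Linux PF_RDS value */
    #endif

    int main(void)
    {
        struct sockaddr_storage ss;
        socklen_t len = sizeof(ss);
        char buf[INET6_ADDRSTRLEN];
        int fd = socket(AF_RDS, SOCK_SEQPACKET, 0);

        if (fd < 0 || getsockname(fd, (struct sockaddr *)&ss, &len) < 0)
            return 1;

        if (ss.ss_family == AF_INET) {
            struct sockaddr_in *sin = (struct sockaddr_in *)&ss;

            inet_ntop(AF_INET, &sin->sin_addr, buf, sizeof(buf));
        } else if (ss.ss_family == AF_INET6) {
            struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&ss;

            inet_ntop(AF_INET6, &sin6->sin6_addr, buf, sizeof(buf));
        } else {
            strcpy(buf, "(unbound)");  /* AF_UNSPEC, per the code above */
        }
        printf("local address: %s\n", buf);
        return 0;
    }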
@@ -203,11 +260,12 @@ static int rds_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 static int rds_cancel_sent_to(struct rds_sock *rs, char __user *optval,
                              int len)
 {
+       struct sockaddr_in6 sin6;
        struct sockaddr_in sin;
        int ret = 0;
 
        /* racing with another thread binding seems ok here */
-       if (rs->rs_bound_addr == 0) {
+       if (ipv6_addr_any(&rs->rs_bound_addr)) {
                ret = -ENOTCONN; /* XXX not a great errno */
                goto out;
        }
@@ -215,14 +273,23 @@ static int rds_cancel_sent_to(struct rds_sock *rs, char __user *optval,
        if (len < sizeof(struct sockaddr_in)) {
                ret = -EINVAL;
                goto out;
+       } else if (len < sizeof(struct sockaddr_in6)) {
+               /* Assume IPv4 */
+               if (copy_from_user(&sin, optval, sizeof(struct sockaddr_in))) {
+                       ret = -EFAULT;
+                       goto out;
+               }
+               ipv6_addr_set_v4mapped(sin.sin_addr.s_addr, &sin6.sin6_addr);
+               sin6.sin6_port = sin.sin_port;
+       } else {
+               if (copy_from_user(&sin6, optval,
+                                  sizeof(struct sockaddr_in6))) {
+                       ret = -EFAULT;
+                       goto out;
+               }
        }
 
-       if (copy_from_user(&sin, optval, sizeof(sin))) {
-               ret = -EFAULT;
-               goto out;
-       }
-
-       rds_send_drop_to(rs, &sin);
+       rds_send_drop_to(rs, &sin6);
 out:
        return ret;
 }
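The short-sockaddr branch above normalizes a sockaddr_in into the v4-mapped
form ::ffff:a.b.c.d via ipv6_addr_set_v4mapped(), so the rest of the code
only ever deals with an in6_addr. A userspace equivalent of that mapping:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>
    #include <netinet/in.h>
    #include <arpa/inet.h>

    /* Store an IPv4 address (network byte order) as ::ffff:a.b.c.d. */
    static void v4mapped(uint32_t addr4, struct in6_addr *v6)
    {
        memset(v6, 0, sizeof(*v6));
        v6->s6_addr[10] = 0xff;
        v6->s6_addr[11] = 0xff;
        memcpy(&v6->s6_addr[12], &addr4, sizeof(addr4));
    }

    int main(void)
    {
        struct in6_addr v6;
        struct in_addr v4;
        char buf[INET6_ADDRSTRLEN];

        inet_pton(AF_INET, "192.0.2.1", &v4);
        v4mapped(v4.s_addr, &v6);
        inet_ntop(AF_INET6, &v6, buf, sizeof(buf));
        printf("%s\n", buf);           /* prints ::ffff:192.0.2.1 */
        return 0;
    }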
@@ -435,31 +502,91 @@ static int rds_connect(struct socket *sock, struct sockaddr *uaddr,
                       int addr_len, int flags)
 {
        struct sock *sk = sock->sk;
-       struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
+       struct sockaddr_in *sin;
        struct rds_sock *rs = rds_sk_to_rs(sk);
        int ret = 0;
 
        lock_sock(sk);
 
-       if (addr_len != sizeof(struct sockaddr_in)) {
-               ret = -EINVAL;
-               goto out;
-       }
+       switch (uaddr->sa_family) {
+       case AF_INET:
+               sin = (struct sockaddr_in *)uaddr;
+               if (addr_len < sizeof(struct sockaddr_in)) {
+                       ret = -EINVAL;
+                       break;
+               }
+               if (sin->sin_addr.s_addr == htonl(INADDR_ANY)) {
+                       ret = -EDESTADDRREQ;
+                       break;
+               }
+               if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) ||
+                   sin->sin_addr.s_addr == htonl(INADDR_BROADCAST)) {
+                       ret = -EINVAL;
+                       break;
+               }
+               ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &rs->rs_conn_addr);
+               rs->rs_conn_port = sin->sin_port;
+               break;
 
-       if (sin->sin_family != AF_INET) {
-               ret = -EAFNOSUPPORT;
-               goto out;
-       }
+#if IS_ENABLED(CONFIG_IPV6)
+       case AF_INET6: {
+               struct sockaddr_in6 *sin6;
+               int addr_type;
 
-       if (sin->sin_addr.s_addr == htonl(INADDR_ANY)) {
-               ret = -EDESTADDRREQ;
-               goto out;
+               sin6 = (struct sockaddr_in6 *)uaddr;
+               if (addr_len < sizeof(struct sockaddr_in6)) {
+                       ret = -EINVAL;
+                       break;
+               }
+               addr_type = ipv6_addr_type(&sin6->sin6_addr);
+               if (!(addr_type & IPV6_ADDR_UNICAST)) {
+                       __be32 addr4;
+
+                       if (!(addr_type & IPV6_ADDR_MAPPED)) {
+                               ret = -EPROTOTYPE;
+                               break;
+                       }
+
+                       /* It is a mapped address.  Need to do some sanity
+                        * checks.
+                        */
+                       addr4 = sin6->sin6_addr.s6_addr32[3];
+                       if (addr4 == htonl(INADDR_ANY) ||
+                           addr4 == htonl(INADDR_BROADCAST) ||
+                           IN_MULTICAST(ntohl(addr4))) {
+                               ret = -EPROTOTYPE;
+                               break;
+                       }
+               }
+
+               if (addr_type & IPV6_ADDR_LINKLOCAL) {
+                       /* If the socket is already bound to a link-local
+                        * address, the peer address must be on the same link.
+                        */
+                       if (sin6->sin6_scope_id == 0 ||
+                           (!ipv6_addr_any(&rs->rs_bound_addr) &&
+                            rs->rs_bound_scope_id &&
+                            sin6->sin6_scope_id != rs->rs_bound_scope_id)) {
+                               ret = -EINVAL;
+                               break;
+                       }
+                       /* Remember the connected address scope ID.  It will
+                        * be checked against the binding local address when
+                        * the socket is bound.
+                        */
+                       rs->rs_bound_scope_id = sin6->sin6_scope_id;
+               }
+               rs->rs_conn_addr = sin6->sin6_addr;
+               rs->rs_conn_port = sin6->sin6_port;
+               break;
        }
+#endif
 
-       rs->rs_conn_addr = sin->sin_addr.s_addr;
-       rs->rs_conn_port = sin->sin_port;
+       default:
+               ret = -EAFNOSUPPORT;
+               break;
+       }
 
-out:
        release_sock(sk);
        return ret;
 }
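With the AF_INET6 case above, connecting an RDS socket looks like any other
IPv6 connect, with the extra rule that a link-local peer needs a non-zero
sin6_scope_id consistent with any bound scope. A hedged sketch (the peer
address, port, and interface name are placeholders, and the AF_RDS fallback
define is an assumption):

    #include <string.h>
    #include <net/if.h>
    #include <sys/socket.h>
    #include <netinet/in.h>
    #include <arpa/inet.h>

    #ifndef AF_RDS
    #define AF_RDS 21              /* assumption: Linux PF_RDS value */
    #endif

    int main(void)
    {
        struct sockaddr_in6 peer;
        int fd = socket(AF_RDS, SOCK_SEQPACKET, 0);

        if (fd < 0)
            return 1;
        memset(&peer, 0, sizeof(peer));
        peer.sin6_family = AF_INET6;
        peer.sin6_port = htons(18634);                   /* placeholder */
        inet_pton(AF_INET6, "fe80::1", &peer.sin6_addr); /* placeholder */
        peer.sin6_scope_id = if_nametoindex("eth0");     /* must be non-zero */

        return connect(fd, (struct sockaddr *)&peer, sizeof(peer)) ? 1 : 0;
    }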
@@ -578,8 +705,10 @@ static void rds_sock_inc_info(struct socket *sock, unsigned int len,
                list_for_each_entry(inc, &rs->rs_recv_queue, i_item) {
                        total++;
                        if (total <= len)
-                               rds_inc_info_copy(inc, iter, inc->i_saddr,
-                                                 rs->rs_bound_addr, 1);
+                               rds_inc_info_copy(inc, iter,
+                                                 inc->i_saddr.s6_addr32[3],
+                                                 rs->rs_bound_addr_v4,
+                                                 1);
                }
 
                read_unlock(&rs->rs_recv_lock);
@@ -608,8 +737,8 @@ static void rds_sock_info(struct socket *sock, unsigned int len,
        list_for_each_entry(rs, &rds_sock_list, rs_item) {
                sinfo.sndbuf = rds_sk_sndbuf(rs);
                sinfo.rcvbuf = rds_sk_rcvbuf(rs);
-               sinfo.bound_addr = rs->rs_bound_addr;
-               sinfo.connected_addr = rs->rs_conn_addr;
+               sinfo.bound_addr = rs->rs_bound_addr_v4;
+               sinfo.connected_addr = rs->rs_conn_addr_v4;
                sinfo.bound_port = rs->rs_bound_port;
                sinfo.connected_port = rs->rs_conn_port;
                sinfo.inum = sock_i_ino(rds_rs_to_sk(rs));
index 5aa3a64aa4f0ef254bd31ecbfb11c81db0438185..3ab55784b637e3b30bd5dca1a19814943aae42e7 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 Oracle.  All rights reserved.
+ * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -33,6 +33,7 @@
 #include <linux/kernel.h>
 #include <net/sock.h>
 #include <linux/in.h>
+#include <linux/ipv6.h>
 #include <linux/if_arp.h>
 #include <linux/jhash.h>
 #include <linux/ratelimit.h>
@@ -42,42 +43,58 @@ static struct rhashtable bind_hash_table;
 
 static const struct rhashtable_params ht_parms = {
        .nelem_hint = 768,
-       .key_len = sizeof(u64),
+       .key_len = RDS_BOUND_KEY_LEN,
        .key_offset = offsetof(struct rds_sock, rs_bound_key),
        .head_offset = offsetof(struct rds_sock, rs_bound_node),
        .max_size = 16384,
        .min_size = 1024,
 };
 
+/* Create a key for bind hash table manipulation.  The port is in network
+ * byte order.
+ */
+static inline void __rds_create_bind_key(u8 *key, const struct in6_addr *addr,
+                                        __be16 port, __u32 scope_id)
+{
+       memcpy(key, addr, sizeof(*addr));
+       key += sizeof(*addr);
+       memcpy(key, &port, sizeof(port));
+       key += sizeof(port);
+       memcpy(key, &scope_id, sizeof(scope_id));
+}
+
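The rhashtable key is now the byte concatenation address | port | scope_id,
so RDS_BOUND_KEY_LEN (defined elsewhere in this patch) should work out to
16 + 2 + 4 = 22 bytes. A self-contained sketch of the same layout, with
plain integer types standing in for __be16 and __u32:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>
    #include <netinet/in.h>
    #include <arpa/inet.h>

    #define BOUND_KEY_LEN \
            (sizeof(struct in6_addr) + sizeof(uint16_t) + sizeof(uint32_t))

    static void make_bind_key(uint8_t *key, const struct in6_addr *addr,
                              uint16_t port_be, uint32_t scope_id)
    {
        memcpy(key, addr, sizeof(*addr));           /* 16 bytes */
        key += sizeof(*addr);
        memcpy(key, &port_be, sizeof(port_be));     /*  2 bytes, net order */
        key += sizeof(port_be);
        memcpy(key, &scope_id, sizeof(scope_id));   /*  4 bytes */
    }

    int main(void)
    {
        uint8_t key[BOUND_KEY_LEN];
        struct in6_addr a = IN6ADDR_LOOPBACK_INIT;

        make_bind_key(key, &a, htons(1234), 2);
        printf("key is %zu bytes\n", sizeof(key));  /* 22 */
        return 0;
    }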
 /*
  * Return the rds_sock bound at the given local address.
  *
  * The rx path can race with rds_release.  We notice if rds_release() has
  * marked this socket and don't return a rs ref to the rx path.
  */
-struct rds_sock *rds_find_bound(__be32 addr, __be16 port)
+struct rds_sock *rds_find_bound(const struct in6_addr *addr, __be16 port,
+                               __u32 scope_id)
 {
-       u64 key = ((u64)addr << 32) | port;
+       u8 key[RDS_BOUND_KEY_LEN];
        struct rds_sock *rs;
 
-       rs = rhashtable_lookup_fast(&bind_hash_table, &key, ht_parms);
+       __rds_create_bind_key(key, addr, port, scope_id);
+       rs = rhashtable_lookup_fast(&bind_hash_table, key, ht_parms);
        if (rs && !sock_flag(rds_rs_to_sk(rs), SOCK_DEAD))
                rds_sock_addref(rs);
        else
                rs = NULL;
 
-       rdsdebug("returning rs %p for %pI4:%u\n", rs, &addr,
-               ntohs(port));
+       rdsdebug("returning rs %p for %pI6c:%u\n", rs, addr,
+                ntohs(port));
 
        return rs;
 }
 
 /* returns -ve errno or +ve port */
-static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port)
+static int rds_add_bound(struct rds_sock *rs, const struct in6_addr *addr,
+                        __be16 *port, __u32 scope_id)
 {
        int ret = -EADDRINUSE;
        u16 rover, last;
-       u64 key;
+       u8 key[RDS_BOUND_KEY_LEN];
 
        if (*port != 0) {
                rover = be16_to_cpu(*port);
@@ -95,12 +112,13 @@ static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port)
 
                if (rover == RDS_FLAG_PROBE_PORT)
                        continue;
-               key = ((u64)addr << 32) | cpu_to_be16(rover);
-               if (rhashtable_lookup_fast(&bind_hash_table, &key, ht_parms))
+               __rds_create_bind_key(key, addr, cpu_to_be16(rover),
+                                     scope_id);
+               if (rhashtable_lookup_fast(&bind_hash_table, key, ht_parms))
                        continue;
 
-               rs->rs_bound_key = key;
-               rs->rs_bound_addr = addr;
+               memcpy(rs->rs_bound_key, key, sizeof(rs->rs_bound_key));
+               rs->rs_bound_addr = *addr;
                net_get_random_once(&rs->rs_hash_initval,
                                    sizeof(rs->rs_hash_initval));
                rs->rs_bound_port = cpu_to_be16(rover);
@@ -109,12 +127,13 @@ static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port)
                if (!rhashtable_insert_fast(&bind_hash_table,
                                            &rs->rs_bound_node, ht_parms)) {
                        *port = rs->rs_bound_port;
+                       rs->rs_bound_scope_id = scope_id;
                        ret = 0;
-                       rdsdebug("rs %p binding to %pI4:%d\n",
-                         rs, &addr, (int)ntohs(*port));
+                       rdsdebug("rs %p binding to %pI6c:%d\n",
+                                rs, addr, (int)ntohs(*port));
                        break;
                } else {
-                       rs->rs_bound_addr = 0;
+                       rs->rs_bound_addr = in6addr_any;
                        rds_sock_put(rs);
                        ret = -ENOMEM;
                        break;
@@ -127,44 +146,103 @@ static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port)
 void rds_remove_bound(struct rds_sock *rs)
 {
 
-       if (!rs->rs_bound_addr)
+       if (ipv6_addr_any(&rs->rs_bound_addr))
                return;
 
-       rdsdebug("rs %p unbinding from %pI4:%d\n",
+       rdsdebug("rs %p unbinding from %pI6c:%d\n",
                 rs, &rs->rs_bound_addr,
                 ntohs(rs->rs_bound_port));
 
        rhashtable_remove_fast(&bind_hash_table, &rs->rs_bound_node, ht_parms);
        rds_sock_put(rs);
-       rs->rs_bound_addr = 0;
+       rs->rs_bound_addr = in6addr_any;
 }
 
 int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 {
        struct sock *sk = sock->sk;
-       struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
        struct rds_sock *rs = rds_sk_to_rs(sk);
+       struct in6_addr v6addr, *binding_addr;
        struct rds_transport *trans;
+       __u32 scope_id = 0;
        int ret = 0;
+       __be16 port;
+
+       /* We allow an RDS socket to be bound to either IPv4 or IPv6
+        * address.
+        */
+       if (uaddr->sa_family == AF_INET) {
+               struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
+
+               if (addr_len < sizeof(struct sockaddr_in) ||
+                   sin->sin_addr.s_addr == htonl(INADDR_ANY) ||
+                   sin->sin_addr.s_addr == htonl(INADDR_BROADCAST) ||
+                   IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
+                       return -EINVAL;
+               ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &v6addr);
+               binding_addr = &v6addr;
+               port = sin->sin_port;
+#if IS_ENABLED(CONFIG_IPV6)
+       } else if (uaddr->sa_family == AF_INET6) {
+               struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)uaddr;
+               int addr_type;
+
+               if (addr_len < sizeof(struct sockaddr_in6))
+                       return -EINVAL;
+               addr_type = ipv6_addr_type(&sin6->sin6_addr);
+               if (!(addr_type & IPV6_ADDR_UNICAST)) {
+                       __be32 addr4;
 
+                       if (!(addr_type & IPV6_ADDR_MAPPED))
+                               return -EINVAL;
+
+                       /* It is a mapped address.  Need to do some sanity
+                        * checks.
+                        */
+                       addr4 = sin6->sin6_addr.s6_addr32[3];
+                       if (addr4 == htonl(INADDR_ANY) ||
+                           addr4 == htonl(INADDR_BROADCAST) ||
+                           IN_MULTICAST(ntohl(addr4)))
+                               return -EINVAL;
+               }
+               /* The scope ID must be specified for a link-local address. */
+               if (addr_type & IPV6_ADDR_LINKLOCAL) {
+                       if (sin6->sin6_scope_id == 0)
+                               return -EINVAL;
+                       scope_id = sin6->sin6_scope_id;
+               }
+               binding_addr = &sin6->sin6_addr;
+               port = sin6->sin6_port;
+#endif
+       } else {
+               return -EINVAL;
+       }
        lock_sock(sk);
 
-       if (addr_len != sizeof(struct sockaddr_in) ||
-           sin->sin_family != AF_INET ||
-           rs->rs_bound_addr ||
-           sin->sin_addr.s_addr == htonl(INADDR_ANY)) {
+       /* RDS socket does not allow re-binding. */
+       if (!ipv6_addr_any(&rs->rs_bound_addr)) {
+               ret = -EINVAL;
+               goto out;
+       }
+       /* If the socket is connected, the binding address must have the
+        * same scope ID as the connected address, except when one of them
+        * is a non-link-local address (scope_id is 0).
+        */
+       if (!ipv6_addr_any(&rs->rs_conn_addr) && scope_id &&
+           rs->rs_bound_scope_id &&
+           scope_id != rs->rs_bound_scope_id) {
                ret = -EINVAL;
                goto out;
        }
 
-       ret = rds_add_bound(rs, sin->sin_addr.s_addr, &sin->sin_port);
+       ret = rds_add_bound(rs, binding_addr, &port, scope_id);
        if (ret)
                goto out;
 
        if (rs->rs_transport) { /* previously bound */
                trans = rs->rs_transport;
                if (trans->laddr_check(sock_net(sock->sk),
-                                      sin->sin_addr.s_addr) != 0) {
+                                      binding_addr, scope_id) != 0) {
                        ret = -ENOPROTOOPT;
                        rds_remove_bound(rs);
                } else {
@@ -172,13 +250,13 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
                }
                goto out;
        }
-       trans = rds_trans_get_preferred(sock_net(sock->sk),
-                                       sin->sin_addr.s_addr);
+       trans = rds_trans_get_preferred(sock_net(sock->sk), binding_addr,
+                                       scope_id);
        if (!trans) {
                ret = -EADDRNOTAVAIL;
                rds_remove_bound(rs);
-               pr_info_ratelimited("RDS: %s could not find a transport for %pI4, load rds_tcp or rds_rdma?\n",
-                                   __func__, &sin->sin_addr.s_addr);
+               pr_info_ratelimited("RDS: %s could not find a transport for %pI6c, load rds_tcp or rds_rdma?\n",
+                                   __func__, binding_addr);
                goto out;
        }
 
index 63da9d2f142d19016e1094f7f4136781da8d6037..ccdff09a79c80f8fb11c86c3c472ef7708bf2d67 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007 Oracle.  All rights reserved.
+ * Copyright (c) 2007, 2017 Oracle and/or its affiliates. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -101,7 +101,7 @@ static DEFINE_RWLOCK(rds_cong_monitor_lock);
 static DEFINE_SPINLOCK(rds_cong_lock);
 static struct rb_root rds_cong_tree = RB_ROOT;
 
-static struct rds_cong_map *rds_cong_tree_walk(__be32 addr,
+static struct rds_cong_map *rds_cong_tree_walk(const struct in6_addr *addr,
                                               struct rds_cong_map *insert)
 {
        struct rb_node **p = &rds_cong_tree.rb_node;
@@ -109,12 +109,15 @@ static struct rds_cong_map *rds_cong_tree_walk(__be32 addr,
        struct rds_cong_map *map;
 
        while (*p) {
+               int diff;
+
                parent = *p;
                map = rb_entry(parent, struct rds_cong_map, m_rb_node);
 
-               if (addr < map->m_addr)
+               diff = rds_addr_cmp(addr, &map->m_addr);
+               if (diff < 0)
                        p = &(*p)->rb_left;
-               else if (addr > map->m_addr)
+               else if (diff > 0)
                        p = &(*p)->rb_right;
                else
                        return map;
@@ -132,7 +135,7 @@ static struct rds_cong_map *rds_cong_tree_walk(__be32 addr,
  * these bitmaps in the process getting pointers to them.  The bitmaps are only
  * ever freed as the module is removed after all connections have been freed.
  */
-static struct rds_cong_map *rds_cong_from_addr(__be32 addr)
+static struct rds_cong_map *rds_cong_from_addr(const struct in6_addr *addr)
 {
        struct rds_cong_map *map;
        struct rds_cong_map *ret = NULL;
@@ -144,7 +147,7 @@ static struct rds_cong_map *rds_cong_from_addr(__be32 addr)
        if (!map)
                return NULL;
 
-       map->m_addr = addr;
+       map->m_addr = *addr;
        init_waitqueue_head(&map->m_waitq);
        INIT_LIST_HEAD(&map->m_conn_list);
 
@@ -171,7 +174,7 @@ out:
                kfree(map);
        }
 
-       rdsdebug("map %p for addr %x\n", ret, be32_to_cpu(addr));
+       rdsdebug("map %p for addr %pI6c\n", ret, addr);
 
        return ret;
 }
@@ -202,8 +205,8 @@ void rds_cong_remove_conn(struct rds_connection *conn)
 
 int rds_cong_get_maps(struct rds_connection *conn)
 {
-       conn->c_lcong = rds_cong_from_addr(conn->c_laddr);
-       conn->c_fcong = rds_cong_from_addr(conn->c_faddr);
+       conn->c_lcong = rds_cong_from_addr(&conn->c_laddr);
+       conn->c_fcong = rds_cong_from_addr(&conn->c_faddr);
 
        if (!(conn->c_lcong && conn->c_fcong))
                return -ENOMEM;
@@ -353,7 +356,7 @@ void rds_cong_remove_socket(struct rds_sock *rs)
 
        /* update congestion map for now-closed port */
        spin_lock_irqsave(&rds_cong_lock, flags);
-       map = rds_cong_tree_walk(rs->rs_bound_addr, NULL);
+       map = rds_cong_tree_walk(&rs->rs_bound_addr, NULL);
        spin_unlock_irqrestore(&rds_cong_lock, flags);
 
        if (map && rds_cong_test_bit(map, rs->rs_bound_port)) {
index cfb05953b0e57afad21fd708f0df42d63c77cd55..3bd2f4a5a30d9eccc83f3242db0a59031d5ac5a3 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 Oracle.  All rights reserved.
+ * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -34,7 +34,9 @@
 #include <linux/list.h>
 #include <linux/slab.h>
 #include <linux/export.h>
-#include <net/inet_hashtables.h>
+#include <net/ipv6.h>
+#include <net/inet6_hashtables.h>
+#include <net/addrconf.h>
 
 #include "rds.h"
 #include "loop.h"
@@ -49,18 +51,25 @@ static unsigned long rds_conn_count;
 static struct hlist_head rds_conn_hash[RDS_CONNECTION_HASH_ENTRIES];
 static struct kmem_cache *rds_conn_slab;
 
-static struct hlist_head *rds_conn_bucket(__be32 laddr, __be32 faddr)
+static struct hlist_head *rds_conn_bucket(const struct in6_addr *laddr,
+                                         const struct in6_addr *faddr)
 {
+       static u32 rds6_hash_secret __read_mostly;
        static u32 rds_hash_secret __read_mostly;
 
-       unsigned long hash;
+       u32 lhash, fhash, hash;
 
        net_get_random_once(&rds_hash_secret, sizeof(rds_hash_secret));
+       net_get_random_once(&rds6_hash_secret, sizeof(rds6_hash_secret));
+
+       lhash = (__force u32)laddr->s6_addr32[3];
+#if IS_ENABLED(CONFIG_IPV6)
+       fhash = __ipv6_addr_jhash(faddr, rds6_hash_secret);
+#else
+       fhash = (__force u32)faddr->s6_addr32[3];
+#endif
+       hash = __inet_ehashfn(lhash, 0, fhash, 0, rds_hash_secret);
 
-       /* Pass NULL, don't need struct net for hash */
-       hash = __inet_ehashfn(be32_to_cpu(laddr), 0,
-                             be32_to_cpu(faddr), 0,
-                             rds_hash_secret);
        return &rds_conn_hash[hash & RDS_CONNECTION_HASH_MASK];
 }
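The bucket choice above mixes the low 32 bits of the local address with a
jhash of the full remote address, then masks the result into the connection
hash table. A simplified userspace sketch of the same pick-a-bucket idea
(the mixing function here is an ordinary FNV-1a byte hash, not the kernel's
__inet_ehashfn, and the table size is a placeholder):

    #include <stdint.h>
    #include <stddef.h>
    #include <netinet/in.h>

    #define CONN_HASH_ENTRIES 128      /* placeholder; a power of two */
    #define CONN_HASH_MASK (CONN_HASH_ENTRIES - 1)

    /* FNV-1a over a byte range; stands in for the kernel's jhash mixing. */
    static uint32_t fnv1a(const void *p, size_t n, uint32_t h)
    {
        const uint8_t *b = p;

        while (n--)
            h = (h ^ *b++) * 16777619u;
        return h;
    }

    static unsigned int conn_bucket(const struct in6_addr *laddr,
                                    const struct in6_addr *faddr)
    {
        uint32_t h = fnv1a(laddr, sizeof(*laddr), 2166136261u);

        h = fnv1a(faddr, sizeof(*faddr), h);
        return h & CONN_HASH_MASK;
    }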
 
@@ -72,20 +81,25 @@ static struct hlist_head *rds_conn_bucket(__be32 laddr, __be32 faddr)
 /* rcu read lock must be held or the connection spinlock */
 static struct rds_connection *rds_conn_lookup(struct net *net,
                                              struct hlist_head *head,
-                                             __be32 laddr, __be32 faddr,
-                                             struct rds_transport *trans)
+                                             const struct in6_addr *laddr,
+                                             const struct in6_addr *faddr,
+                                             struct rds_transport *trans,
+                                             int dev_if)
 {
        struct rds_connection *conn, *ret = NULL;
 
        hlist_for_each_entry_rcu(conn, head, c_hash_node) {
-               if (conn->c_faddr == faddr && conn->c_laddr == laddr &&
-                   conn->c_trans == trans && net == rds_conn_net(conn)) {
+               if (ipv6_addr_equal(&conn->c_faddr, faddr) &&
+                   ipv6_addr_equal(&conn->c_laddr, laddr) &&
+                   conn->c_trans == trans &&
+                   net == rds_conn_net(conn) &&
+                   conn->c_dev_if == dev_if) {
                        ret = conn;
                        break;
                }
        }
-       rdsdebug("returning conn %p for %pI4 -> %pI4\n", ret,
-                &laddr, &faddr);
+       rdsdebug("returning conn %p for %pI6c -> %pI6c\n", ret,
+                laddr, faddr);
        return ret;
 }
 
@@ -99,8 +113,8 @@ static void rds_conn_path_reset(struct rds_conn_path *cp)
 {
        struct rds_connection *conn = cp->cp_conn;
 
-       rdsdebug("connection %pI4 to %pI4 reset\n",
-         &conn->c_laddr, &conn->c_faddr);
+       rdsdebug("connection %pI6c to %pI6c reset\n",
+                &conn->c_laddr, &conn->c_faddr);
 
        rds_stats_inc(s_conn_reset);
        rds_send_path_reset(cp);
@@ -142,9 +156,12 @@ static void __rds_conn_path_init(struct rds_connection *conn,
  * are torn down as the module is removed, if ever.
  */
 static struct rds_connection *__rds_conn_create(struct net *net,
-                                               __be32 laddr, __be32 faddr,
-                                      struct rds_transport *trans, gfp_t gfp,
-                                      int is_outgoing)
+                                               const struct in6_addr *laddr,
+                                               const struct in6_addr *faddr,
+                                               struct rds_transport *trans,
+                                               gfp_t gfp,
+                                               int is_outgoing,
+                                               int dev_if)
 {
        struct rds_connection *conn, *parent = NULL;
        struct hlist_head *head = rds_conn_bucket(laddr, faddr);
@@ -154,9 +171,12 @@ static struct rds_connection *__rds_conn_create(struct net *net,
        int npaths = (trans->t_mp_capable ? RDS_MPATH_WORKERS : 1);
 
        rcu_read_lock();
-       conn = rds_conn_lookup(net, head, laddr, faddr, trans);
-       if (conn && conn->c_loopback && conn->c_trans != &rds_loop_transport &&
-           laddr == faddr && !is_outgoing) {
+       conn = rds_conn_lookup(net, head, laddr, faddr, trans, dev_if);
+       if (conn &&
+           conn->c_loopback &&
+           conn->c_trans != &rds_loop_transport &&
+           ipv6_addr_equal(laddr, faddr) &&
+           !is_outgoing) {
                /* This is a looped back IB connection, and we're
                 * called by the code handling the incoming connect.
                 * We need a second connection object into which we
@@ -181,8 +201,22 @@ static struct rds_connection *__rds_conn_create(struct net *net,
        }
 
        INIT_HLIST_NODE(&conn->c_hash_node);
-       conn->c_laddr = laddr;
-       conn->c_faddr = faddr;
+       conn->c_laddr = *laddr;
+       conn->c_isv6 = !ipv6_addr_v4mapped(laddr);
+       conn->c_faddr = *faddr;
+       conn->c_dev_if = dev_if;
+
+#if IS_ENABLED(CONFIG_IPV6)
+       /* If the local address is link-local, set c_bound_if to the
+        * interface index used for this connection.  Otherwise, set it
+        * to 0 as the socket is not bound to an interface.  c_bound_if
+        * is used to look up a socket when a packet is received.
+        */
+       if (ipv6_addr_type(laddr) & IPV6_ADDR_LINKLOCAL)
+               conn->c_bound_if = dev_if;
+       else
+#endif
+               conn->c_bound_if = 0;
 
        rds_conn_net_set(conn, net);
 
@@ -199,7 +233,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
         * can bind to the destination address then we'd rather the messages
         * flow through loopback rather than either transport.
         */
-       loop_trans = rds_trans_get_preferred(net, faddr);
+       loop_trans = rds_trans_get_preferred(net, faddr, conn->c_dev_if);
        if (loop_trans) {
                rds_trans_put(loop_trans);
                conn->c_loopback = 1;
@@ -233,10 +267,10 @@ static struct rds_connection *__rds_conn_create(struct net *net,
                goto out;
        }
 
-       rdsdebug("allocated conn %p for %pI4 -> %pI4 over %s %s\n",
-         conn, &laddr, &faddr,
-         strnlen(trans->t_name, sizeof(trans->t_name)) ? trans->t_name :
-         "[unknown]", is_outgoing ? "(outgoing)" : "");
+       rdsdebug("allocated conn %p for %pI6c -> %pI6c over %s %s\n",
+                conn, laddr, faddr,
+                strnlen(trans->t_name, sizeof(trans->t_name)) ?
+                trans->t_name : "[unknown]", is_outgoing ? "(outgoing)" : "");
 
        /*
         * Since we ran without holding the conn lock, someone could
@@ -262,7 +296,8 @@ static struct rds_connection *__rds_conn_create(struct net *net,
                /* Creating normal conn */
                struct rds_connection *found;
 
-               found = rds_conn_lookup(net, head, laddr, faddr, trans);
+               found = rds_conn_lookup(net, head, laddr, faddr, trans,
+                                       dev_if);
                if (found) {
                        struct rds_conn_path *cp;
                        int i;
@@ -295,18 +330,22 @@ out:
 }
 
 struct rds_connection *rds_conn_create(struct net *net,
-                                      __be32 laddr, __be32 faddr,
-                                      struct rds_transport *trans, gfp_t gfp)
+                                      const struct in6_addr *laddr,
+                                      const struct in6_addr *faddr,
+                                      struct rds_transport *trans, gfp_t gfp,
+                                      int dev_if)
 {
-       return __rds_conn_create(net, laddr, faddr, trans, gfp, 0);
+       return __rds_conn_create(net, laddr, faddr, trans, gfp, 0, dev_if);
 }
 EXPORT_SYMBOL_GPL(rds_conn_create);
 
 struct rds_connection *rds_conn_create_outgoing(struct net *net,
-                                               __be32 laddr, __be32 faddr,
-                                      struct rds_transport *trans, gfp_t gfp)
+                                               const struct in6_addr *laddr,
+                                               const struct in6_addr *faddr,
+                                               struct rds_transport *trans,
+                                               gfp_t gfp, int dev_if)
 {
-       return __rds_conn_create(net, laddr, faddr, trans, gfp, 1);
+       return __rds_conn_create(net, laddr, faddr, trans, gfp, 1, dev_if);
 }
 EXPORT_SYMBOL_GPL(rds_conn_create_outgoing);
 
@@ -464,10 +503,23 @@ void rds_conn_destroy(struct rds_connection *conn)
 }
 EXPORT_SYMBOL_GPL(rds_conn_destroy);
 
-static void rds_conn_message_info(struct socket *sock, unsigned int len,
-                                 struct rds_info_iterator *iter,
-                                 struct rds_info_lengths *lens,
-                                 int want_send)
+static void __rds_inc_msg_cp(struct rds_incoming *inc,
+                            struct rds_info_iterator *iter,
+                            void *saddr, void *daddr, int flip, bool isv6)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+       if (isv6)
+               rds6_inc_info_copy(inc, iter, saddr, daddr, flip);
+       else
+#endif
+               rds_inc_info_copy(inc, iter, *(__be32 *)saddr,
+                                 *(__be32 *)daddr, flip);
+}
+
+static void rds_conn_message_info_cmn(struct socket *sock, unsigned int len,
+                                     struct rds_info_iterator *iter,
+                                     struct rds_info_lengths *lens,
+                                     int want_send, bool isv6)
 {
        struct hlist_head *head;
        struct list_head *list;
@@ -478,7 +530,10 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len,
        size_t i;
        int j;
 
-       len /= sizeof(struct rds_info_message);
+       if (isv6)
+               len /= sizeof(struct rds6_info_message);
+       else
+               len /= sizeof(struct rds_info_message);
 
        rcu_read_lock();
 
@@ -488,6 +543,9 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len,
                        struct rds_conn_path *cp;
                        int npaths;
 
+                       if (!isv6 && conn->c_isv6)
+                               continue;
+
                        npaths = (conn->c_trans->t_mp_capable ?
                                 RDS_MPATH_WORKERS : 1);
 
@@ -504,11 +562,11 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len,
                                list_for_each_entry(rm, list, m_conn_item) {
                                        total++;
                                        if (total <= len)
-                                               rds_inc_info_copy(&rm->m_inc,
-                                                                 iter,
-                                                                 conn->c_laddr,
-                                                                 conn->c_faddr,
-                                                                 0);
+                                               __rds_inc_msg_cp(&rm->m_inc,
+                                                                iter,
+                                                                &conn->c_laddr,
+                                                                &conn->c_faddr,
+                                                                0, isv6);
                                }
 
                                spin_unlock_irqrestore(&cp->cp_lock, flags);
@@ -518,9 +576,30 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len,
        rcu_read_unlock();
 
        lens->nr = total;
-       lens->each = sizeof(struct rds_info_message);
+       if (isv6)
+               lens->each = sizeof(struct rds6_info_message);
+       else
+               lens->each = sizeof(struct rds_info_message);
 }
 
+static void rds_conn_message_info(struct socket *sock, unsigned int len,
+                                 struct rds_info_iterator *iter,
+                                 struct rds_info_lengths *lens,
+                                 int want_send)
+{
+       rds_conn_message_info_cmn(sock, len, iter, lens, want_send, false);
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static void rds6_conn_message_info(struct socket *sock, unsigned int len,
+                                  struct rds_info_iterator *iter,
+                                  struct rds_info_lengths *lens,
+                                  int want_send)
+{
+       rds_conn_message_info_cmn(sock, len, iter, lens, want_send, true);
+}
+#endif
+
 static void rds_conn_message_info_send(struct socket *sock, unsigned int len,
                                       struct rds_info_iterator *iter,
                                       struct rds_info_lengths *lens)
@@ -528,6 +607,15 @@ static void rds_conn_message_info_send(struct socket *sock, unsigned int len,
        rds_conn_message_info(sock, len, iter, lens, 1);
 }
 
+#if IS_ENABLED(CONFIG_IPV6)
+static void rds6_conn_message_info_send(struct socket *sock, unsigned int len,
+                                       struct rds_info_iterator *iter,
+                                       struct rds_info_lengths *lens)
+{
+       rds6_conn_message_info(sock, len, iter, lens, 1);
+}
+#endif
+
 static void rds_conn_message_info_retrans(struct socket *sock,
                                          unsigned int len,
                                          struct rds_info_iterator *iter,
@@ -536,6 +624,16 @@ static void rds_conn_message_info_retrans(struct socket *sock,
        rds_conn_message_info(sock, len, iter, lens, 0);
 }
 
+#if IS_ENABLED(CONFIG_IPV6)
+static void rds6_conn_message_info_retrans(struct socket *sock,
+                                          unsigned int len,
+                                          struct rds_info_iterator *iter,
+                                          struct rds_info_lengths *lens)
+{
+       rds6_conn_message_info(sock, len, iter, lens, 0);
+}
+#endif
+
 void rds_for_each_conn_info(struct socket *sock, unsigned int len,
                          struct rds_info_iterator *iter,
                          struct rds_info_lengths *lens,
@@ -584,7 +682,6 @@ static void rds_walk_conn_path_info(struct socket *sock, unsigned int len,
        struct hlist_head *head;
        struct rds_connection *conn;
        size_t i;
-       int j;
 
        rcu_read_lock();
 
@@ -595,17 +692,20 @@ static void rds_walk_conn_path_info(struct socket *sock, unsigned int len,
             i++, head++) {
                hlist_for_each_entry_rcu(conn, head, c_hash_node) {
                        struct rds_conn_path *cp;
-                       int npaths;
 
-                       npaths = (conn->c_trans->t_mp_capable ?
-                                RDS_MPATH_WORKERS : 1);
-                       for (j = 0; j < npaths; j++) {
-                               cp = &conn->c_path[j];
+                       /* XXX We only copy the information from the first
+                        * path for now.  The problem is that if there is
+                        * more than one underlying path, we cannot report
+                        * information for all of them using the existing
+                        * API.  For example, there is only one next_tx_seq;
+                        * which path's next_tx_seq should we report?  It is
+                        * a bug in the design of MPRDS.
+                        */
+                       cp = conn->c_path;
 
-                               /* XXX no cp_lock usage.. */
-                               if (!visitor(cp, buffer))
-                                       continue;
-                       }
+                       /* XXX no cp_lock usage.. */
+                       if (!visitor(cp, buffer))
+                               continue;
 
                        /* We copy as much as we can fit in the buffer,
                         * but we count all items so that the caller
@@ -624,12 +724,16 @@ static void rds_walk_conn_path_info(struct socket *sock, unsigned int len,
 static int rds_conn_info_visitor(struct rds_conn_path *cp, void *buffer)
 {
        struct rds_info_connection *cinfo = buffer;
+       struct rds_connection *conn = cp->cp_conn;
+
+       if (conn->c_isv6)
+               return 0;
 
        cinfo->next_tx_seq = cp->cp_next_tx_seq;
        cinfo->next_rx_seq = cp->cp_next_rx_seq;
-       cinfo->laddr = cp->cp_conn->c_laddr;
-       cinfo->faddr = cp->cp_conn->c_faddr;
-       strncpy(cinfo->transport, cp->cp_conn->c_trans->t_name,
+       cinfo->laddr = conn->c_laddr.s6_addr32[3];
+       cinfo->faddr = conn->c_faddr.s6_addr32[3];
+       strncpy(cinfo->transport, conn->c_trans->t_name,
                sizeof(cinfo->transport));
        cinfo->flags = 0;
 
@@ -645,6 +749,36 @@ static int rds_conn_info_visitor(struct rds_conn_path *cp, void *buffer)
        return 1;
 }
 
+#if IS_ENABLED(CONFIG_IPV6)
+static int rds6_conn_info_visitor(struct rds_conn_path *cp, void *buffer)
+{
+       struct rds6_info_connection *cinfo6 = buffer;
+       struct rds_connection *conn = cp->cp_conn;
+
+       cinfo6->next_tx_seq = cp->cp_next_tx_seq;
+       cinfo6->next_rx_seq = cp->cp_next_rx_seq;
+       cinfo6->laddr = conn->c_laddr;
+       cinfo6->faddr = conn->c_faddr;
+       strncpy(cinfo6->transport, conn->c_trans->t_name,
+               sizeof(cinfo6->transport));
+       cinfo6->flags = 0;
+
+       rds_conn_info_set(cinfo6->flags, test_bit(RDS_IN_XMIT, &cp->cp_flags),
+                         SENDING);
+       /* XXX Future: return the state rather than these funky bits */
+       rds_conn_info_set(cinfo6->flags,
+                         atomic_read(&cp->cp_state) == RDS_CONN_CONNECTING,
+                         CONNECTING);
+       rds_conn_info_set(cinfo6->flags,
+                         atomic_read(&cp->cp_state) == RDS_CONN_UP,
+                         CONNECTED);
+       /* Just return 1 as there is no error case. This is a helper function
+        * for rds_walk_conn_path_info() and it wants a return value.
+        */
+       return 1;
+}
+#endif
+
 static void rds_conn_info(struct socket *sock, unsigned int len,
                          struct rds_info_iterator *iter,
                          struct rds_info_lengths *lens)
@@ -657,6 +791,20 @@ static void rds_conn_info(struct socket *sock, unsigned int len,
                                sizeof(struct rds_info_connection));
 }
 
+#if IS_ENABLED(CONFIG_IPV6)
+static void rds6_conn_info(struct socket *sock, unsigned int len,
+                          struct rds_info_iterator *iter,
+                          struct rds_info_lengths *lens)
+{
+       u64 buffer[(sizeof(struct rds6_info_connection) + 7) / 8];
+
+       rds_walk_conn_path_info(sock, len, iter, lens,
+                               rds6_conn_info_visitor,
+                               buffer,
+                               sizeof(struct rds6_info_connection));
+}
+#endif
+
 int rds_conn_init(void)
 {
        int ret;
@@ -678,7 +826,13 @@ int rds_conn_init(void)
                               rds_conn_message_info_send);
        rds_info_register_func(RDS_INFO_RETRANS_MESSAGES,
                               rds_conn_message_info_retrans);
-
+#if IS_ENABLED(CONFIG_IPV6)
+       rds_info_register_func(RDS6_INFO_CONNECTIONS, rds6_conn_info);
+       rds_info_register_func(RDS6_INFO_SEND_MESSAGES,
+                              rds6_conn_message_info_send);
+       rds_info_register_func(RDS6_INFO_RETRANS_MESSAGES,
+                              rds6_conn_message_info_retrans);
+#endif
        return 0;
 }
 
@@ -696,6 +850,13 @@ void rds_conn_exit(void)
                                 rds_conn_message_info_send);
        rds_info_deregister_func(RDS_INFO_RETRANS_MESSAGES,
                                 rds_conn_message_info_retrans);
+#if IS_ENABLED(CONFIG_IPV6)
+       rds_info_deregister_func(RDS6_INFO_CONNECTIONS, rds6_conn_info);
+       rds_info_deregister_func(RDS6_INFO_SEND_MESSAGES,
+                                rds6_conn_message_info_send);
+       rds_info_deregister_func(RDS6_INFO_RETRANS_MESSAGES,
+                                rds6_conn_message_info_retrans);
+#endif
 }
 
 /*
index b6ad38e48f62692fa9dc6cc9f7c7081c706394a7..89c6333ecd39a092d45283705da01e1fce0b98ab 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 Oracle.  All rights reserved.
+ * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -39,6 +39,7 @@
 #include <linux/delay.h>
 #include <linux/slab.h>
 #include <linux/module.h>
+#include <net/addrconf.h>
 
 #include "rds_single_path.h"
 #include "rds.h"
@@ -295,9 +296,11 @@ static int rds_ib_conn_info_visitor(struct rds_connection *conn,
        /* We will only ever look at IB transports */
        if (conn->c_trans != &rds_ib_transport)
                return 0;
+       if (conn->c_isv6)
+               return 0;
 
-       iinfo->src_addr = conn->c_laddr;
-       iinfo->dst_addr = conn->c_faddr;
+       iinfo->src_addr = conn->c_laddr.s6_addr32[3];
+       iinfo->dst_addr = conn->c_faddr.s6_addr32[3];
 
        memset(&iinfo->src_gid, 0, sizeof(iinfo->src_gid));
        memset(&iinfo->dst_gid, 0, sizeof(iinfo->dst_gid));
@@ -318,6 +321,45 @@ static int rds_ib_conn_info_visitor(struct rds_connection *conn,
        return 1;
 }
 
+#if IS_ENABLED(CONFIG_IPV6)
+/* IPv6 version of rds_ib_conn_info_visitor(). */
+static int rds6_ib_conn_info_visitor(struct rds_connection *conn,
+                                    void *buffer)
+{
+       struct rds6_info_rdma_connection *iinfo6 = buffer;
+       struct rds_ib_connection *ic;
+
+       /* We will only ever look at IB transports */
+       if (conn->c_trans != &rds_ib_transport)
+               return 0;
+
+       iinfo6->src_addr = conn->c_laddr;
+       iinfo6->dst_addr = conn->c_faddr;
+
+       memset(&iinfo6->src_gid, 0, sizeof(iinfo6->src_gid));
+       memset(&iinfo6->dst_gid, 0, sizeof(iinfo6->dst_gid));
+
+       if (rds_conn_state(conn) == RDS_CONN_UP) {
+               struct rds_ib_device *rds_ibdev;
+               struct rdma_dev_addr *dev_addr;
+
+               ic = conn->c_transport_data;
+               dev_addr = &ic->i_cm_id->route.addr.dev_addr;
+               rdma_addr_get_sgid(dev_addr,
+                                  (union ib_gid *)&iinfo6->src_gid);
+               rdma_addr_get_dgid(dev_addr,
+                                  (union ib_gid *)&iinfo6->dst_gid);
+
+               rds_ibdev = ic->rds_ibdev;
+               iinfo6->max_send_wr = ic->i_send_ring.w_nr;
+               iinfo6->max_recv_wr = ic->i_recv_ring.w_nr;
+               iinfo6->max_send_sge = rds_ibdev->max_sge;
+               rds6_ib_get_mr_info(rds_ibdev, iinfo6);
+       }
+       return 1;
+}
+#endif
+
 static void rds_ib_ic_info(struct socket *sock, unsigned int len,
                           struct rds_info_iterator *iter,
                           struct rds_info_lengths *lens)
@@ -330,6 +372,20 @@ static void rds_ib_ic_info(struct socket *sock, unsigned int len,
                                sizeof(struct rds_info_rdma_connection));
 }
 
+#if IS_ENABLED(CONFIG_IPV6)
+/* IPv6 version of rds_ib_ic_info(). */
+static void rds6_ib_ic_info(struct socket *sock, unsigned int len,
+                           struct rds_info_iterator *iter,
+                           struct rds_info_lengths *lens)
+{
+       u64 buffer[(sizeof(struct rds6_info_rdma_connection) + 7) / 8];
+
+       rds_for_each_conn_info(sock, len, iter, lens,
+                              rds6_ib_conn_info_visitor,
+                              buffer,
+                              sizeof(struct rds6_info_rdma_connection));
+}
+#endif
 
 /*
  * Early RDS/IB was built to only bind to an address if there is an IPoIB
@@ -341,12 +397,19 @@ static void rds_ib_ic_info(struct socket *sock, unsigned int len,
  * allowed to influence which paths have priority.  We could call userspace
  * asserting this policy "routing".
  */
-static int rds_ib_laddr_check(struct net *net, __be32 addr)
+static int rds_ib_laddr_check(struct net *net, const struct in6_addr *addr,
+                             __u32 scope_id)
 {
        int ret;
        struct rdma_cm_id *cm_id;
+#if IS_ENABLED(CONFIG_IPV6)
+       struct sockaddr_in6 sin6;
+#endif
        struct sockaddr_in sin;
+       struct sockaddr *sa;
+       bool isv4;
 
+       isv4 = ipv6_addr_v4mapped(addr);
        /* Create a CMA ID and try to bind it. This catches both
         * IB and iWARP capable NICs.
         */
@@ -355,22 +418,66 @@ static int rds_ib_laddr_check(struct net *net, __be32 addr)
        if (IS_ERR(cm_id))
                return PTR_ERR(cm_id);
 
-       memset(&sin, 0, sizeof(sin));
-       sin.sin_family = AF_INET;
-       sin.sin_addr.s_addr = addr;
+       if (isv4) {
+               memset(&sin, 0, sizeof(sin));
+               sin.sin_family = AF_INET;
+               sin.sin_addr.s_addr = addr->s6_addr32[3];
+               sa = (struct sockaddr *)&sin;
+       } else {
+#if IS_ENABLED(CONFIG_IPV6)
+               memset(&sin6, 0, sizeof(sin6));
+               sin6.sin6_family = AF_INET6;
+               sin6.sin6_addr = *addr;
+               sin6.sin6_scope_id = scope_id;
+               sa = (struct sockaddr *)&sin6;
+
+               /* XXX Do a special IPv6 link-local address check here,
+                * because rdma_bind_addr() always succeeds with an IPv6
+                * link-local address regardless of whether it is actually
+                * configured on the system.
+                */
+               if (ipv6_addr_type(addr) & IPV6_ADDR_LINKLOCAL) {
+                       struct net_device *dev;
+
+                       if (scope_id == 0) {
+                               ret = -EADDRNOTAVAIL;
+                               goto out;
+                       }
+
+                       /* Use init_net for now as RDS is not network
+                        * name space aware.
+                        */
+                       dev = dev_get_by_index(&init_net, scope_id);
+                       if (!dev) {
+                               ret = -EADDRNOTAVAIL;
+                               goto out;
+                       }
+                       if (!ipv6_chk_addr(&init_net, addr, dev, 1)) {
+                               dev_put(dev);
+                               ret = -EADDRNOTAVAIL;
+                               goto out;
+                       }
+                       dev_put(dev);
+               }
+#else
+               ret = -EADDRNOTAVAIL;
+               goto out;
+#endif
+       }
 
        /* rdma_bind_addr will only succeed for IB & iWARP devices */
-       ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin);
+       ret = rdma_bind_addr(cm_id, sa);
        /* due to this, we will claim to support iWARP devices unless we
           check node_type. */
        if (ret || !cm_id->device ||
            cm_id->device->node_type != RDMA_NODE_IB_CA)
                ret = -EADDRNOTAVAIL;
 
-       rdsdebug("addr %pI4 ret %d node type %d\n",
-               &addr, ret,
-               cm_id->device ? cm_id->device->node_type : -1);
+       rdsdebug("addr %pI6c%%%u ret %d node type %d\n",
+                addr, scope_id, ret,
+                cm_id->device ? cm_id->device->node_type : -1);
 
+out:
        rdma_destroy_id(cm_id);
 
        return ret;
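The ipv6_chk_addr() guard above rejects a link-local bind unless the address
is actually configured on the interface identified by scope_id. A userspace
analogue of that check using getifaddrs():

    #include <stdbool.h>
    #include <string.h>
    #include <ifaddrs.h>
    #include <net/if.h>
    #include <netinet/in.h>

    /* True if 'addr' is configured on the interface with index 'ifindex'. */
    static bool addr_on_ifindex(const struct in6_addr *addr,
                                unsigned int ifindex)
    {
        struct ifaddrs *ifa, *p;
        bool found = false;

        if (getifaddrs(&ifa) < 0)
            return false;
        for (p = ifa; p; p = p->ifa_next) {
            struct sockaddr_in6 *sin6;

            if (!p->ifa_addr || p->ifa_addr->sa_family != AF_INET6)
                continue;
            sin6 = (struct sockaddr_in6 *)p->ifa_addr;
            if (if_nametoindex(p->ifa_name) == ifindex &&
                memcmp(&sin6->sin6_addr, addr, sizeof(*addr)) == 0) {
                found = true;
                break;
            }
        }
        freeifaddrs(ifa);
        return found;
    }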
@@ -401,6 +508,9 @@ void rds_ib_exit(void)
        rds_ib_set_unloading();
        synchronize_rcu();
        rds_info_deregister_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info);
+#if IS_ENABLED(CONFIG_IPV6)
+       rds_info_deregister_func(RDS6_INFO_IB_CONNECTIONS, rds6_ib_ic_info);
+#endif
        rds_ib_unregister_client();
        rds_ib_destroy_nodev_conns();
        rds_ib_sysctl_exit();
@@ -462,6 +572,9 @@ int rds_ib_init(void)
        rds_trans_register(&rds_ib_transport);
 
        rds_info_register_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info);
+#if IS_ENABLED(CONFIG_IPV6)
+       rds_info_register_func(RDS6_INFO_IB_CONNECTIONS, rds6_ib_ic_info);
+#endif
 
        goto out;
 
@@ -476,4 +589,3 @@ out:
 }
 
 MODULE_LICENSE("GPL");
-
index a6f4d7d68e954ddfd95584c4197786a8a200211b..73427ff439f90fc069ac44e53a5f8e2597a85bc6 100644 (file)
@@ -57,16 +57,44 @@ struct rds_ib_refill_cache {
        struct list_head         *ready;
 };
 
+/* This is the common structure for the IB private data exchange in setting up
+ * an RDS connection.  The exchange differs for IPv4 and IPv6 connections
+ * because the address sizes differ and the exchanged addresses sit at the
+ * beginning of the structure, so a single structure cannot serve both
+ * families and remain interoperable.
+ */
+struct rds_ib_conn_priv_cmn {
+       u8                      ricpc_protocol_major;
+       u8                      ricpc_protocol_minor;
+       __be16                  ricpc_protocol_minor_mask;      /* bitmask */
+       __be32                  ricpc_reserved1;
+       __be64                  ricpc_ack_seq;
+       __be32                  ricpc_credit;   /* non-zero enables flow ctl */
+};
+
 struct rds_ib_connect_private {
        /* Add new fields at the end, and don't permute existing fields. */
-       __be32                  dp_saddr;
-       __be32                  dp_daddr;
-       u8                      dp_protocol_major;
-       u8                      dp_protocol_minor;
-       __be16                  dp_protocol_minor_mask; /* bitmask */
-       __be32                  dp_reserved1;
-       __be64                  dp_ack_seq;
-       __be32                  dp_credit;              /* non-zero enables flow ctl */
+       __be32                          dp_saddr;
+       __be32                          dp_daddr;
+       struct rds_ib_conn_priv_cmn     dp_cmn;
+};
+
+struct rds6_ib_connect_private {
+       /* Add new fields at the end, and don't permute existing fields. */
+       struct in6_addr                 dp_saddr;
+       struct in6_addr                 dp_daddr;
+       struct rds_ib_conn_priv_cmn     dp_cmn;
+};
+
+#define dp_protocol_major      dp_cmn.ricpc_protocol_major
+#define dp_protocol_minor      dp_cmn.ricpc_protocol_minor
+#define dp_protocol_minor_mask dp_cmn.ricpc_protocol_minor_mask
+#define dp_ack_seq             dp_cmn.ricpc_ack_seq
+#define dp_credit              dp_cmn.ricpc_credit
+
+union rds_ib_conn_priv {
+       struct rds_ib_connect_private   ricp_v4;
+       struct rds6_ib_connect_private  ricp_v6;
 };
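
To see why one structure cannot serve both families, note that the
addresses sit in front of the common fields, so the common tail lands at
a different offset in each layout.  A stand-alone sketch (simplified
field types, not part of the patch) that prints the two offsets:

    #include <stdio.h>
    #include <stddef.h>
    #include <stdint.h>

    struct cmn {
            uint8_t  major, minor;
            uint16_t minor_mask;
            uint32_t reserved1;
            uint64_t ack_seq;
            uint32_t credit;
    };
    struct priv_v4 { uint32_t saddr, daddr; struct cmn cmn; };
    struct priv_v6 { uint8_t saddr[16], daddr[16]; struct cmn cmn; };

    int main(void)
    {
            /* Typically prints 8 and 32: a peer parsing the wrong
             * layout would read address bytes as protocol fields.
             */
            printf("v4 cmn offset %zu, v6 cmn offset %zu\n",
                   offsetof(struct priv_v4, cmn),
                   offsetof(struct priv_v6, cmn));
            return 0;
    }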
 
 struct rds_ib_send_work {
@@ -351,8 +379,8 @@ void rds_ib_listen_stop(void);
 __printf(2, 3)
 void __rds_ib_conn_error(struct rds_connection *conn, const char *, ...);
 int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
-                            struct rdma_cm_event *event);
-int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id);
+                            struct rdma_cm_event *event, bool isv6);
+int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id, bool isv6);
 void rds_ib_cm_connect_complete(struct rds_connection *conn,
                                struct rdma_cm_event *event);
 
@@ -361,7 +389,8 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn,
        __rds_ib_conn_error(conn, KERN_WARNING "RDS/IB: " fmt)
 
 /* ib_rdma.c */
-int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr);
+int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev,
+                        struct in6_addr *ipaddr);
 void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn);
 void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn);
 void rds_ib_destroy_nodev_conns(void);
@@ -371,7 +400,7 @@ void rds_ib_mr_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc);
 int rds_ib_recv_init(void);
 void rds_ib_recv_exit(void);
 int rds_ib_recv_path(struct rds_conn_path *conn);
-int rds_ib_recv_alloc_caches(struct rds_ib_connection *ic);
+int rds_ib_recv_alloc_caches(struct rds_ib_connection *ic, gfp_t gfp);
 void rds_ib_recv_free_caches(struct rds_ib_connection *ic);
 void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp);
 void rds_ib_inc_free(struct rds_incoming *inc);
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index f1684ae6abfd520fc0579a50f007316e9a09c31d..bfbb31f0c7fd9a83a90a954c7eabc69780615788 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 Oracle.  All rights reserved.
+ * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -35,6 +35,7 @@
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/ratelimit.h>
+#include <net/addrconf.h>
 
 #include "rds_single_path.h"
 #include "rds.h"
@@ -95,25 +96,45 @@ rds_ib_tune_rnr(struct rds_ib_connection *ic, struct ib_qp_attr *attr)
  */
 void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_event *event)
 {
-       const struct rds_ib_connect_private *dp = NULL;
        struct rds_ib_connection *ic = conn->c_transport_data;
+       const union rds_ib_conn_priv *dp = NULL;
        struct ib_qp_attr qp_attr;
+       __be64 ack_seq = 0;
+       __be32 credit = 0;
+       u8 major = 0;
+       u8 minor = 0;
        int err;
 
-       if (event->param.conn.private_data_len >= sizeof(*dp)) {
-               dp = event->param.conn.private_data;
-
-               /* make sure it isn't empty data */
-               if (dp->dp_protocol_major) {
-                       rds_ib_set_protocol(conn,
-                               RDS_PROTOCOL(dp->dp_protocol_major,
-                               dp->dp_protocol_minor));
-                       rds_ib_set_flow_control(conn, be32_to_cpu(dp->dp_credit));
+       dp = event->param.conn.private_data;
+       if (conn->c_isv6) {
+               if (event->param.conn.private_data_len >=
+                   sizeof(struct rds6_ib_connect_private)) {
+                       major = dp->ricp_v6.dp_protocol_major;
+                       minor = dp->ricp_v6.dp_protocol_minor;
+                       credit = dp->ricp_v6.dp_credit;
+                       /* The start of the dp structure is not guaranteed
+                        * to be 8 byte aligned.  Since dp_ack_seq is 64 bits
+                        * wide, extended load operations may be used, so go
+                        * through get_unaligned() to avoid unaligned access
+                        * errors.
+                        */
+                       ack_seq = get_unaligned(&dp->ricp_v6.dp_ack_seq);
                }
+       } else if (event->param.conn.private_data_len >=
+                  sizeof(struct rds_ib_connect_private)) {
+               major = dp->ricp_v4.dp_protocol_major;
+               minor = dp->ricp_v4.dp_protocol_minor;
+               credit = dp->ricp_v4.dp_credit;
+               ack_seq = get_unaligned(&dp->ricp_v4.dp_ack_seq);
+       }
+
+       /* make sure it isn't empty data */
+       if (major) {
+               rds_ib_set_protocol(conn, RDS_PROTOCOL(major, minor));
+               rds_ib_set_flow_control(conn, be32_to_cpu(credit));
        }
 
        if (conn->c_version < RDS_PROTOCOL(3, 1)) {
-               pr_notice("RDS/IB: Connection <%pI4,%pI4> version %u.%u no longer supported\n",
+               pr_notice("RDS/IB: Connection <%pI6c,%pI6c> version %u.%u no longer supported\n",
                          &conn->c_laddr, &conn->c_faddr,
                          RDS_PROTOCOL_MAJOR(conn->c_version),
                          RDS_PROTOCOL_MINOR(conn->c_version));
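
The get_unaligned() above matters because the peer's private data buffer
is not guaranteed to be 8 byte aligned, while a 64-bit field invites the
compiler to emit a wide load.  A user-space sketch of the same idea,
assuming a memcpy-based helper (the kernel selects an implementation per
architecture):

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>

    static uint64_t get_unaligned_u64(const void *p)
    {
            uint64_t v;

            memcpy(&v, p, sizeof(v));  /* compiler emits a safe load */
            return v;
    }

    int main(void)
    {
            unsigned char buf[16] = { 0 };
            uint64_t seq = 0x1122334455667788ULL;

            memcpy(buf + 1, &seq, sizeof(seq));  /* deliberately misaligned */
            printf("0x%llx\n", (unsigned long long)get_unaligned_u64(buf + 1));
            return 0;
    }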
@@ -121,7 +142,7 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
                rds_conn_destroy(conn);
                return;
        } else {
-               pr_notice("RDS/IB: %s conn connected <%pI4,%pI4> version %u.%u%s\n",
+               pr_notice("RDS/IB: %s conn connected <%pI6c,%pI6c> version %u.%u%s\n",
                          ic->i_active_side ? "Active" : "Passive",
                          &conn->c_laddr, &conn->c_faddr,
                          RDS_PROTOCOL_MAJOR(conn->c_version),
@@ -150,7 +171,7 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
                printk(KERN_NOTICE "ib_modify_qp(IB_QP_STATE, RTS): err=%d\n", err);
 
        /* update ib_device with this local ipaddr */
-       err = rds_ib_update_ipaddr(ic->rds_ibdev, conn->c_laddr);
+       err = rds_ib_update_ipaddr(ic->rds_ibdev, &conn->c_laddr);
        if (err)
                printk(KERN_ERR "rds_ib_update_ipaddr failed (%d)\n",
                        err);
@@ -158,14 +179,8 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
        /* If the peer gave us the last packet it saw, process this as if
         * we had received a regular ACK. */
        if (dp) {
-               /* dp structure start is not guaranteed to be 8 bytes aligned.
-                * Since dp_ack_seq is 64-bit extended load operations can be
-                * used so go through get_unaligned to avoid unaligned errors.
-                */
-               __be64 dp_ack_seq = get_unaligned(&dp->dp_ack_seq);
-
-               if (dp_ack_seq)
-                       rds_send_drop_acked(conn, be64_to_cpu(dp_ack_seq),
+               if (ack_seq)
+                       rds_send_drop_acked(conn, be64_to_cpu(ack_seq),
                                            NULL);
        }
 
@@ -173,11 +188,12 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
 }
 
 static void rds_ib_cm_fill_conn_param(struct rds_connection *conn,
-                       struct rdma_conn_param *conn_param,
-                       struct rds_ib_connect_private *dp,
-                       u32 protocol_version,
-                       u32 max_responder_resources,
-                       u32 max_initiator_depth)
+                                     struct rdma_conn_param *conn_param,
+                                     union rds_ib_conn_priv *dp,
+                                     u32 protocol_version,
+                                     u32 max_responder_resources,
+                                     u32 max_initiator_depth,
+                                     bool isv6)
 {
        struct rds_ib_connection *ic = conn->c_transport_data;
        struct rds_ib_device *rds_ibdev = ic->rds_ibdev;
@@ -193,24 +209,49 @@ static void rds_ib_cm_fill_conn_param(struct rds_connection *conn,
 
        if (dp) {
                memset(dp, 0, sizeof(*dp));
-               dp->dp_saddr = conn->c_laddr;
-               dp->dp_daddr = conn->c_faddr;
-               dp->dp_protocol_major = RDS_PROTOCOL_MAJOR(protocol_version);
-               dp->dp_protocol_minor = RDS_PROTOCOL_MINOR(protocol_version);
-               dp->dp_protocol_minor_mask = cpu_to_be16(RDS_IB_SUPPORTED_PROTOCOLS);
-               dp->dp_ack_seq = cpu_to_be64(rds_ib_piggyb_ack(ic));
+               if (isv6) {
+                       dp->ricp_v6.dp_saddr = conn->c_laddr;
+                       dp->ricp_v6.dp_daddr = conn->c_faddr;
+                       dp->ricp_v6.dp_protocol_major =
+                           RDS_PROTOCOL_MAJOR(protocol_version);
+                       dp->ricp_v6.dp_protocol_minor =
+                           RDS_PROTOCOL_MINOR(protocol_version);
+                       dp->ricp_v6.dp_protocol_minor_mask =
+                           cpu_to_be16(RDS_IB_SUPPORTED_PROTOCOLS);
+                       dp->ricp_v6.dp_ack_seq =
+                           cpu_to_be64(rds_ib_piggyb_ack(ic));
+
+                       conn_param->private_data = &dp->ricp_v6;
+                       conn_param->private_data_len = sizeof(dp->ricp_v6);
+               } else {
+                       dp->ricp_v4.dp_saddr = conn->c_laddr.s6_addr32[3];
+                       dp->ricp_v4.dp_daddr = conn->c_faddr.s6_addr32[3];
+                       dp->ricp_v4.dp_protocol_major =
+                           RDS_PROTOCOL_MAJOR(protocol_version);
+                       dp->ricp_v4.dp_protocol_minor =
+                           RDS_PROTOCOL_MINOR(protocol_version);
+                       dp->ricp_v4.dp_protocol_minor_mask =
+                           cpu_to_be16(RDS_IB_SUPPORTED_PROTOCOLS);
+                       dp->ricp_v4.dp_ack_seq =
+                           cpu_to_be64(rds_ib_piggyb_ack(ic));
+
+                       conn_param->private_data = &dp->ricp_v4;
+                       conn_param->private_data_len = sizeof(dp->ricp_v4);
+               }
 
                /* Advertise flow control */
                if (ic->i_flowctl) {
                        unsigned int credits;
 
-                       credits = IB_GET_POST_CREDITS(atomic_read(&ic->i_credits));
-                       dp->dp_credit = cpu_to_be32(credits);
-                       atomic_sub(IB_SET_POST_CREDITS(credits), &ic->i_credits);
+                       credits = IB_GET_POST_CREDITS(atomic_read(&ic->i_credits));
+                       if (isv6)
+                               dp->ricp_v6.dp_credit = cpu_to_be32(credits);
+                       else
+                               dp->ricp_v4.dp_credit = cpu_to_be32(credits);
+                       atomic_sub(IB_SET_POST_CREDITS(credits),
+                                  &ic->i_credits);
                }
-
-               conn_param->private_data = dp;
-               conn_param->private_data_len = sizeof(*dp);
        }
 }
 
@@ -349,7 +390,7 @@ static void rds_ib_qp_event_handler(struct ib_event *event, void *data)
                break;
        default:
                rdsdebug("Fatal QP Event %u (%s) "
-                       "- connection %pI4->%pI4, reconnecting\n",
+                       "- connection %pI6c->%pI6c, reconnecting\n",
                        event->event, ib_event_msg(event->event),
                        &conn->c_laddr, &conn->c_faddr);
                rds_conn_drop(conn);
@@ -580,11 +621,13 @@ out:
        return ret;
 }
 
-static u32 rds_ib_protocol_compatible(struct rdma_cm_event *event)
+static u32 rds_ib_protocol_compatible(struct rdma_cm_event *event, bool isv6)
 {
-       const struct rds_ib_connect_private *dp = event->param.conn.private_data;
-       u16 common;
+       const union rds_ib_conn_priv *dp = event->param.conn.private_data;
+       u8 data_len, major, minor;
        u32 version = 0;
+       __be16 mask;
+       u16 common;
 
        /*
         * rdma_cm private data is odd - when there is any private data in the
@@ -603,51 +646,140 @@ static u32 rds_ib_protocol_compatible(struct rdma_cm_event *event)
                return 0;
        }
 
+       if (isv6) {
+               data_len = sizeof(struct rds6_ib_connect_private);
+               major = dp->ricp_v6.dp_protocol_major;
+               minor = dp->ricp_v6.dp_protocol_minor;
+               mask = dp->ricp_v6.dp_protocol_minor_mask;
+       } else {
+               data_len = sizeof(struct rds_ib_connect_private);
+               major = dp->ricp_v4.dp_protocol_major;
+               minor = dp->ricp_v4.dp_protocol_minor;
+               mask = dp->ricp_v4.dp_protocol_minor_mask;
+       }
+
        /* Even if len is crap *now* I still want to check it. -ASG */
-       if (event->param.conn.private_data_len < sizeof (*dp) ||
-           dp->dp_protocol_major == 0)
+       if (event->param.conn.private_data_len < data_len || major == 0)
                return RDS_PROTOCOL_3_0;
 
-       common = be16_to_cpu(dp->dp_protocol_minor_mask) & RDS_IB_SUPPORTED_PROTOCOLS;
-       if (dp->dp_protocol_major == 3 && common) {
+       common = be16_to_cpu(mask) & RDS_IB_SUPPORTED_PROTOCOLS;
+       if (major == 3 && common) {
                version = RDS_PROTOCOL_3_0;
                while ((common >>= 1) != 0)
                        version++;
-       } else
-               printk_ratelimited(KERN_NOTICE "RDS: Connection from %pI4 using incompatible protocol version %u.%u\n",
-                               &dp->dp_saddr,
-                               dp->dp_protocol_major,
-                               dp->dp_protocol_minor);
+       } else {
+               if (isv6)
+                       printk_ratelimited(KERN_NOTICE "RDS: Connection from %pI6c using incompatible protocol version %u.%u\n",
+                                          &dp->ricp_v6.dp_saddr, major, minor);
+               else
+                       printk_ratelimited(KERN_NOTICE "RDS: Connection from %pI4 using incompatible protocol version %u.%u\n",
+                                          &dp->ricp_v4.dp_saddr, major, minor);
+       }
        return version;
 }
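
The negotiation above selects the highest minor version present in both
sides' bitmasks.  A stand-alone sketch (the mask values are made up for
illustration):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint16_t ours   = 0x0003;  /* we support minors 0 and 1 */
            uint16_t theirs = 0x0002;  /* peer supports minor 1 only */
            uint16_t common = ours & theirs;
            unsigned int minor = 0;

            if (common) {
                    /* Same loop shape as above: shift the mask down to
                     * find the index of its highest set bit.
                     */
                    while ((common >>= 1) != 0)
                            minor++;
                    printf("negotiated minor %u\n", minor);  /* 1 */
            }
            return 0;
    }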
 
+#if IS_ENABLED(CONFIG_IPV6)
+/* Given an IPv6 address, find the net_device which hosts that address and
+ * return its index.  This is used by the rds_ib_cm_handle_connect() code to
+ * find the interface index on which an incoming request arrives when the
+ * request uses a link local address.
+ *
+ * Note one problem with this search: it is possible for two interfaces to
+ * have the same link local address.  Unfortunately, this cannot be resolved
+ * unless the underlying layer tells us the interface on which an incoming
+ * RDMA connect request arrives.
+ */
+static u32 __rds_find_ifindex(struct net *net, const struct in6_addr *addr)
+{
+       struct net_device *dev;
+       int idx = 0;
+
+       rcu_read_lock();
+       for_each_netdev_rcu(net, dev) {
+               if (ipv6_chk_addr(net, addr, dev, 1)) {
+                       idx = dev->ifindex;
+                       break;
+               }
+       }
+       rcu_read_unlock();
+
+       return idx;
+}
+#endif
+
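
A user-space analogue of __rds_find_ifindex() built on getifaddrs(3)
(a sketch, not the kernel helper): like the kernel loop, it returns the
first interface carrying the address, so duplicate link local addresses
stay ambiguous.

    #include <stdio.h>
    #include <string.h>
    #include <arpa/inet.h>
    #include <ifaddrs.h>
    #include <net/if.h>
    #include <netinet/in.h>

    static unsigned int find_ifindex(const struct in6_addr *addr)
    {
            struct ifaddrs *ifa, *p;
            unsigned int idx = 0;

            if (getifaddrs(&ifa) < 0)
                    return 0;
            for (p = ifa; p; p = p->ifa_next) {
                    const struct sockaddr_in6 *s6;

                    if (!p->ifa_addr || p->ifa_addr->sa_family != AF_INET6)
                            continue;
                    s6 = (const struct sockaddr_in6 *)p->ifa_addr;
                    if (!memcmp(&s6->sin6_addr, addr, sizeof(*addr))) {
                            idx = if_nametoindex(p->ifa_name);
                            break;
                    }
            }
            freeifaddrs(ifa);
            return idx;
    }

    int main(void)
    {
            struct in6_addr a;

            inet_pton(AF_INET6, "::1", &a);
            printf("ifindex %u\n", find_ifindex(&a));  /* loopback */
            return 0;
    }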
 int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
-                                   struct rdma_cm_event *event)
+                            struct rdma_cm_event *event, bool isv6)
 {
        __be64 lguid = cm_id->route.path_rec->sgid.global.interface_id;
        __be64 fguid = cm_id->route.path_rec->dgid.global.interface_id;
-       const struct rds_ib_connect_private *dp = event->param.conn.private_data;
-       struct rds_ib_connect_private dp_rep;
+       const struct rds_ib_conn_priv_cmn *dp_cmn;
        struct rds_connection *conn = NULL;
        struct rds_ib_connection *ic = NULL;
        struct rdma_conn_param conn_param;
+       const union rds_ib_conn_priv *dp;
+       union rds_ib_conn_priv dp_rep;
+       struct in6_addr s_mapped_addr;
+       struct in6_addr d_mapped_addr;
+       const struct in6_addr *saddr6;
+       const struct in6_addr *daddr6;
+       int destroy = 1;
+       u32 ifindex = 0;
        u32 version;
-       int err = 1, destroy = 1;
+       int err = 1;
 
        /* Check whether the remote protocol version matches ours. */
-       version = rds_ib_protocol_compatible(event);
+       version = rds_ib_protocol_compatible(event, isv6);
        if (!version)
                goto out;
 
-       rdsdebug("saddr %pI4 daddr %pI4 RDSv%u.%u lguid 0x%llx fguid "
-                "0x%llx\n", &dp->dp_saddr, &dp->dp_daddr,
+       dp = event->param.conn.private_data;
+       if (isv6) {
+#if IS_ENABLED(CONFIG_IPV6)
+               dp_cmn = &dp->ricp_v6.dp_cmn;
+               saddr6 = &dp->ricp_v6.dp_saddr;
+               daddr6 = &dp->ricp_v6.dp_daddr;
+               /* If either address is link local, need to find the
+                * interface index in order to create a proper RDS
+                * connection.
+                */
+               if (ipv6_addr_type(daddr6) & IPV6_ADDR_LINKLOCAL) {
+                       /* Using init_net for now. */
+                       ifindex = __rds_find_ifindex(&init_net, daddr6);
+                       /* No index found...  Need to bail out. */
+                       if (ifindex == 0) {
+                               err = -EOPNOTSUPP;
+                               goto out;
+                       }
+               } else if (ipv6_addr_type(saddr6) & IPV6_ADDR_LINKLOCAL) {
+                       /* The peer's address is link local; use our own
+                        * address (daddr6) to find the correct index.
+                        */
+                       ifindex = __rds_find_ifindex(&init_net, daddr6);
+                       /* No index found...  Need to bail out. */
+                       if (ifindex == 0) {
+                               err = -EOPNOTSUPP;
+                               goto out;
+                       }
+               }
+#else
+               err = -EOPNOTSUPP;
+               goto out;
+#endif
+       } else {
+               dp_cmn = &dp->ricp_v4.dp_cmn;
+               ipv6_addr_set_v4mapped(dp->ricp_v4.dp_saddr, &s_mapped_addr);
+               ipv6_addr_set_v4mapped(dp->ricp_v4.dp_daddr, &d_mapped_addr);
+               saddr6 = &s_mapped_addr;
+               daddr6 = &d_mapped_addr;
+       }
+
+       rdsdebug("saddr %pI6c daddr %pI6c RDSv%u.%u lguid 0x%llx fguid "
+                "0x%llx\n", saddr6, daddr6,
                 RDS_PROTOCOL_MAJOR(version), RDS_PROTOCOL_MINOR(version),
                 (unsigned long long)be64_to_cpu(lguid),
                 (unsigned long long)be64_to_cpu(fguid));
 
        /* RDS/IB is not currently netns aware, thus init_net */
-       conn = rds_conn_create(&init_net, dp->dp_daddr, dp->dp_saddr,
-                              &rds_ib_transport, GFP_KERNEL);
+       conn = rds_conn_create(&init_net, daddr6, saddr6,
+                              &rds_ib_transport, GFP_KERNEL, ifindex);
        if (IS_ERR(conn)) {
                rdsdebug("rds_conn_create failed (%ld)\n", PTR_ERR(conn));
                conn = NULL;
@@ -678,12 +810,13 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
        ic = conn->c_transport_data;
 
        rds_ib_set_protocol(conn, version);
-       rds_ib_set_flow_control(conn, be32_to_cpu(dp->dp_credit));
+       rds_ib_set_flow_control(conn, be32_to_cpu(dp_cmn->ricpc_credit));
 
        /* If the peer gave us the last packet it saw, process this as if
         * we had received a regular ACK. */
-       if (dp->dp_ack_seq)
-               rds_send_drop_acked(conn, be64_to_cpu(dp->dp_ack_seq), NULL);
+       if (dp_cmn->ricpc_ack_seq)
+               rds_send_drop_acked(conn, be64_to_cpu(dp_cmn->ricpc_ack_seq),
+                                   NULL);
 
        BUG_ON(cm_id->context);
        BUG_ON(ic->i_cm_id);
@@ -702,8 +835,8 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
        }
 
        rds_ib_cm_fill_conn_param(conn, &conn_param, &dp_rep, version,
-               event->param.conn.responder_resources,
-               event->param.conn.initiator_depth);
+                                 event->param.conn.responder_resources,
+                                 event->param.conn.initiator_depth, isv6);
 
        /* rdma_accept() calls rdma_reject() internally if it fails */
        if (rdma_accept(cm_id, &conn_param))
@@ -718,12 +851,12 @@ out:
 }
 
 
-int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id)
+int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id, bool isv6)
 {
        struct rds_connection *conn = cm_id->context;
        struct rds_ib_connection *ic = conn->c_transport_data;
        struct rdma_conn_param conn_param;
-       struct rds_ib_connect_private dp;
+       union rds_ib_conn_priv dp;
        int ret;
 
        /* If the peer doesn't do protocol negotiation, we must
@@ -738,7 +871,7 @@ int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id)
        }
 
        rds_ib_cm_fill_conn_param(conn, &conn_param, &dp, RDS_PROTOCOL_VERSION,
-               UINT_MAX, UINT_MAX);
+                                 UINT_MAX, UINT_MAX, isv6);
        ret = rdma_connect(cm_id, &conn_param);
        if (ret)
                rds_ib_conn_error(conn, "rdma_connect failed (%d)\n", ret);
@@ -758,13 +891,22 @@ out:
 int rds_ib_conn_path_connect(struct rds_conn_path *cp)
 {
        struct rds_connection *conn = cp->cp_conn;
-       struct rds_ib_connection *ic = conn->c_transport_data;
-       struct sockaddr_in src, dest;
+       struct sockaddr_storage src, dest;
+       rdma_cm_event_handler handler;
+       struct rds_ib_connection *ic;
        int ret;
 
+       ic = conn->c_transport_data;
+
        /* XXX I wonder what affect the port space has */
        /* delegate cm event handler to rdma_transport */
-       ic->i_cm_id = rdma_create_id(&init_net, rds_rdma_cm_event_handler, conn,
+#if IS_ENABLED(CONFIG_IPV6)
+       if (conn->c_isv6)
+               handler = rds6_rdma_cm_event_handler;
+       else
+#endif
+               handler = rds_rdma_cm_event_handler;
+       ic->i_cm_id = rdma_create_id(&init_net, handler, conn,
                                     RDMA_PS_TCP, IB_QPT_RC);
        if (IS_ERR(ic->i_cm_id)) {
                ret = PTR_ERR(ic->i_cm_id);
@@ -775,13 +917,33 @@ int rds_ib_conn_path_connect(struct rds_conn_path *cp)
 
        rdsdebug("created cm id %p for conn %p\n", ic->i_cm_id, conn);
 
-       src.sin_family = AF_INET;
-       src.sin_addr.s_addr = (__force u32)conn->c_laddr;
-       src.sin_port = (__force u16)htons(0);
+       if (ipv6_addr_v4mapped(&conn->c_faddr)) {
+               struct sockaddr_in *sin;
+
+               sin = (struct sockaddr_in *)&src;
+               sin->sin_family = AF_INET;
+               sin->sin_addr.s_addr = conn->c_laddr.s6_addr32[3];
+               sin->sin_port = 0;
 
-       dest.sin_family = AF_INET;
-       dest.sin_addr.s_addr = (__force u32)conn->c_faddr;
-       dest.sin_port = (__force u16)htons(RDS_PORT);
+               sin = (struct sockaddr_in *)&dest;
+               sin->sin_family = AF_INET;
+               sin->sin_addr.s_addr = conn->c_faddr.s6_addr32[3];
+               sin->sin_port = htons(RDS_PORT);
+       } else {
+               struct sockaddr_in6 *sin6;
+
+               sin6 = (struct sockaddr_in6 *)&src;
+               sin6->sin6_family = AF_INET6;
+               sin6->sin6_addr = conn->c_laddr;
+               sin6->sin6_port = 0;
+               sin6->sin6_scope_id = conn->c_dev_if;
+
+               sin6 = (struct sockaddr_in6 *)&dest;
+               sin6->sin6_family = AF_INET6;
+               sin6->sin6_addr = conn->c_faddr;
+               sin6->sin6_port = htons(RDS_CM_PORT);
+               sin6->sin6_scope_id = conn->c_dev_if;
+       }
 
        ret = rdma_resolve_addr(ic->i_cm_id, (struct sockaddr *)&src,
                                (struct sockaddr *)&dest,
@@ -949,7 +1111,7 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp)
        if (!ic)
                return -ENOMEM;
 
-       ret = rds_ib_recv_alloc_caches(ic);
+       ret = rds_ib_recv_alloc_caches(ic, gfp);
        if (ret) {
                kfree(ic);
                return ret;
diff --git a/net/rds/ib_mr.h b/net/rds/ib_mr.h
index 655f01d427fe5c7899f916987dd88022832c8bdc..5da12c2484316cbbde5c33b254d86b061c1b187e 100644
--- a/net/rds/ib_mr.h
+++ b/net/rds/ib_mr.h
@@ -113,6 +113,8 @@ struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_dev,
                                             int npages);
 void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev,
                        struct rds_info_rdma_connection *iinfo);
+void rds6_ib_get_mr_info(struct rds_ib_device *rds_ibdev,
+                        struct rds6_info_rdma_connection *iinfo6);
 void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *);
 void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
                    struct rds_sock *rs, u32 *key_ret,
index 2e49a40a5e113cef44543f112c4911a39501fc7e..63c8d107adcfbec096b3dbcead8de98ec6327bc1 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 Oracle.  All rights reserved.
+ * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -100,18 +100,19 @@ static void rds_ib_remove_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
                kfree_rcu(to_free, rcu);
 }
 
-int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
+int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev,
+                        struct in6_addr *ipaddr)
 {
        struct rds_ib_device *rds_ibdev_old;
 
-       rds_ibdev_old = rds_ib_get_device(ipaddr);
+       rds_ibdev_old = rds_ib_get_device(ipaddr->s6_addr32[3]);
        if (!rds_ibdev_old)
-               return rds_ib_add_ipaddr(rds_ibdev, ipaddr);
+               return rds_ib_add_ipaddr(rds_ibdev, ipaddr->s6_addr32[3]);
 
        if (rds_ibdev_old != rds_ibdev) {
-               rds_ib_remove_ipaddr(rds_ibdev_old, ipaddr);
+               rds_ib_remove_ipaddr(rds_ibdev_old, ipaddr->s6_addr32[3]);
                rds_ib_dev_put(rds_ibdev_old);
-               return rds_ib_add_ipaddr(rds_ibdev, ipaddr);
+               return rds_ib_add_ipaddr(rds_ibdev, ipaddr->s6_addr32[3]);
        }
        rds_ib_dev_put(rds_ibdev_old);
 
@@ -179,6 +180,17 @@ void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_co
        iinfo->rdma_mr_size = pool_1m->fmr_attr.max_pages;
 }
 
+#if IS_ENABLED(CONFIG_IPV6)
+void rds6_ib_get_mr_info(struct rds_ib_device *rds_ibdev,
+                        struct rds6_info_rdma_connection *iinfo6)
+{
+       struct rds_ib_mr_pool *pool_1m = rds_ibdev->mr_1m_pool;
+
+       iinfo6->rdma_mr_max = pool_1m->max_items;
+       iinfo6->rdma_mr_size = pool_1m->fmr_attr.max_pages;
+}
+#endif
+
 struct rds_ib_mr *rds_ib_reuse_mr(struct rds_ib_mr_pool *pool)
 {
        struct rds_ib_mr *ibmr = NULL;
@@ -545,7 +557,7 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
        struct rds_ib_connection *ic = NULL;
        int ret;
 
-       rds_ibdev = rds_ib_get_device(rs->rs_bound_addr);
+       rds_ibdev = rds_ib_get_device(rs->rs_bound_addr.s6_addr32[3]);
        if (!rds_ibdev) {
                ret = -ENODEV;
                goto out;
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index b4e421aa9727942e0cbe7ba40e11c3d19d937868..d300186b8dc020ac9da1036b8179e542bb18b14d 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 Oracle.  All rights reserved.
+ * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -98,12 +98,12 @@ static void rds_ib_cache_xfer_to_ready(struct rds_ib_refill_cache *cache)
        }
 }
 
-static int rds_ib_recv_alloc_cache(struct rds_ib_refill_cache *cache)
+static int rds_ib_recv_alloc_cache(struct rds_ib_refill_cache *cache, gfp_t gfp)
 {
        struct rds_ib_cache_head *head;
        int cpu;
 
-       cache->percpu = alloc_percpu(struct rds_ib_cache_head);
+       cache->percpu = alloc_percpu_gfp(struct rds_ib_cache_head, gfp);
        if (!cache->percpu)
               return -ENOMEM;
 
@@ -118,13 +118,13 @@ static int rds_ib_recv_alloc_cache(struct rds_ib_refill_cache *cache)
        return 0;
 }
 
-int rds_ib_recv_alloc_caches(struct rds_ib_connection *ic)
+int rds_ib_recv_alloc_caches(struct rds_ib_connection *ic, gfp_t gfp)
 {
        int ret;
 
-       ret = rds_ib_recv_alloc_cache(&ic->i_cache_incs);
+       ret = rds_ib_recv_alloc_cache(&ic->i_cache_incs, gfp);
        if (!ret) {
-               ret = rds_ib_recv_alloc_cache(&ic->i_cache_frags);
+               ret = rds_ib_recv_alloc_cache(&ic->i_cache_frags, gfp);
                if (ret)
                        free_percpu(ic->i_cache_incs.percpu);
        }
@@ -266,7 +266,7 @@ static struct rds_ib_incoming *rds_ib_refill_one_inc(struct rds_ib_connection *i
                rds_ib_stats_inc(s_ib_rx_total_incs);
        }
        INIT_LIST_HEAD(&ibinc->ii_frags);
-       rds_inc_init(&ibinc->ii_inc, ic->conn, ic->conn->c_faddr);
+       rds_inc_init(&ibinc->ii_inc, ic->conn, &ic->conn->c_faddr);
 
        return ibinc;
 }
@@ -376,8 +376,6 @@ static void release_refill(struct rds_connection *conn)
  * This tries to allocate and post unused work requests after making sure that
  * they have all the allocations they need to queue received fragments into
  * sockets.
- *
- * -1 is returned if posting fails due to temporary resource exhaustion.
  */
 void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp)
 {
@@ -420,7 +418,7 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp)
                ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr);
                if (ret) {
                        rds_ib_conn_error(conn, "recv post on "
-                              "%pI4 returned %d, disconnecting and "
+                              "%pI6c returned %d, disconnecting and "
                               "reconnecting\n", &conn->c_faddr,
                               ret);
                        break;
@@ -850,7 +848,7 @@ static void rds_ib_process_recv(struct rds_connection *conn,
 
        if (data_len < sizeof(struct rds_header)) {
                rds_ib_conn_error(conn, "incoming message "
-                      "from %pI4 didn't include a "
+                      "from %pI6c didn't include a "
                       "header, disconnecting and "
                       "reconnecting\n",
                       &conn->c_faddr);
@@ -863,7 +861,7 @@ static void rds_ib_process_recv(struct rds_connection *conn,
        /* Validate the checksum. */
        if (!rds_message_verify_checksum(ihdr)) {
                rds_ib_conn_error(conn, "incoming message "
-                      "from %pI4 has corrupted header - "
+                      "from %pI6c has corrupted header - "
                       "forcing a reconnect\n",
                       &conn->c_faddr);
                rds_stats_inc(s_recv_drop_bad_checksum);
@@ -943,10 +941,10 @@ static void rds_ib_process_recv(struct rds_connection *conn,
                ic->i_recv_data_rem = 0;
                ic->i_ibinc = NULL;
 
-               if (ibinc->ii_inc.i_hdr.h_flags == RDS_FLAG_CONG_BITMAP)
+               if (ibinc->ii_inc.i_hdr.h_flags == RDS_FLAG_CONG_BITMAP) {
                        rds_ib_cong_recv(conn, ibinc);
-               else {
-                       rds_recv_incoming(conn, conn->c_faddr, conn->c_laddr,
+               } else {
+                       rds_recv_incoming(conn, &conn->c_faddr, &conn->c_laddr,
                                          &ibinc->ii_inc, GFP_ATOMIC);
                                          &ibinc->ii_inc, GFP_ATOMIC);
                        state->ack_next = be64_to_cpu(hdr->h_sequence);
                        state->ack_next_valid = 1;
@@ -990,7 +988,7 @@ void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic,
        } else {
                /* We expect errors as the qp is drained during shutdown */
                if (rds_conn_up(conn) || rds_conn_connecting(conn))
-                       rds_ib_conn_error(conn, "recv completion on <%pI4,%pI4> had status %u (%s), disconnecting and reconnecting\n",
+                       rds_ib_conn_error(conn, "recv completion on <%pI6c,%pI6c> had status %u (%s), disconnecting and reconnecting\n",
                                          &conn->c_laddr, &conn->c_faddr,
                                          wc->status,
                                          ib_wc_status_msg(wc->status));
@@ -1025,7 +1023,6 @@ int rds_ib_recv_path(struct rds_conn_path *cp)
 {
        struct rds_connection *conn = cp->cp_conn;
        struct rds_ib_connection *ic = conn->c_transport_data;
-       int ret = 0;
 
        rdsdebug("conn %p\n", conn);
        if (rds_conn_up(conn)) {
@@ -1034,7 +1031,7 @@ int rds_ib_recv_path(struct rds_conn_path *cp)
                rds_ib_stats_inc(s_ib_rx_refill_from_thread);
        }
 
-       return ret;
+       return 0;
 }
 
 int rds_ib_recv_init(void)
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index 8557a1cae04170496887815a0f41e7c1abd2d979..c8dd3125d398776318a6391de3c8ceb0b62602f4 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 Oracle.  All rights reserved.
+ * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -305,7 +305,7 @@ void rds_ib_send_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc)
 
        /* We expect errors as the qp is drained during shutdown */
        if (wc->status != IB_WC_SUCCESS && rds_conn_up(conn)) {
-               rds_ib_conn_error(conn, "send completion on <%pI4,%pI4> had status %u (%s), disconnecting and reconnecting\n",
+               rds_ib_conn_error(conn, "send completion on <%pI6c,%pI6c> had status %u (%s), disconnecting and reconnecting\n",
                                  &conn->c_laddr, &conn->c_faddr, wc->status,
                                  ib_wc_status_msg(wc->status));
        }
@@ -730,7 +730,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
                 first, &first->s_wr, ret, failed_wr);
        BUG_ON(failed_wr != &first->s_wr);
        if (ret) {
-               printk(KERN_WARNING "RDS/IB: ib_post_send to %pI4 "
+               printk(KERN_WARNING "RDS/IB: ib_post_send to %pI6c "
                       "returned %d\n", &conn->c_faddr, ret);
                rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
                rds_ib_sub_signaled(ic, nr_sig);
@@ -759,14 +759,11 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op)
        struct rds_ib_connection *ic = conn->c_transport_data;
        struct rds_ib_send_work *send = NULL;
        struct ib_send_wr *failed_wr;
-       struct rds_ib_device *rds_ibdev;
        u32 pos;
        u32 work_alloc;
        int ret;
        int nr_sig = 0;
 
-       rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client);
-
        work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, 1, &pos);
        if (work_alloc != 1) {
                rds_ib_stats_inc(s_ib_tx_ring_full);
@@ -827,7 +824,7 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op)
                 send, &send->s_atomic_wr, ret, failed_wr);
        BUG_ON(failed_wr != &send->s_atomic_wr.wr);
        if (ret) {
-               printk(KERN_WARNING "RDS/IB: atomic ib_post_send to %pI4 "
+               printk(KERN_WARNING "RDS/IB: atomic ib_post_send to %pI6c "
                       "returned %d\n", &conn->c_faddr, ret);
                rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
                rds_ib_sub_signaled(ic, nr_sig);
@@ -967,7 +964,7 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
                 first, &first->s_rdma_wr.wr, ret, failed_wr);
        BUG_ON(failed_wr != &first->s_rdma_wr.wr);
        if (ret) {
-               printk(KERN_WARNING "RDS/IB: rdma ib_post_send to %pI4 "
+               printk(KERN_WARNING "RDS/IB: rdma ib_post_send to %pI6c "
                       "returned %d\n", &conn->c_faddr, ret);
                rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
                rds_ib_sub_signaled(ic, nr_sig);
diff --git a/net/rds/loop.c b/net/rds/loop.c
index feea1f96ee2ad582dce8f815442da1bbf6e0508a..1d73ad79c847bb7d918ba4ec358a4920a8e55c1c 100644
--- a/net/rds/loop.c
+++ b/net/rds/loop.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 Oracle.  All rights reserved.
+ * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -35,6 +35,7 @@
 #include <linux/in.h>
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
+#include <linux/ipv6.h>
 
 #include "rds_single_path.h"
 #include "rds.h"
@@ -88,11 +89,11 @@ static int rds_loop_xmit(struct rds_connection *conn, struct rds_message *rm,
 
        BUG_ON(hdr_off || sg || off);
 
-       rds_inc_init(&rm->m_inc, conn, conn->c_laddr);
+       rds_inc_init(&rm->m_inc, conn, &conn->c_laddr);
        /* For the embedded inc. Matching put is in loop_inc_free() */
        rds_message_addref(rm);
 
-       rds_recv_incoming(conn, conn->c_laddr, conn->c_faddr, &rm->m_inc,
+       rds_recv_incoming(conn, &conn->c_laddr, &conn->c_faddr, &rm->m_inc,
                          GFP_KERNEL);
 
        rds_send_drop_acked(conn, be64_to_cpu(rm->m_inc.i_hdr.h_sequence),
diff --git a/net/rds/message.c b/net/rds/message.c
index a35f7697198499eece20e1678f5d003d64bf785a..4b00b1152a5f0a885100098e565b881ab68b7e57 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -514,4 +514,3 @@ void rds_message_unmapped(struct rds_message *rm)
        wake_up_interruptible(&rm->m_flush_wait);
 }
 EXPORT_SYMBOL_GPL(rds_message_unmapped);
-
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 80920e47f2c79eb3ce0f95f6acc70e2126af1441..98237feb607ac6f3d24ecd18ecb3320c47065d30 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007 Oracle.  All rights reserved.
+ * Copyright (c) 2007, 2017 Oracle and/or its affiliates. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -184,7 +184,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
        long i;
        int ret;
 
-       if (rs->rs_bound_addr == 0 || !rs->rs_transport) {
+       if (ipv6_addr_any(&rs->rs_bound_addr) || !rs->rs_transport) {
                ret = -ENOTCONN; /* XXX not a great errno */
                goto out;
        }
@@ -576,7 +576,7 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
 
        args = CMSG_DATA(cmsg);
 
-       if (rs->rs_bound_addr == 0) {
+       if (ipv6_addr_any(&rs->rs_bound_addr)) {
                ret = -ENOTCONN; /* XXX not a great errno */
                goto out_ret;
        }
diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c
index fc59821f0a27bd2a529c17a36c5b06a1d5d91a8e..6b0f57c83a2ada30112fb89b56e6035aac29224e 100644
--- a/net/rds/rdma_transport.c
+++ b/net/rds/rdma_transport.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2009 Oracle.  All rights reserved.
+ * Copyright (c) 2009, 2018 Oracle and/or its affiliates. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
 #include "rdma_transport.h"
 #include "ib.h"
 
+/* Global IPv4 and IPv6 RDS RDMA listener cm_id */
 static struct rdma_cm_id *rds_rdma_listen_id;
+#if IS_ENABLED(CONFIG_IPV6)
+static struct rdma_cm_id *rds6_rdma_listen_id;
+#endif
 
-int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
-                             struct rdma_cm_event *event)
+static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
+                                        struct rdma_cm_event *event,
+                                        bool isv6)
 {
        /* this can be null in the listening path */
        struct rds_connection *conn = cm_id->context;
@@ -72,7 +77,7 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
 
        switch (event->event) {
        case RDMA_CM_EVENT_CONNECT_REQUEST:
-               ret = trans->cm_handle_connect(cm_id, event);
+               ret = trans->cm_handle_connect(cm_id, event, isv6);
                break;
 
        case RDMA_CM_EVENT_ADDR_RESOLVED:
@@ -90,7 +95,7 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
 
                        ibic = conn->c_transport_data;
                        if (ibic && ibic->i_cm_id == cm_id)
-                               ret = trans->cm_initiate_connect(cm_id);
+                               ret = trans->cm_initiate_connect(cm_id, isv6);
                        else
                                rds_conn_drop(conn);
                }
@@ -116,14 +121,14 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
 
        case RDMA_CM_EVENT_DISCONNECTED:
                rdsdebug("DISCONNECT event - dropping connection "
-                       "%pI4->%pI4\n", &conn->c_laddr,
+                        "%pI6c->%pI6c\n", &conn->c_laddr,
                         &conn->c_faddr);
                rds_conn_drop(conn);
                break;
 
        case RDMA_CM_EVENT_TIMEWAIT_EXIT:
                if (conn) {
-                       pr_info("RDS: RDMA_CM_EVENT_TIMEWAIT_EXIT event: dropping connection %pI4->%pI4\n",
+                       pr_info("RDS: RDMA_CM_EVENT_TIMEWAIT_EXIT event: dropping connection %pI6c->%pI6c\n",
                                &conn->c_laddr, &conn->c_faddr);
                        rds_conn_drop(conn);
                }
@@ -146,13 +151,28 @@ out:
        return ret;
 }
 
-static int rds_rdma_listen_init(void)
+int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
+                             struct rdma_cm_event *event)
+{
+       return rds_rdma_cm_event_handler_cmn(cm_id, event, false);
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+int rds6_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
+                              struct rdma_cm_event *event)
+{
+       return rds_rdma_cm_event_handler_cmn(cm_id, event, true);
+}
+#endif
+
+static int rds_rdma_listen_init_common(rdma_cm_event_handler handler,
+                                      struct sockaddr *sa,
+                                      struct rdma_cm_id **ret_cm_id)
 {
-       struct sockaddr_in sin;
        struct rdma_cm_id *cm_id;
        int ret;
 
-       cm_id = rdma_create_id(&init_net, rds_rdma_cm_event_handler, NULL,
+       cm_id = rdma_create_id(&init_net, handler, NULL,
                               RDMA_PS_TCP, IB_QPT_RC);
        if (IS_ERR(cm_id)) {
                ret = PTR_ERR(cm_id);
@@ -161,15 +181,11 @@ static int rds_rdma_listen_init(void)
                return ret;
        }
 
-       sin.sin_family = AF_INET;
-       sin.sin_addr.s_addr = (__force u32)htonl(INADDR_ANY);
-       sin.sin_port = (__force u16)htons(RDS_PORT);
-
        /*
         * XXX I bet this binds the cm_id to a device.  If we want to support
         * fail-over we'll have to take this into consideration.
         */
-       ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin);
+       ret = rdma_bind_addr(cm_id, sa);
        if (ret) {
                printk(KERN_ERR "RDS/RDMA: failed to setup listener, "
                       "rdma_bind_addr() returned %d\n", ret);
@@ -185,7 +201,7 @@ static int rds_rdma_listen_init(void)
 
        rdsdebug("cm %p listening on port %u\n", cm_id, RDS_PORT);
 
-       rds_rdma_listen_id = cm_id;
+       *ret_cm_id = cm_id;
        cm_id = NULL;
 out:
        if (cm_id)
@@ -193,6 +209,45 @@ out:
        return ret;
 }
 
+/* Initialize the RDS RDMA listeners.  We create two listeners for
+ * backward compatibility reasons.  The one on RDS_PORT is used for
+ * IPv4 requests only.  The one on RDS_CM_PORT is used for IPv6
+ * requests only, so only an IPv6 enabled RDS module will communicate
+ * over this port.
+ */
+static int rds_rdma_listen_init(void)
+{
+       int ret;
+#if IS_ENABLED(CONFIG_IPV6)
+       struct sockaddr_in6 sin6;
+#endif
+       struct sockaddr_in sin;
+
+       sin.sin_family = PF_INET;
+       sin.sin_addr.s_addr = htonl(INADDR_ANY);
+       sin.sin_port = htons(RDS_PORT);
+       ret = rds_rdma_listen_init_common(rds_rdma_cm_event_handler,
+                                         (struct sockaddr *)&sin,
+                                         &rds_rdma_listen_id);
+       if (ret != 0)
+               return ret;
+
+#if IS_ENABLED(CONFIG_IPV6)
+       sin6.sin6_family = PF_INET6;
+       sin6.sin6_addr = in6addr_any;
+       sin6.sin6_port = htons(RDS_CM_PORT);
+       sin6.sin6_scope_id = 0;
+       sin6.sin6_flowinfo = 0;
+       ret = rds_rdma_listen_init_common(rds6_rdma_cm_event_handler,
+                                         (struct sockaddr *)&sin6,
+                                         &rds6_rdma_listen_id);
+       /* Keep going even when IPv6 is not enabled in the system. */
+       if (ret != 0)
+               rdsdebug("Cannot set up IPv6 RDMA listener\n");
+#endif
+       return 0;
+}
+
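
A user-space sketch of the same dual-listener arrangement, with plain
TCP sockets standing in for the two RDMA_CM listeners (IPV6_V6ONLY is
used here only to keep the illustration's IPv6 socket off the IPv4
port; the port numbers mirror RDS_PORT and RDS_CM_PORT):

    #include <stdio.h>
    #include <string.h>
    #include <arpa/inet.h>
    #include <netinet/in.h>
    #include <sys/socket.h>

    int main(void)
    {
            struct sockaddr_in sin = { .sin_family = AF_INET,
                                       .sin_port = htons(18634) };
            struct sockaddr_in6 sin6 = { .sin6_family = AF_INET6,
                                         .sin6_port = htons(16385) };
            int one = 1;
            int fd4 = socket(AF_INET, SOCK_STREAM, 0);
            int fd6 = socket(AF_INET6, SOCK_STREAM, 0);

            sin.sin_addr.s_addr = htonl(INADDR_ANY);
            sin6.sin6_addr = in6addr_any;
            setsockopt(fd6, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one));
            if (bind(fd4, (struct sockaddr *)&sin, sizeof(sin)) < 0 ||
                bind(fd6, (struct sockaddr *)&sin6, sizeof(sin6)) < 0) {
                    perror("bind");
                    return 1;
            }
            listen(fd4, 8);
            listen(fd6, 8);
            printf("IPv4 listener on 18634, IPv6 listener on 16385\n");
            return 0;
    }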
 static void rds_rdma_listen_stop(void)
 {
        if (rds_rdma_listen_id) {
@@ -200,6 +255,13 @@ static void rds_rdma_listen_stop(void)
                rdma_destroy_id(rds_rdma_listen_id);
                rds_rdma_listen_id = NULL;
        }
+#if IS_ENABLED(CONFIG_IPV6)
+       if (rds6_rdma_listen_id) {
+               rdsdebug("cm %p\n", rds6_rdma_listen_id);
+               rdma_destroy_id(rds6_rdma_listen_id);
+               rds6_rdma_listen_id = NULL;
+       }
+#endif
 }
 
 static int rds_rdma_init(void)
@@ -229,4 +291,3 @@ module_exit(rds_rdma_exit);
 MODULE_AUTHOR("Oracle Corporation <rds-devel@oss.oracle.com>");
 MODULE_DESCRIPTION("RDS: IB transport");
 MODULE_LICENSE("Dual BSD/GPL");
-
diff --git a/net/rds/rdma_transport.h b/net/rds/rdma_transport.h
index d309c44301243cc64fa6ba6f0465afc821e8c08d..200d3134aaae18f5e3c06f66aaf64f5918699236 100644
--- a/net/rds/rdma_transport.h
+++ b/net/rds/rdma_transport.h
@@ -6,11 +6,16 @@
 #include <rdma/rdma_cm.h>
 #include "rds.h"
 
+/* RDMA_CM also uses 16385 as the listener port. */
+#define RDS_CM_PORT    16385
+
 #define RDS_RDMA_RESOLVE_TIMEOUT_MS     5000
 
 int rds_rdma_conn_connect(struct rds_connection *conn);
 int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
                              struct rdma_cm_event *event);
+int rds6_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
+                              struct rdma_cm_event *event);
 
 /* from ib.c */
 extern struct rds_transport rds_ib_transport;
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 60b3b787fbdb321ca3a280314c18443890ed78c3..c4dcf654d8fed2338031ffdc470167d4a2d80269 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -10,6 +10,7 @@
 #include <linux/rds.h>
 #include <linux/rhashtable.h>
 #include <linux/refcount.h>
+#include <linux/in6.h>
 
 #include "info.h"
 
 #define RDS_PROTOCOL_MINOR(v)  ((v) & 255)
 #define RDS_PROTOCOL(maj, min) (((maj) << 8) | min)
 
-/*
- * XXX randomly chosen, but at least seems to be unused:
- * #               18464-18768 Unassigned
- * We should do better.  We want a reserved port to discourage unpriv'ed
- * userspace from listening.
+/* The following ports, 16385, 18634, 18635, are registered with IANA as
+ * the ports to be used for RDS over TCP and UDP.  Currently, only RDS over
+ * TCP and RDS over IB/RDMA are implemented.  18634 is the historical value
+ * used for the RDMA_CM listener port.  RDS/TCP uses port 16385.  After
+ * the IPv6 work, RDMA_CM also uses 16385 as the listener port.  18634 is
+ * kept to ensure compatibility with older RDS modules.  These ports are
+ * defined in each transport's header file.
  */
 #define RDS_PORT       18634
 
@@ -61,7 +64,7 @@ void rdsdebug(char *fmt, ...)
 
 struct rds_cong_map {
        struct rb_node          m_rb_node;
-       __be32                  m_addr;
+       struct in6_addr         m_addr;
        wait_queue_head_t       m_waitq;
        struct list_head        m_conn_list;
        unsigned long           m_page_addrs[RDS_CONG_MAP_PAGES];
@@ -136,11 +139,14 @@ struct rds_conn_path {
 /* One rds_connection per RDS address pair */
 struct rds_connection {
        struct hlist_node       c_hash_node;
-       __be32                  c_laddr;
-       __be32                  c_faddr;
+       struct in6_addr         c_laddr;
+       struct in6_addr         c_faddr;
+       int                     c_dev_if; /* ifindex used for this conn */
+       int                     c_bound_if; /* ifindex of c_laddr */
        unsigned int            c_loopback:1,
+                               c_isv6:1,
                                c_ping_triggered:1,
-                               c_pad_to_32:30;
+                               c_pad_to_32:29;
        int                     c_npaths;
        struct rds_connection   *c_passive;
        struct rds_transport    *c_trans;
@@ -269,7 +275,7 @@ struct rds_incoming {
        struct rds_conn_path    *i_conn_path;
        struct rds_header       i_hdr;
        unsigned long           i_rx_jiffies;
-       __be32                  i_saddr;
+       struct in6_addr         i_saddr;
 
        rds_rdma_cookie_t       i_rdma_cookie;
        struct timeval          i_rx_tstamp;
@@ -386,7 +392,7 @@ struct rds_message {
        struct list_head        m_conn_item;
        struct rds_incoming     m_inc;
        u64                     m_ack_seq;
-       __be32                  m_daddr;
+       struct in6_addr         m_daddr;
        unsigned long           m_flags;
 
        /* Never access m_rs without holding m_rs_lock.
@@ -521,7 +527,8 @@ struct rds_transport {
                                t_mp_capable:1;
        unsigned int            t_type;
 
-       int (*laddr_check)(struct net *net, __be32 addr);
+       int (*laddr_check)(struct net *net, const struct in6_addr *addr,
+                          __u32 scope_id);
        int (*conn_alloc)(struct rds_connection *conn, gfp_t gfp);
        void (*conn_free)(void *data);
        int (*conn_path_connect)(struct rds_conn_path *cp);
@@ -537,8 +544,8 @@ struct rds_transport {
        void (*inc_free)(struct rds_incoming *inc);
 
        int (*cm_handle_connect)(struct rdma_cm_id *cm_id,
-                                struct rdma_cm_event *event);
-       int (*cm_initiate_connect)(struct rdma_cm_id *cm_id);
+                                struct rdma_cm_event *event, bool isv6);
+       int (*cm_initiate_connect)(struct rdma_cm_id *cm_id, bool isv6);
        void (*cm_connect_complete)(struct rds_connection *conn,
                                    struct rdma_cm_event *event);
 
@@ -554,6 +561,12 @@ struct rds_transport {
        bool (*t_unloading)(struct rds_connection *conn);
 };
 
+/* Bind hash table key length.  It is the sum of the size of a struct
+ * in6_addr, a scope_id and a port.
+ */
+#define RDS_BOUND_KEY_LEN \
+       (sizeof(struct in6_addr) + sizeof(__u32) + sizeof(__be16))
+
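
A sketch of how such a flat key can be composed in user space (the
helper name here is hypothetical; packing address, scope_id and port
into one contiguous buffer lets a hash table hash and compare keys as
raw bytes):

    #include <stdint.h>
    #include <string.h>
    #include <arpa/inet.h>
    #include <netinet/in.h>

    #define BOUND_KEY_LEN \
            (sizeof(struct in6_addr) + sizeof(uint32_t) + sizeof(uint16_t))

    static void make_bound_key(uint8_t key[BOUND_KEY_LEN],
                               const struct in6_addr *addr,
                               uint32_t scope_id, uint16_t port)
    {
            memcpy(key, addr, sizeof(*addr));
            memcpy(key + sizeof(*addr), &scope_id, sizeof(scope_id));
            memcpy(key + sizeof(*addr) + sizeof(scope_id), &port,
                   sizeof(port));
    }

    int main(void)
    {
            uint8_t key[BOUND_KEY_LEN];
            struct in6_addr a = IN6ADDR_LOOPBACK_INIT;

            make_bound_key(key, &a, 0, htons(4000));
            return 0;
    }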
 struct rds_sock {
        struct sock             rs_sk;
 
@@ -565,10 +578,14 @@ struct rds_sock {
         * support.
         */
        struct rhash_head       rs_bound_node;
-       u64                     rs_bound_key;
-       __be32                  rs_bound_addr;
-       __be32                  rs_conn_addr;
-       __be16                  rs_bound_port;
+       u8                      rs_bound_key[RDS_BOUND_KEY_LEN];
+       struct sockaddr_in6     rs_bound_sin6;
+#define rs_bound_addr          rs_bound_sin6.sin6_addr
+#define rs_bound_addr_v4       rs_bound_sin6.sin6_addr.s6_addr32[3]
+#define rs_bound_port          rs_bound_sin6.sin6_port
+#define rs_bound_scope_id      rs_bound_sin6.sin6_scope_id
+       struct in6_addr         rs_conn_addr;
+#define rs_conn_addr_v4                rs_conn_addr.s6_addr32[3]
        __be16                  rs_conn_port;
        struct rds_transport    *rs_transport;
 
@@ -704,7 +721,8 @@ extern wait_queue_head_t rds_poll_waitq;
 /* bind.c */
 int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
 void rds_remove_bound(struct rds_sock *rs);
-struct rds_sock *rds_find_bound(__be32 addr, __be16 port);
+struct rds_sock *rds_find_bound(const struct in6_addr *addr, __be16 port,
+                               __u32 scope_id);
 int rds_bind_lock_init(void);
 void rds_bind_lock_destroy(void);
 
@@ -723,16 +741,20 @@ void rds_cong_remove_socket(struct rds_sock *);
 void rds_cong_exit(void);
 struct rds_message *rds_cong_update_alloc(struct rds_connection *conn);
 
-/* conn.c */
+/* connection.c */
 extern u32 rds_gen_num;
 int rds_conn_init(void);
 void rds_conn_exit(void);
 struct rds_connection *rds_conn_create(struct net *net,
-                                      __be32 laddr, __be32 faddr,
-                                      struct rds_transport *trans, gfp_t gfp);
+                                      const struct in6_addr *laddr,
+                                      const struct in6_addr *faddr,
+                                      struct rds_transport *trans, gfp_t gfp,
+                                      int dev_if);
 struct rds_connection *rds_conn_create_outgoing(struct net *net,
-                                               __be32 laddr, __be32 faddr,
-                              struct rds_transport *trans, gfp_t gfp);
+                                               const struct in6_addr *laddr,
+                                               const struct in6_addr *faddr,
+                                               struct rds_transport *trans,
+                                               gfp_t gfp, int dev_if);
 void rds_conn_shutdown(struct rds_conn_path *cpath);
 void rds_conn_destroy(struct rds_connection *conn);
 void rds_conn_drop(struct rds_connection *conn);
@@ -843,11 +865,12 @@ void rds_page_exit(void);
 
 /* recv.c */
 void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn,
-                 __be32 saddr);
+                 struct in6_addr *saddr);
 void rds_inc_path_init(struct rds_incoming *inc, struct rds_conn_path *conn,
-                      __be32 saddr);
+                      struct in6_addr *saddr);
 void rds_inc_put(struct rds_incoming *inc);
-void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr,
+void rds_recv_incoming(struct rds_connection *conn, struct in6_addr *saddr,
+                      struct in6_addr *daddr,
                       struct rds_incoming *inc, gfp_t gfp);
 int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
                int msg_flags);
@@ -856,13 +879,17 @@ int rds_notify_queue_get(struct rds_sock *rs, struct msghdr *msg);
 void rds_inc_info_copy(struct rds_incoming *inc,
                       struct rds_info_iterator *iter,
                       __be32 saddr, __be32 daddr, int flip);
+void rds6_inc_info_copy(struct rds_incoming *inc,
+                       struct rds_info_iterator *iter,
+                       struct in6_addr *saddr, struct in6_addr *daddr,
+                       int flip);
 
 /* send.c */
 int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len);
 void rds_send_path_reset(struct rds_conn_path *conn);
 int rds_send_xmit(struct rds_conn_path *cp);
 struct sockaddr_in;
-void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest);
+void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in6 *dest);
 typedef int (*is_acked_func)(struct rds_message *rm, uint64_t ack);
 void rds_send_drop_acked(struct rds_connection *conn, u64 ack,
                         is_acked_func is_acked);
@@ -949,11 +976,14 @@ void rds_send_worker(struct work_struct *);
 void rds_recv_worker(struct work_struct *);
 void rds_connect_path_complete(struct rds_conn_path *conn, int curr);
 void rds_connect_complete(struct rds_connection *conn);
+int rds_addr_cmp(const struct in6_addr *a1, const struct in6_addr *a2);
 
 /* transport.c */
 void rds_trans_register(struct rds_transport *trans);
 void rds_trans_unregister(struct rds_transport *trans);
-struct rds_transport *rds_trans_get_preferred(struct net *net, __be32 addr);
+struct rds_transport *rds_trans_get_preferred(struct net *net,
+                                             const struct in6_addr *addr,
+                                             __u32 scope_id);
 void rds_trans_put(struct rds_transport *trans);
 unsigned int rds_trans_stats_info_copy(struct rds_info_iterator *iter,
                                       unsigned int avail);
index 192ac6f78ded7b0288ac01d641cd5f7772b03fd8..504cd6bcc54c5ed4d6e151d176761899d5a25d91 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 Oracle.  All rights reserved.
+ * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
 #include "rds.h"
 
 void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn,
-                 __be32 saddr)
+                struct in6_addr *saddr)
 {
        int i;
 
        refcount_set(&inc->i_refcount, 1);
        INIT_LIST_HEAD(&inc->i_item);
        inc->i_conn = conn;
-       inc->i_saddr = saddr;
+       inc->i_saddr = *saddr;
        inc->i_rdma_cookie = 0;
        inc->i_rx_tstamp.tv_sec = 0;
        inc->i_rx_tstamp.tv_usec = 0;
@@ -59,13 +59,13 @@ void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn,
 EXPORT_SYMBOL_GPL(rds_inc_init);
 
 void rds_inc_path_init(struct rds_incoming *inc, struct rds_conn_path *cp,
-                      __be32 saddr)
+                      struct in6_addr  *saddr)
 {
        refcount_set(&inc->i_refcount, 1);
        INIT_LIST_HEAD(&inc->i_item);
        inc->i_conn = cp->cp_conn;
        inc->i_conn_path = cp;
-       inc->i_saddr = saddr;
+       inc->i_saddr = *saddr;
        inc->i_rdma_cookie = 0;
        inc->i_rx_tstamp.tv_sec = 0;
        inc->i_rx_tstamp.tv_usec = 0;
@@ -110,7 +110,7 @@ static void rds_recv_rcvbuf_delta(struct rds_sock *rs, struct sock *sk,
 
        now_congested = rs->rs_rcv_bytes > rds_sk_rcvbuf(rs);
 
-       rdsdebug("rs %p (%pI4:%u) recv bytes %d buf %d "
+       rdsdebug("rs %p (%pI6c:%u) recv bytes %d buf %d "
          "now_cong %d delta %d\n",
          rs, &rs->rs_bound_addr,
          ntohs(rs->rs_bound_port), rs->rs_rcv_bytes,
@@ -260,7 +260,7 @@ static void rds_start_mprds(struct rds_connection *conn)
        struct rds_conn_path *cp;
 
        if (conn->c_npaths > 1 &&
-           IS_CANONICAL(conn->c_laddr, conn->c_faddr)) {
+           rds_addr_cmp(&conn->c_laddr, &conn->c_faddr) < 0) {
                for (i = 0; i < conn->c_npaths; i++) {
                        cp = &conn->c_path[i];
                        rds_conn_path_connect_if_down(cp);
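
IS_CANONICAL() was a 32-bit address comparison; with 128-bit addresses the
initiator election becomes rds_addr_cmp(&laddr, &faddr) < 0, so the two
ends of an MPRDS connection still agree on exactly one path initiator.  A
sketch of the rule, assuming both peers compare the same 16-byte addresses:

    #include <stdbool.h>
    #include <string.h>
    #include <netinet/in.h>

    /* Exactly one side sees cmp < 0 (unless laddr == faddr, i.e. loopback,
     * where MPRDS initiation does not apply); that side dials the paths.
     */
    static bool i_am_initiator(const struct in6_addr *laddr,
                               const struct in6_addr *faddr)
    {
            /* memcmp over network-order bytes matches rds_addr_cmp() */
            return memcmp(laddr, faddr, sizeof(*laddr)) < 0;
    }
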
@@ -284,7 +284,8 @@ static void rds_start_mprds(struct rds_connection *conn)
  * conn.  This lets loopback, which only has one conn for both directions,
  * tell us which roles the addrs in the conn are playing for this message.
  */
-void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr,
+void rds_recv_incoming(struct rds_connection *conn, struct in6_addr *saddr,
+                      struct in6_addr *daddr,
                       struct rds_incoming *inc, gfp_t gfp)
 {
        struct rds_sock *rs = NULL;
@@ -339,7 +340,8 @@ void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr,
 
        if (rds_sysctl_ping_enable && inc->i_hdr.h_dport == 0) {
                if (inc->i_hdr.h_sport == 0) {
-                       rdsdebug("ignore ping with 0 sport from 0x%x\n", saddr);
+                       rdsdebug("ignore ping with 0 sport from %pI6c\n",
+                                saddr);
                        goto out;
                }
                rds_stats_inc(s_recv_ping);
@@ -362,7 +364,7 @@ void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr,
                goto out;
        }
 
-       rs = rds_find_bound(daddr, inc->i_hdr.h_dport);
+       rs = rds_find_bound(daddr, inc->i_hdr.h_dport, conn->c_bound_if);
        if (!rs) {
                rds_stats_inc(s_recv_drop_no_sock);
                goto out;
@@ -625,6 +627,7 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
        struct rds_sock *rs = rds_sk_to_rs(sk);
        long timeo;
        int ret = 0, nonblock = msg_flags & MSG_DONTWAIT;
+       DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
        DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name);
        struct rds_incoming *inc = NULL;
 
@@ -673,7 +676,7 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
                        break;
                }
 
-               rdsdebug("copying inc %p from %pI4:%u to user\n", inc,
+               rdsdebug("copying inc %p from %pI6c:%u to user\n", inc,
                         &inc->i_conn->c_faddr,
                         ntohs(inc->i_hdr.h_sport));
                ret = inc->i_conn->c_trans->inc_copy_to_user(inc, &msg->msg_iter);
@@ -707,12 +710,26 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
 
                rds_stats_inc(s_recv_delivered);
 
-               if (sin) {
-                       sin->sin_family = AF_INET;
-                       sin->sin_port = inc->i_hdr.h_sport;
-                       sin->sin_addr.s_addr = inc->i_saddr;
-                       memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
-                       msg->msg_namelen = sizeof(*sin);
+               if (msg->msg_name) {
+                       if (ipv6_addr_v4mapped(&inc->i_saddr)) {
+                               sin = (struct sockaddr_in *)msg->msg_name;
+
+                               sin->sin_family = AF_INET;
+                               sin->sin_port = inc->i_hdr.h_sport;
+                               sin->sin_addr.s_addr =
+                                   inc->i_saddr.s6_addr32[3];
+                               memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
+                               msg->msg_namelen = sizeof(*sin);
+                       } else {
+                               sin6 = (struct sockaddr_in6 *)msg->msg_name;
+
+                               sin6->sin6_family = AF_INET6;
+                               sin6->sin6_port = inc->i_hdr.h_sport;
+                               sin6->sin6_addr = inc->i_saddr;
+                               sin6->sin6_flowinfo = 0;
+                               sin6->sin6_scope_id = rs->rs_bound_scope_id;
+                               msg->msg_namelen = sizeof(*sin6);
+                       }
                }
                break;
        }
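
rds_recvmsg() now fills in either an AF_INET or an AF_INET6 name depending
on whether the source address is v4-mapped, so callers should pass a
msg_name buffer sized for the larger family.  A hedged userspace sketch
(recv_one() and its arguments are illustrative, not part of any API):

    #include <stdio.h>
    #include <sys/socket.h>
    #include <sys/uio.h>
    #include <netinet/in.h>

    static void recv_one(int fd, void *buf, size_t len)
    {
            struct sockaddr_storage ss;     /* room for either family */
            struct iovec iov = { .iov_base = buf, .iov_len = len };
            struct msghdr msg = {
                    .msg_name    = &ss,
                    .msg_namelen = sizeof(ss),
                    .msg_iov     = &iov,
                    .msg_iovlen  = 1,
            };

            if (recvmsg(fd, &msg, 0) < 0)
                    return;
            if (ss.ss_family == AF_INET)
                    printf("v4 sender, port %u\n",
                           ntohs(((struct sockaddr_in *)&ss)->sin_port));
            else
                    printf("v6 sender, scope %u\n",
                           ((struct sockaddr_in6 *)&ss)->sin6_scope_id);
    }
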
@@ -775,3 +792,30 @@ void rds_inc_info_copy(struct rds_incoming *inc,
 
        rds_info_copy(iter, &minfo, sizeof(minfo));
 }
+
+#if IS_ENABLED(CONFIG_IPV6)
+void rds6_inc_info_copy(struct rds_incoming *inc,
+                       struct rds_info_iterator *iter,
+                       struct in6_addr *saddr, struct in6_addr *daddr,
+                       int flip)
+{
+       struct rds6_info_message minfo6;
+
+       minfo6.seq = be64_to_cpu(inc->i_hdr.h_sequence);
+       minfo6.len = be32_to_cpu(inc->i_hdr.h_len);
+
+       if (flip) {
+               minfo6.laddr = *daddr;
+               minfo6.faddr = *saddr;
+               minfo6.lport = inc->i_hdr.h_dport;
+               minfo6.fport = inc->i_hdr.h_sport;
+       } else {
+               minfo6.laddr = *saddr;
+               minfo6.faddr = *daddr;
+               minfo6.lport = inc->i_hdr.h_sport;
+               minfo6.fport = inc->i_hdr.h_dport;
+       }
+
+       rds_info_copy(iter, &minfo6, sizeof(minfo6));
+}
+#endif
index 59f17a2335f44c00b445d8571a19eb393b803c68..57b3d5a8b2db59a3bc326199b51a04fc428ddc6d 100644 (file)
@@ -709,7 +709,7 @@ void rds_send_drop_acked(struct rds_connection *conn, u64 ack,
 }
 EXPORT_SYMBOL_GPL(rds_send_drop_acked);
 
-void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
+void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in6 *dest)
 {
        struct rds_message *rm, *tmp;
        struct rds_connection *conn;
@@ -721,8 +721,9 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
        spin_lock_irqsave(&rs->rs_lock, flags);
 
        list_for_each_entry_safe(rm, tmp, &rs->rs_send_queue, m_sock_item) {
-               if (dest && (dest->sin_addr.s_addr != rm->m_daddr ||
-                            dest->sin_port != rm->m_inc.i_hdr.h_dport))
+               if (dest &&
+                   (!ipv6_addr_equal(&dest->sin6_addr, &rm->m_daddr) ||
+                    dest->sin6_port != rm->m_inc.i_hdr.h_dport))
                        continue;
 
                list_move(&rm->m_sock_item, &list);
@@ -1059,8 +1060,8 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
 {
        struct sock *sk = sock->sk;
        struct rds_sock *rs = rds_sk_to_rs(sk);
+       DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
        DECLARE_SOCKADDR(struct sockaddr_in *, usin, msg->msg_name);
-       __be32 daddr;
        __be16 dport;
        struct rds_message *rm = NULL;
        struct rds_connection *conn;
@@ -1069,10 +1070,13 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
        int nonblock = msg->msg_flags & MSG_DONTWAIT;
        long timeo = sock_sndtimeo(sk, nonblock);
        struct rds_conn_path *cpath;
+       struct in6_addr daddr;
+       __u32 scope_id = 0;
        size_t total_payload_len = payload_len, rdma_payload_len = 0;
        bool zcopy = ((msg->msg_flags & MSG_ZEROCOPY) &&
                      sock_flag(rds_rs_to_sk(rs), SOCK_ZEROCOPY));
        int num_sgs = ceil(payload_len, PAGE_SIZE);
+       int namelen;
 
        /* Mirror Linux UDP's mirroring of BSD error message compatibility */
        /* XXX: Perhaps MSG_MORE someday */
@@ -1081,27 +1085,108 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
                goto out;
        }
 
-       if (msg->msg_namelen) {
-               /* XXX fail non-unicast destination IPs? */
-               if (msg->msg_namelen < sizeof(*usin) || usin->sin_family != AF_INET) {
+       namelen = msg->msg_namelen;
+       if (namelen != 0) {
+               if (namelen < sizeof(*usin)) {
+                       ret = -EINVAL;
+                       goto out;
+               }
+               switch (usin->sin_family) {
+               case AF_INET:
+                       if (usin->sin_addr.s_addr == htonl(INADDR_ANY) ||
+                           usin->sin_addr.s_addr == htonl(INADDR_BROADCAST) ||
+                           IN_MULTICAST(ntohl(usin->sin_addr.s_addr))) {
+                               ret = -EINVAL;
+                               goto out;
+                       }
+                       ipv6_addr_set_v4mapped(usin->sin_addr.s_addr, &daddr);
+                       dport = usin->sin_port;
+                       break;
+
+#if IS_ENABLED(CONFIG_IPV6)
+               case AF_INET6: {
+                       int addr_type;
+
+                       if (namelen < sizeof(*sin6)) {
+                               ret = -EINVAL;
+                               goto out;
+                       }
+                       addr_type = ipv6_addr_type(&sin6->sin6_addr);
+                       if (!(addr_type & IPV6_ADDR_UNICAST)) {
+                               __be32 addr4;
+
+                               if (!(addr_type & IPV6_ADDR_MAPPED)) {
+                                       ret = -EINVAL;
+                                       goto out;
+                               }
+
+                               /* It is a mapped address.  Need to do some
+                                * sanity checks.
+                                */
+                               addr4 = sin6->sin6_addr.s6_addr32[3];
+                               if (addr4 == htonl(INADDR_ANY) ||
+                                   addr4 == htonl(INADDR_BROADCAST) ||
+                                   IN_MULTICAST(ntohl(addr4))) {
+                                       ret = -EINVAL;
+                                       goto out;
+                               }
+                       }
+                       if (addr_type & IPV6_ADDR_LINKLOCAL) {
+                               if (sin6->sin6_scope_id == 0) {
+                                       ret = -EINVAL;
+                                       goto out;
+                               }
+                               scope_id = sin6->sin6_scope_id;
+                       }
+
+                       daddr = sin6->sin6_addr;
+                       dport = sin6->sin6_port;
+                       break;
+               }
+#endif
+
+               default:
                        ret = -EINVAL;
                        goto out;
                }
-               daddr = usin->sin_addr.s_addr;
-               dport = usin->sin_port;
        } else {
                /* We only care about consistency with ->connect() */
                lock_sock(sk);
                daddr = rs->rs_conn_addr;
                dport = rs->rs_conn_port;
+               scope_id = rs->rs_bound_scope_id;
                release_sock(sk);
        }
 
        lock_sock(sk);
-       if (daddr == 0 || rs->rs_bound_addr == 0) {
+       if (ipv6_addr_any(&rs->rs_bound_addr) || ipv6_addr_any(&daddr)) {
                release_sock(sk);
-               ret = -ENOTCONN; /* XXX not a great errno */
+               ret = -ENOTCONN;
                goto out;
+       } else if (namelen != 0) {
+               /* Cannot send to an IPv4 address using an IPv6 source
+                * address and cannot send to an IPv6 address using an
+                * IPv4 source address.
+                */
+               if (ipv6_addr_v4mapped(&daddr) ^
+                   ipv6_addr_v4mapped(&rs->rs_bound_addr)) {
+                       release_sock(sk);
+                       ret = -EOPNOTSUPP;
+                       goto out;
+               }
+               /* If the socket is already bound to a link-local address,
+                * it can only send to peers on the same link.  But allow
+                * communicating between link-local and non-link-local peers.
+                */
+               if (scope_id != rs->rs_bound_scope_id) {
+                       if (!scope_id) {
+                               scope_id = rs->rs_bound_scope_id;
+                       } else if (rs->rs_bound_scope_id) {
+                               release_sock(sk);
+                               ret = -EINVAL;
+                               goto out;
+                       }
+               }
        }
        release_sock(sk);
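
Taken together, the new checks encode two rules: destination and bound
address must be in the same family (v4-mapped stays with v4-mapped), and
two different non-zero link-local scope ids can never be combined.  The
same rules as a standalone predicate, a sketch only, with the errno values
mirroring the kernel path:

    #include <errno.h>
    #include <stdbool.h>
    #include <netinet/in.h>

    static int check_dest(const struct in6_addr *bound,
                          const struct in6_addr *dst,
                          unsigned int bound_scope, unsigned int dst_scope)
    {
            bool b4 = IN6_IS_ADDR_V4MAPPED(bound);
            bool d4 = IN6_IS_ADDR_V4MAPPED(dst);

            if (b4 != d4)                   /* no v4 <-> v6 mixing */
                    return -EOPNOTSUPP;
            if (dst_scope && bound_scope && dst_scope != bound_scope)
                    return -EINVAL;         /* two different links */
            return 0;                       /* zero scope inherits the other */
    }
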
 
@@ -1155,13 +1240,14 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
 
        /* rds_conn_create has a spinlock that runs with IRQ off.
         * Caching the conn in the socket helps a lot. */
-       if (rs->rs_conn && rs->rs_conn->c_faddr == daddr)
+       if (rs->rs_conn && ipv6_addr_equal(&rs->rs_conn->c_faddr, &daddr))
                conn = rs->rs_conn;
        else {
                conn = rds_conn_create_outgoing(sock_net(sock->sk),
-                                               rs->rs_bound_addr, daddr,
-                                       rs->rs_transport,
-                                       sock->sk->sk_allocation);
+                                               &rs->rs_bound_addr, &daddr,
+                                               rs->rs_transport,
+                                               sock->sk->sk_allocation,
+                                               scope_id);
                if (IS_ERR(conn)) {
                        ret = PTR_ERR(conn);
                        goto out;
index 351a28474667a351072de769e6ad9ebe3ac50e44..2c7b7c352d3e8cf77c7aed4d29b61d481249827f 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 Oracle.  All rights reserved.
+ * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -37,6 +37,7 @@
 #include <net/tcp.h>
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
+#include <net/addrconf.h>
 
 #include "rds.h"
 #include "tcp.h"
 /* only for info exporting */
 static DEFINE_SPINLOCK(rds_tcp_tc_list_lock);
 static LIST_HEAD(rds_tcp_tc_list);
+
+/* rds_tcp_tc_count counts only IPv4 connections.
+ * rds6_tcp_tc_count counts both IPv4 and IPv6 connections.
+ */
 static unsigned int rds_tcp_tc_count;
+#if IS_ENABLED(CONFIG_IPV6)
+static unsigned int rds6_tcp_tc_count;
+#endif
 
 /* Track rds_tcp_connection structs so they can be cleaned up */
 static DEFINE_SPINLOCK(rds_tcp_conn_lock);
@@ -111,7 +120,11 @@ void rds_tcp_restore_callbacks(struct socket *sock,
        /* done under the callback_lock to serialize with write_space */
        spin_lock(&rds_tcp_tc_list_lock);
        list_del_init(&tc->t_list_item);
-       rds_tcp_tc_count--;
+#if IS_ENABLED(CONFIG_IPV6)
+       rds6_tcp_tc_count--;
+#endif
+       if (!tc->t_cpath->cp_conn->c_isv6)
+               rds_tcp_tc_count--;
        spin_unlock(&rds_tcp_tc_list_lock);
 
        tc->t_sock = NULL;
@@ -198,7 +211,11 @@ void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp)
        /* done under the callback_lock to serialize with write_space */
        spin_lock(&rds_tcp_tc_list_lock);
        list_add_tail(&tc->t_list_item, &rds_tcp_tc_list);
-       rds_tcp_tc_count++;
+#if IS_ENABLED(CONFIG_IPV6)
+       rds6_tcp_tc_count++;
+#endif
+       if (!tc->t_cpath->cp_conn->c_isv6)
+               rds_tcp_tc_count++;
        spin_unlock(&rds_tcp_tc_list_lock);
 
        /* accepted sockets need our listen data ready undone */
@@ -219,6 +236,9 @@ void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp)
        write_unlock_bh(&sock->sk->sk_callback_lock);
 }
 
+/* Handle RDS_INFO_TCP_SOCKETS socket option.  It only returns IPv4
+ * connections for backward compatibility.
+ */
 static void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len,
                            struct rds_info_iterator *iter,
                            struct rds_info_lengths *lens)
@@ -226,8 +246,6 @@ static void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len,
        struct rds_info_tcp_socket tsinfo;
        struct rds_tcp_connection *tc;
        unsigned long flags;
-       struct sockaddr_in sin;
-       struct socket *sock;
 
        spin_lock_irqsave(&rds_tcp_tc_list_lock, flags);
 
@@ -235,16 +253,15 @@ static void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len,
                goto out;
 
        list_for_each_entry(tc, &rds_tcp_tc_list, t_list_item) {
+               struct inet_sock *inet = inet_sk(tc->t_sock->sk);
 
-               sock = tc->t_sock;
-               if (sock) {
-                       sock->ops->getname(sock, (struct sockaddr *)&sin, 0);
-                       tsinfo.local_addr = sin.sin_addr.s_addr;
-                       tsinfo.local_port = sin.sin_port;
-                       sock->ops->getname(sock, (struct sockaddr *)&sin, 1);
-                       tsinfo.peer_addr = sin.sin_addr.s_addr;
-                       tsinfo.peer_port = sin.sin_port;
-               }
+               if (tc->t_cpath->cp_conn->c_isv6)
+                       continue;
+
+               tsinfo.local_addr = inet->inet_saddr;
+               tsinfo.local_port = inet->inet_sport;
+               tsinfo.peer_addr = inet->inet_daddr;
+               tsinfo.peer_port = inet->inet_dport;
 
                tsinfo.hdr_rem = tc->t_tinc_hdr_rem;
                tsinfo.data_rem = tc->t_tinc_data_rem;
@@ -262,10 +279,82 @@ out:
        spin_unlock_irqrestore(&rds_tcp_tc_list_lock, flags);
 }
 
-static int rds_tcp_laddr_check(struct net *net, __be32 addr)
+#if IS_ENABLED(CONFIG_IPV6)
+/* Handle RDS6_INFO_TCP_SOCKETS socket option.  It returns both IPv4 and
+ * IPv6 connections.  IPv4 connection addresses are returned as IPv4-mapped
+ * IPv6 addresses.
+ */
+static void rds6_tcp_tc_info(struct socket *sock, unsigned int len,
+                            struct rds_info_iterator *iter,
+                            struct rds_info_lengths *lens)
 {
-       if (inet_addr_type(net, addr) == RTN_LOCAL)
+       struct rds6_info_tcp_socket tsinfo6;
+       struct rds_tcp_connection *tc;
+       unsigned long flags;
+
+       spin_lock_irqsave(&rds_tcp_tc_list_lock, flags);
+
+       if (len / sizeof(tsinfo6) < rds6_tcp_tc_count)
+               goto out;
+
+       list_for_each_entry(tc, &rds_tcp_tc_list, t_list_item) {
+               struct sock *sk = tc->t_sock->sk;
+               struct inet_sock *inet = inet_sk(sk);
+
+               tsinfo6.local_addr = sk->sk_v6_rcv_saddr;
+               tsinfo6.local_port = inet->inet_sport;
+               tsinfo6.peer_addr = sk->sk_v6_daddr;
+               tsinfo6.peer_port = inet->inet_dport;
+
+               tsinfo6.hdr_rem = tc->t_tinc_hdr_rem;
+               tsinfo6.data_rem = tc->t_tinc_data_rem;
+               tsinfo6.last_sent_nxt = tc->t_last_sent_nxt;
+               tsinfo6.last_expected_una = tc->t_last_expected_una;
+               tsinfo6.last_seen_una = tc->t_last_seen_una;
+
+               rds_info_copy(iter, &tsinfo6, sizeof(tsinfo6));
+       }
+
+out:
+       lens->nr = rds6_tcp_tc_count;
+       lens->each = sizeof(tsinfo6);
+
+       spin_unlock_irqrestore(&rds_tcp_tc_list_lock, flags);
+}
+#endif
+
+static int rds_tcp_laddr_check(struct net *net, const struct in6_addr *addr,
+                              __u32 scope_id)
+{
+       struct net_device *dev = NULL;
+#if IS_ENABLED(CONFIG_IPV6)
+       int ret;
+#endif
+
+       if (ipv6_addr_v4mapped(addr)) {
+               if (inet_addr_type(net, addr->s6_addr32[3]) == RTN_LOCAL)
+                       return 0;
+               return -EADDRNOTAVAIL;
+       }
+
+       /* If the scope_id is specified, check only those addresses
+        * hosted on the specified interface.
+        */
+       if (scope_id != 0) {
+               rcu_read_lock();
+               dev = dev_get_by_index_rcu(net, scope_id);
+               /* scope_id is not valid... */
+               if (!dev) {
+                       rcu_read_unlock();
+                       return -EADDRNOTAVAIL;
+               }
+               rcu_read_unlock();
+       }
+#if IS_ENABLED(CONFIG_IPV6)
+       ret = ipv6_chk_addr(net, addr, dev, 0);
+       if (ret)
                return 0;
+#endif
        return -EADDRNOTAVAIL;
 }
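
laddr_check now answers "is this address local, optionally on this
interface": v4-mapped addresses go through inet_addr_type(), native IPv6
through ipv6_chk_addr() with the device looked up from scope_id.  A rough
userspace analogue built on getifaddrs(); addr_is_local6() is a
hypothetical helper for illustration only:

    #include <ifaddrs.h>
    #include <net/if.h>
    #include <netinet/in.h>
    #include <string.h>

    /* Returns 1 if addr is configured locally; scope_id == 0 means any
     * interface, otherwise the owning interface index must match too.
     */
    static int addr_is_local6(const struct in6_addr *addr,
                              unsigned int scope_id)
    {
            struct ifaddrs *ifa, *p;
            int found = 0;

            if (getifaddrs(&ifa))
                    return 0;
            for (p = ifa; p && !found; p = p->ifa_next) {
                    struct sockaddr_in6 *s6;

                    if (!p->ifa_addr || p->ifa_addr->sa_family != AF_INET6)
                            continue;
                    if (scope_id && if_nametoindex(p->ifa_name) != scope_id)
                            continue;
                    s6 = (struct sockaddr_in6 *)p->ifa_addr;
                    found = !memcmp(&s6->sin6_addr, addr, sizeof(*addr));
            }
            freeifaddrs(ifa);
            return found;
    }
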
 
@@ -468,13 +557,27 @@ static __net_init int rds_tcp_init_net(struct net *net)
                err = -ENOMEM;
                goto fail;
        }
-       rtn->rds_tcp_listen_sock = rds_tcp_listen_init(net);
+
+#if IS_ENABLED(CONFIG_IPV6)
+       rtn->rds_tcp_listen_sock = rds_tcp_listen_init(net, true);
+#else
+       rtn->rds_tcp_listen_sock = rds_tcp_listen_init(net, false);
+#endif
        if (!rtn->rds_tcp_listen_sock) {
-               pr_warn("could not set up listen sock\n");
-               unregister_net_sysctl_table(rtn->rds_tcp_sysctl);
-               rtn->rds_tcp_sysctl = NULL;
-               err = -EAFNOSUPPORT;
-               goto fail;
+               pr_warn("could not set up IPv6 listen sock\n");
+
+#if IS_ENABLED(CONFIG_IPV6)
+               /* Try IPv4 as some systems disable IPv6 */
+               rtn->rds_tcp_listen_sock = rds_tcp_listen_init(net, false);
+               if (!rtn->rds_tcp_listen_sock) {
+#endif
+                       unregister_net_sysctl_table(rtn->rds_tcp_sysctl);
+                       rtn->rds_tcp_sysctl = NULL;
+                       err = -EAFNOSUPPORT;
+                       goto fail;
+#if IS_ENABLED(CONFIG_IPV6)
+               }
+#endif
        }
        INIT_WORK(&rtn->rds_tcp_accept_w, rds_tcp_accept_worker);
        return 0;
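
Per-netns setup now prefers an IPv6 listener, which also accepts IPv4
peers as v4-mapped connections, and falls back to plain IPv4 when IPv6 is
compiled in but disabled at runtime.  A userspace sketch of the same
fallback; listen_any() is hypothetical, and the port would be RDS_TCP_PORT
(16385 upstream):

    #include <netinet/in.h>
    #include <sys/socket.h>
    #include <unistd.h>

    static int listen_any(unsigned short port)
    {
            int fd = socket(AF_INET6, SOCK_STREAM, 0);

            if (fd >= 0) {
                    struct sockaddr_in6 s6 = {
                            .sin6_family = AF_INET6,
                            .sin6_port   = htons(port),
                            .sin6_addr   = in6addr_any,
                    };
                    int off = 0;

                    /* dual-stack: v4 peers show up as ::ffff:a.b.c.d */
                    setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY,
                               &off, sizeof(off));
                    if (!bind(fd, (struct sockaddr *)&s6, sizeof(s6)) &&
                        !listen(fd, 64))
                            return fd;
                    close(fd);
            }
            fd = socket(AF_INET, SOCK_STREAM, 0);  /* IPv6 unavailable */
            if (fd >= 0) {
                    struct sockaddr_in s4 = {
                            .sin_family = AF_INET,
                            .sin_port   = htons(port),
                            .sin_addr.s_addr = htonl(INADDR_ANY),
                    };

                    if (!bind(fd, (struct sockaddr *)&s4, sizeof(s4)) &&
                        !listen(fd, 64))
                            return fd;
                    close(fd);
            }
            return -1;
    }
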
@@ -588,6 +691,9 @@ static void rds_tcp_exit(void)
        rds_tcp_set_unloading();
        synchronize_rcu();
        rds_info_deregister_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info);
+#if IS_ENABLED(CONFIG_IPV6)
+       rds_info_deregister_func(RDS6_INFO_TCP_SOCKETS, rds6_tcp_tc_info);
+#endif
        unregister_pernet_device(&rds_tcp_net_ops);
        rds_tcp_destroy_conns();
        rds_trans_unregister(&rds_tcp_transport);
@@ -619,6 +725,9 @@ static int rds_tcp_init(void)
        rds_trans_register(&rds_tcp_transport);
 
        rds_info_register_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info);
+#if IS_ENABLED(CONFIG_IPV6)
+       rds_info_register_func(RDS6_INFO_TCP_SOCKETS, rds6_tcp_tc_info);
+#endif
 
        goto out;
 out_recv:
@@ -633,4 +742,3 @@ module_init(rds_tcp_init);
 MODULE_AUTHOR("Oracle Corporation <rds-devel@oss.oracle.com>");
 MODULE_DESCRIPTION("RDS: TCP transport");
 MODULE_LICENSE("Dual BSD/GPL");
-
index c6fa080e9b6d08c76d500c0e16fb281f09acd2b3..3c69361d21c730c4680033382049f4a0b457a727 100644 (file)
@@ -67,7 +67,7 @@ void rds_tcp_conn_path_shutdown(struct rds_conn_path *conn);
 void rds_tcp_state_change(struct sock *sk);
 
 /* tcp_listen.c */
-struct socket *rds_tcp_listen_init(struct net *);
+struct socket *rds_tcp_listen_init(struct net *net, bool isv6);
 void rds_tcp_listen_stop(struct socket *sock, struct work_struct *acceptor);
 void rds_tcp_listen_data_ready(struct sock *sk);
 int rds_tcp_accept_one(struct socket *sock);
index d999e707564579f0b81a8667946b348566b2695b..008f50fb25dd24f572aba43a8d9c58e9c7bd8cea 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 Oracle.  All rights reserved.
+ * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -66,7 +66,8 @@ void rds_tcp_state_change(struct sock *sk)
                 * RDS connection as RDS_CONN_UP until the reconnect,
                 * to avoid RDS datagram loss.
                 */
-               if (!IS_CANONICAL(cp->cp_conn->c_laddr, cp->cp_conn->c_faddr) &&
+               if (rds_addr_cmp(&cp->cp_conn->c_laddr,
+                                &cp->cp_conn->c_faddr) >= 0 &&
                    rds_conn_path_transition(cp, RDS_CONN_CONNECTING,
                                             RDS_CONN_ERROR)) {
                        rds_conn_path_drop(cp, false);
@@ -88,7 +89,11 @@ out:
 int rds_tcp_conn_path_connect(struct rds_conn_path *cp)
 {
        struct socket *sock = NULL;
-       struct sockaddr_in src, dest;
+       struct sockaddr_in6 sin6;
+       struct sockaddr_in sin;
+       struct sockaddr *addr;
+       int addrlen;
+       bool isv6;
        int ret;
        struct rds_connection *conn = cp->cp_conn;
        struct rds_tcp_connection *tc = cp->cp_transport_data;
@@ -105,37 +110,68 @@ int rds_tcp_conn_path_connect(struct rds_conn_path *cp)
                mutex_unlock(&tc->t_conn_path_lock);
                return 0;
        }
-       ret = sock_create_kern(rds_conn_net(conn), PF_INET,
-                              SOCK_STREAM, IPPROTO_TCP, &sock);
+       if (ipv6_addr_v4mapped(&conn->c_laddr)) {
+               ret = sock_create_kern(rds_conn_net(conn), PF_INET,
+                                      SOCK_STREAM, IPPROTO_TCP, &sock);
+               isv6 = false;
+       } else {
+               ret = sock_create_kern(rds_conn_net(conn), PF_INET6,
+                                      SOCK_STREAM, IPPROTO_TCP, &sock);
+               isv6 = true;
+       }
+
        if (ret < 0)
                goto out;
 
        rds_tcp_tune(sock);
 
-       src.sin_family = AF_INET;
-       src.sin_addr.s_addr = (__force u32)conn->c_laddr;
-       src.sin_port = (__force u16)htons(0);
+       if (isv6) {
+               sin6.sin6_family = AF_INET6;
+               sin6.sin6_addr = conn->c_laddr;
+               sin6.sin6_port = 0;
+               sin6.sin6_flowinfo = 0;
+               sin6.sin6_scope_id = conn->c_dev_if;
+               addr = (struct sockaddr *)&sin6;
+               addrlen = sizeof(sin6);
+       } else {
+               sin.sin_family = AF_INET;
+               sin.sin_addr.s_addr = conn->c_laddr.s6_addr32[3];
+               sin.sin_port = 0;
+               addr = (struct sockaddr *)&sin;
+               addrlen = sizeof(sin);
+       }
 
-       ret = sock->ops->bind(sock, (struct sockaddr *)&src, sizeof(src));
+       ret = sock->ops->bind(sock, addr, addrlen);
        if (ret) {
-               rdsdebug("bind failed with %d at address %pI4\n",
+               rdsdebug("bind failed with %d at address %pI6c\n",
                         ret, &conn->c_laddr);
                goto out;
        }
 
-       dest.sin_family = AF_INET;
-       dest.sin_addr.s_addr = (__force u32)conn->c_faddr;
-       dest.sin_port = (__force u16)htons(RDS_TCP_PORT);
+       if (isv6) {
+               sin6.sin6_family = AF_INET6;
+               sin6.sin6_addr = conn->c_faddr;
+               sin6.sin6_port = htons(RDS_TCP_PORT);
+               sin6.sin6_flowinfo = 0;
+               sin6.sin6_scope_id = conn->c_dev_if;
+               addr = (struct sockaddr *)&sin6;
+               addrlen = sizeof(sin6);
+       } else {
+               sin.sin_family = AF_INET;
+               sin.sin_addr.s_addr = conn->c_faddr.s6_addr32[3];
+               sin.sin_port = htons(RDS_TCP_PORT);
+               addr = (struct sockaddr *)&sin;
+               addrlen = sizeof(sin);
+       }
 
        /*
         * once we call connect() we can start getting callbacks and they
         * own the socket
         */
        rds_tcp_set_callbacks(sock, cp);
-       ret = sock->ops->connect(sock, (struct sockaddr *)&dest, sizeof(dest),
-                                O_NONBLOCK);
+       ret = sock->ops->connect(sock, addr, addrlen, O_NONBLOCK);
 
-       rdsdebug("connect to address %pI4 returned %d\n", &conn->c_faddr, ret);
+       rdsdebug("connect to address %pI6c returned %d\n", &conn->c_faddr, ret);
        if (ret == -EINPROGRESS)
                ret = 0;
        if (ret == 0) {
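
On the active side the socket family follows the local address: a
v4-mapped c_laddr yields a PF_INET socket, anything else PF_INET6 with
c_dev_if copied into sin6_scope_id, since a link-local destination is
ambiguous without an interface.  A small userspace sketch of the v6 case;
connect_ll() is a hypothetical helper and the address is an example:

    #include <arpa/inet.h>
    #include <netinet/in.h>
    #include <sys/socket.h>
    #include <unistd.h>

    /* ifindex (e.g. from if_nametoindex()) must be non-zero for fe80::/10
     * destinations or the kernel cannot choose an egress link.
     */
    static int connect_ll(const char *addr, unsigned int ifindex,
                          unsigned short port)
    {
            struct sockaddr_in6 s6 = {
                    .sin6_family   = AF_INET6,
                    .sin6_port     = htons(port),
                    .sin6_scope_id = ifindex,
            };
            int fd;

            if (inet_pton(AF_INET6, addr, &s6.sin6_addr) != 1)
                    return -1;
            fd = socket(AF_INET6, SOCK_STREAM, 0);
            if (fd < 0)
                    return -1;
            if (connect(fd, (struct sockaddr *)&s6, sizeof(s6))) {
                    close(fd);
                    return -1;
            }
            return fd;
    }
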
index 22571189f21e7e4a805af1b7edaed1c9f3c918ef..c12203f646da92e439dc17a35c6c31be8a56c4c4 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 Oracle.  All rights reserved.
+ * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -83,13 +83,12 @@ static
 struct rds_tcp_connection *rds_tcp_accept_one_path(struct rds_connection *conn)
 {
        int i;
-       bool peer_is_smaller = IS_CANONICAL(conn->c_faddr, conn->c_laddr);
        int npaths = max_t(int, 1, conn->c_npaths);
 
        /* for mprds, all paths MUST be initiated by the peer
         * with the smaller address.
         */
-       if (!peer_is_smaller) {
+       if (rds_addr_cmp(&conn->c_faddr, &conn->c_laddr) >= 0) {
                /* Make sure we initiate at least one path if this
                 * has not already been done; rds_start_mprds() will
                 * take care of additional paths, if necessary.
@@ -132,6 +131,11 @@ int rds_tcp_accept_one(struct socket *sock)
        struct rds_tcp_connection *rs_tcp = NULL;
        int conn_state;
        struct rds_conn_path *cp;
+       struct in6_addr *my_addr, *peer_addr;
+#if !IS_ENABLED(CONFIG_IPV6)
+       struct in6_addr saddr, daddr;
+#endif
+       int dev_if = 0;
 
        if (!sock) /* module unload or netns delete in progress */
                return -ENETUNREACH;
@@ -164,13 +168,40 @@ int rds_tcp_accept_one(struct socket *sock)
 
        inet = inet_sk(new_sock->sk);
 
-       rdsdebug("accepted tcp %pI4:%u -> %pI4:%u\n",
-                &inet->inet_saddr, ntohs(inet->inet_sport),
-                &inet->inet_daddr, ntohs(inet->inet_dport));
+#if IS_ENABLED(CONFIG_IPV6)
+       my_addr = &new_sock->sk->sk_v6_rcv_saddr;
+       peer_addr = &new_sock->sk->sk_v6_daddr;
+#else
+       ipv6_addr_set_v4mapped(inet->inet_saddr, &saddr);
+       ipv6_addr_set_v4mapped(inet->inet_daddr, &daddr);
+       my_addr = &saddr;
+       peer_addr = &daddr;
+#endif
+       rdsdebug("accepted family %d tcp %pI6c:%u -> %pI6c:%u\n",
+                sock->sk->sk_family,
+                my_addr, ntohs(inet->inet_sport),
+                peer_addr, ntohs(inet->inet_dport));
+
+#if IS_ENABLED(CONFIG_IPV6)
+       /* sk_bound_dev_if is not set if the peer address is not a
+        * link-local address.  In that case mcast_oif happens to be set,
+        * so just use it.
+        */
+       if ((ipv6_addr_type(my_addr) & IPV6_ADDR_LINKLOCAL) &&
+           !(ipv6_addr_type(peer_addr) & IPV6_ADDR_LINKLOCAL)) {
+               struct ipv6_pinfo *inet6;
+
+               inet6 = inet6_sk(new_sock->sk);
+               dev_if = inet6->mcast_oif;
+       } else {
+               dev_if = new_sock->sk->sk_bound_dev_if;
+       }
+#endif
 
        conn = rds_conn_create(sock_net(sock->sk),
-                              inet->inet_saddr, inet->inet_daddr,
-                              &rds_tcp_transport, GFP_KERNEL);
+                              my_addr, peer_addr,
+                              &rds_tcp_transport, GFP_KERNEL, dev_if);
+
        if (IS_ERR(conn)) {
                ret = PTR_ERR(conn);
                goto out;
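
For accepted sockets the interface must be recovered after the fact, as
above: sk_bound_dev_if normally, or mcast_oif when only the local address
is link-local.  The nearest userspace analogue is getsockname(), where
sin6_scope_id carries the ifindex for link-local addresses; a sketch:

    #include <netinet/in.h>
    #include <sys/socket.h>

    static unsigned int accepted_scope_id(int newfd)
    {
            struct sockaddr_in6 s6;
            socklen_t len = sizeof(s6);

            if (getsockname(newfd, (struct sockaddr *)&s6, &len) ||
                s6.sin6_family != AF_INET6)
                    return 0;
            return s6.sin6_scope_id;   /* 0 unless link-local */
    }
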
@@ -254,15 +285,22 @@ out:
                ready(sk);
 }
 
-struct socket *rds_tcp_listen_init(struct net *net)
+struct socket *rds_tcp_listen_init(struct net *net, bool isv6)
 {
-       struct sockaddr_in sin;
        struct socket *sock = NULL;
+       struct sockaddr_storage ss;
+       struct sockaddr_in6 *sin6;
+       struct sockaddr_in *sin;
+       int addr_len;
        int ret;
 
-       ret = sock_create_kern(net, PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
-       if (ret < 0)
+       ret = sock_create_kern(net, isv6 ? PF_INET6 : PF_INET, SOCK_STREAM,
+                              IPPROTO_TCP, &sock);
+       if (ret < 0) {
+               rdsdebug("could not create %s listener socket: %d\n",
+                        isv6 ? "IPv6" : "IPv4", ret);
                goto out;
+       }
 
        sock->sk->sk_reuse = SK_CAN_REUSE;
        rds_tcp_nonagle(sock);
@@ -272,13 +310,28 @@ struct socket *rds_tcp_listen_init(struct net *net)
        sock->sk->sk_data_ready = rds_tcp_listen_data_ready;
        write_unlock_bh(&sock->sk->sk_callback_lock);
 
-       sin.sin_family = PF_INET;
-       sin.sin_addr.s_addr = (__force u32)htonl(INADDR_ANY);
-       sin.sin_port = (__force u16)htons(RDS_TCP_PORT);
+       if (isv6) {
+               sin6 = (struct sockaddr_in6 *)&ss;
+               sin6->sin6_family = PF_INET6;
+               sin6->sin6_addr = in6addr_any;
+               sin6->sin6_port = (__force u16)htons(RDS_TCP_PORT);
+               sin6->sin6_scope_id = 0;
+               sin6->sin6_flowinfo = 0;
+               addr_len = sizeof(*sin6);
+       } else {
+               sin = (struct sockaddr_in *)&ss;
+               sin->sin_family = PF_INET;
+               sin->sin_addr.s_addr = INADDR_ANY;
+               sin->sin_port = (__force u16)htons(RDS_TCP_PORT);
+               addr_len = sizeof(*sin);
+       }
 
-       ret = sock->ops->bind(sock, (struct sockaddr *)&sin, sizeof(sin));
-       if (ret < 0)
+       ret = sock->ops->bind(sock, (struct sockaddr *)&ss, addr_len);
+       if (ret < 0) {
+               rdsdebug("could not bind %s listener socket: %d\n",
+                        isv6 ? "IPv6" : "IPv4", ret);
                goto out;
+       }
 
        ret = sock->ops->listen(sock, 64);
        if (ret < 0)
index b9fbd2ee74efe1c4f75cb499f00ce92f8be5a331..42c5ff1eda95f914c7a138b5aed8e11b884776ed 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 Oracle.  All rights reserved.
+ * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -179,7 +179,7 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb,
                        tc->t_tinc = tinc;
                        rdsdebug("alloced tinc %p\n", tinc);
                        rds_inc_path_init(&tinc->ti_inc, cp,
-                                         cp->cp_conn->c_faddr);
+                                         &cp->cp_conn->c_faddr);
                        tinc->ti_inc.i_rx_lat_trace[RDS_MSG_RX_HDR] =
                                        local_clock();
 
@@ -239,8 +239,9 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb,
                        if (tinc->ti_inc.i_hdr.h_flags == RDS_FLAG_CONG_BITMAP)
                                rds_tcp_cong_recv(conn, tinc);
                        else
-                               rds_recv_incoming(conn, conn->c_faddr,
-                                                 conn->c_laddr, &tinc->ti_inc,
+                               rds_recv_incoming(conn, &conn->c_faddr,
+                                                 &conn->c_laddr,
+                                                 &tinc->ti_inc,
                                                  arg->gfp);
 
                        tc->t_tinc_hdr_rem = sizeof(struct rds_header);
index 7df869d37afd4c27e519b227b57bb306cf30ef35..78a2554a4497928e08c28874711ef15628f5d1fa 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 Oracle.  All rights reserved.
+ * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -153,7 +153,7 @@ out:
                         * an incoming RST.
                         */
                        if (rds_conn_path_up(cp)) {
-                               pr_warn("RDS/tcp: send to %pI4 on cp [%d]"
+                               pr_warn("RDS/tcp: send to %pI6c on cp [%d] "
                                        "returned %d, "
                                        "disconnecting and reconnecting\n",
                                        &conn->c_faddr, cp->cp_index, ret);
index c52861d77a596ca49ad40c3c7c2efe902bc135f4..e64f9e4c3cdaf6c0f7c566421241b659c0603a69 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 Oracle.  All rights reserved.
+ * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -82,8 +82,8 @@ void rds_connect_path_complete(struct rds_conn_path *cp, int curr)
                return;
        }
 
-       rdsdebug("conn %p for %pI4 to %pI4 complete\n",
-         cp->cp_conn, &cp->cp_conn->c_laddr, &cp->cp_conn->c_faddr);
+       rdsdebug("conn %p for %pI6c to %pI6c complete\n",
+                cp->cp_conn, &cp->cp_conn->c_laddr, &cp->cp_conn->c_faddr);
 
        cp->cp_reconnect_jiffies = 0;
        set_bit(0, &cp->cp_conn->c_map_queued);
@@ -125,13 +125,13 @@ void rds_queue_reconnect(struct rds_conn_path *cp)
        unsigned long rand;
        struct rds_connection *conn = cp->cp_conn;
 
-       rdsdebug("conn %p for %pI4 to %pI4 reconnect jiffies %lu\n",
-         conn, &conn->c_laddr, &conn->c_faddr,
-         cp->cp_reconnect_jiffies);
+       rdsdebug("conn %p for %pI6c to %pI6c reconnect jiffies %lu\n",
+                conn, &conn->c_laddr, &conn->c_faddr,
+                cp->cp_reconnect_jiffies);
 
        /* let peer with smaller addr initiate reconnect, to avoid duels */
        if (conn->c_trans->t_type == RDS_TRANS_TCP &&
-           !IS_CANONICAL(conn->c_laddr, conn->c_faddr))
+           rds_addr_cmp(&conn->c_laddr, &conn->c_faddr) >= 0)
                return;
 
        set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags);
@@ -145,7 +145,7 @@ void rds_queue_reconnect(struct rds_conn_path *cp)
        }
 
        get_random_bytes(&rand, sizeof(rand));
-       rdsdebug("%lu delay %lu ceil conn %p for %pI4 -> %pI4\n",
+       rdsdebug("%lu delay %lu ceil conn %p for %pI6c -> %pI6c\n",
                 rand % cp->cp_reconnect_jiffies, cp->cp_reconnect_jiffies,
                 conn, &conn->c_laddr, &conn->c_faddr);
        rcu_read_lock();
@@ -167,14 +167,14 @@ void rds_connect_worker(struct work_struct *work)
        int ret;
 
        if (cp->cp_index > 0 &&
-           !IS_CANONICAL(cp->cp_conn->c_laddr, cp->cp_conn->c_faddr))
+           rds_addr_cmp(&cp->cp_conn->c_laddr, &cp->cp_conn->c_faddr) >= 0)
                return;
        clear_bit(RDS_RECONNECT_PENDING, &cp->cp_flags);
        ret = rds_conn_path_transition(cp, RDS_CONN_DOWN, RDS_CONN_CONNECTING);
        if (ret) {
                ret = conn->c_trans->conn_path_connect(cp);
-               rdsdebug("conn %p for %pI4 to %pI4 dispatched, ret %d\n",
-                       conn, &conn->c_laddr, &conn->c_faddr, ret);
+               rdsdebug("conn %p for %pI6c to %pI6c dispatched, ret %d\n",
+                        conn, &conn->c_laddr, &conn->c_faddr, ret);
 
                if (ret) {
                        if (rds_conn_path_transition(cp,
@@ -259,3 +259,50 @@ int rds_threads_init(void)
 
        return 0;
 }
+
+/* Compare two IPv6 addresses.  Return 0 if the two addresses are equal.
+ * Return 1 if the first is greater.  Return -1 if the second is greater.
+ */
+int rds_addr_cmp(const struct in6_addr *addr1,
+                const struct in6_addr *addr2)
+{
+#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64
+       const __be64 *a1, *a2;
+       u64 x, y;
+
+       a1 = (__be64 *)addr1;
+       a2 = (__be64 *)addr2;
+
+       if (*a1 != *a2) {
+               if (be64_to_cpu(*a1) < be64_to_cpu(*a2))
+                       return -1;
+               else
+                       return 1;
+       } else {
+               x = be64_to_cpu(*++a1);
+               y = be64_to_cpu(*++a2);
+               if (x < y)
+                       return -1;
+               else if (x > y)
+                       return 1;
+               else
+                       return 0;
+       }
+#else
+       u32 a, b;
+       int i;
+
+       for (i = 0; i < 4; i++) {
+               if (addr1->s6_addr32[i] != addr2->s6_addr32[i]) {
+                       a = ntohl(addr1->s6_addr32[i]);
+                       b = ntohl(addr2->s6_addr32[i]);
+                       if (a < b)
+                               return -1;
+                       else if (a > b)
+                               return 1;
+               }
+       }
+       return 0;
+#endif
+}
+EXPORT_SYMBOL_GPL(rds_addr_cmp);
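
Both branches of rds_addr_cmp() implement lexicographic order over the
sixteen network-order bytes; the 64-bit fast path just compares the two
halves after converting to host order.  A memcmp() over the struct yields
the same ordering and makes a convenient reference oracle; a sketch with a
tiny self-test:

    #include <assert.h>
    #include <string.h>
    #include <netinet/in.h>

    static int addr_cmp_ref(const struct in6_addr *a,
                            const struct in6_addr *b)
    {
            int r = memcmp(a, b, sizeof(*a)); /* big-endian bytes */

            return (r > 0) - (r < 0);         /* clamp to -1 / 0 / 1 */
    }

    int main(void)
    {
            assert(addr_cmp_ref(&in6addr_any, &in6addr_loopback) < 0);
            assert(addr_cmp_ref(&in6addr_loopback, &in6addr_loopback) == 0);
            return 0;
    }
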
index 0b188dd0a344cb0fd876fa2b32bb5e7475b255ff..46f709a4b577813c9c5b784b3a766445d64ef539 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 Oracle.  All rights reserved.
+ * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -33,6 +33,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/in.h>
+#include <linux/ipv6.h>
 
 #include "rds.h"
 #include "loop.h"
@@ -75,20 +76,26 @@ void rds_trans_put(struct rds_transport *trans)
                module_put(trans->t_owner);
 }
 
-struct rds_transport *rds_trans_get_preferred(struct net *net, __be32 addr)
+struct rds_transport *rds_trans_get_preferred(struct net *net,
+                                             const struct in6_addr *addr,
+                                             __u32 scope_id)
 {
        struct rds_transport *ret = NULL;
        struct rds_transport *trans;
        unsigned int i;
 
-       if (IN_LOOPBACK(ntohl(addr)))
+       if (ipv6_addr_v4mapped(addr)) {
+               if (*(u_int8_t *)&addr->s6_addr32[3] == IN_LOOPBACKNET)
+                       return &rds_loop_transport;
+       } else if (ipv6_addr_loopback(addr)) {
                return &rds_loop_transport;
+       }
 
        down_read(&rds_trans_sem);
        for (i = 0; i < RDS_TRANS_COUNT; i++) {
                trans = transports[i];
 
-               if (trans && (trans->laddr_check(net, addr) == 0) &&
+               if (trans && (trans->laddr_check(net, addr, scope_id) == 0) &&
                    (!trans->t_owner || try_module_get(trans->t_owner))) {
                        ret = trans;
                        break;
@@ -152,4 +159,3 @@ unsigned int rds_trans_stats_info_copy(struct rds_info_iterator *iter,
 
        return total;
 }
-
index 2b463047dd7ba93267feb584e1ffda280449a0b3..ac44d8afffb118101459426f4f8c2edbba4d8401 100644 (file)
@@ -741,7 +741,7 @@ static __poll_t rxrpc_poll(struct file *file, struct socket *sock,
        struct rxrpc_sock *rx = rxrpc_sk(sk);
        __poll_t mask;
 
-       sock_poll_wait(file, sk_sleep(sk), wait);
+       sock_poll_wait(file, wait);
        mask = 0;
 
        /* the socket is readable if there are any messages waiting on the Rx
index 5fb7d3254d9e290106dda6566ed393eb6a2c3696..9d9278a13d9164eb7a975a56c173152d043d1f56 100644 (file)
@@ -420,6 +420,7 @@ struct rxrpc_connection {
        struct rxrpc_channel {
                unsigned long           final_ack_at;   /* Time at which to issue final ACK */
                struct rxrpc_call __rcu *call;          /* Active call */
+               unsigned int            call_debug_id;  /* call->debug_id */
                u32                     call_id;        /* ID of current call */
                u32                     call_counter;   /* Call ID counter */
                u32                     last_call;      /* ID of last call */
@@ -478,6 +479,7 @@ enum rxrpc_call_flag {
        RXRPC_CALL_RETRANS_TIMEOUT,     /* Retransmission due to timeout occurred */
        RXRPC_CALL_BEGAN_RX_TIMER,      /* We began the expect_rx_by timer */
        RXRPC_CALL_RX_HEARD,            /* The peer responded at least once to this call */
+       RXRPC_CALL_RX_UNDERRUN,         /* Got data underrun */
 };
 
 /*
@@ -588,7 +590,7 @@ struct rxrpc_call {
         */
 #define RXRPC_RXTX_BUFF_SIZE   64
 #define RXRPC_RXTX_BUFF_MASK   (RXRPC_RXTX_BUFF_SIZE - 1)
-#define RXRPC_INIT_RX_WINDOW_SIZE 32
+#define RXRPC_INIT_RX_WINDOW_SIZE 63
        struct sk_buff          **rxtx_buffer;
        u8                      *rxtx_annotations;
 #define RXRPC_TX_ANNO_ACK      0
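
rxtx_buffer is a 64-slot power-of-two ring indexed through
RXRPC_RXTX_BUFF_MASK, so raising the advertised initial window from 32 to
63 lets all but one guard slot be in flight without the top sequence
wrapping onto the hard-ack point.  A sketch of the arithmetic under that
assumption:

    #define BUFF_SIZE 64
    #define BUFF_MASK (BUFF_SIZE - 1)

    static int ring_index(unsigned int seq)
    {
            return seq & BUFF_MASK;           /* seq modulo ring size */
    }

    static int window_ok(unsigned int hard_ack, unsigned int top)
    {
            /* at most 63 packets outstanding: top never laps hard_ack */
            return top - hard_ack <= BUFF_SIZE - 1;
    }
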
index 20210418904bf61975e5f00ef196c5dce4770cca..8e7434e92097e8f0a2676bcf87df090daf43ee2e 100644 (file)
@@ -162,7 +162,6 @@ static void rxrpc_congestion_timeout(struct rxrpc_call *call)
  */
 static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
 {
-       struct rxrpc_skb_priv *sp;
        struct sk_buff *skb;
        unsigned long resend_at;
        rxrpc_seq_t cursor, seq, top;
@@ -207,7 +206,6 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
 
                skb = call->rxtx_buffer[ix];
                rxrpc_see_skb(skb, rxrpc_skb_tx_seen);
-               sp = rxrpc_skb(skb);
 
                if (anno_type == RXRPC_TX_ANNO_UNACK) {
                        if (ktime_after(skb->tstamp, max_age)) {
index 5736f643c51646b603cbad57330930bba6712ff1..f8f37188a9322829b8f4277c09b7329d2f4c1da0 100644 (file)
@@ -590,6 +590,7 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn,
         */
        smp_wmb();
        chan->call_id   = call_id;
+       chan->call_debug_id = call->debug_id;
        rcu_assign_pointer(chan->call, call);
        wake_up(&call->waitq);
 }
@@ -1051,7 +1052,6 @@ void rxrpc_discard_expired_client_conns(struct work_struct *work)
                container_of(work, struct rxrpc_net, client_conn_reaper);
        unsigned long expiry, conn_expires_at, now;
        unsigned int nr_conns;
-       bool did_discard = false;
 
        _enter("");
 
@@ -1113,7 +1113,6 @@ next:
         * If someone re-sets the flag and re-gets the ref, that's fine.
         */
        rxrpc_put_connection(conn);
-       did_discard = true;
        nr_conns--;
        goto next;
 
index 8229a52c2acd79f69883e27e966fa4df43ca93c3..84d40ba9856f462504fe6675391dcea7fd2bd642 100644 (file)
@@ -129,8 +129,10 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
                _proto("Tx ABORT %%%u { %d } [re]", serial, conn->local_abort);
                break;
        case RXRPC_PACKET_TYPE_ACK:
-               trace_rxrpc_tx_ack(NULL, serial, chan->last_seq, 0,
-                                  RXRPC_ACK_DUPLICATE, 0);
+               trace_rxrpc_tx_ack(chan->call_debug_id, serial,
+                                  ntohl(pkt.ack.firstPacket),
+                                  ntohl(pkt.ack.serial),
+                                  pkt.ack.reason, 0);
                _proto("Tx ACK %%%u [re]", serial);
                break;
        }
@@ -138,8 +140,11 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
        ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, ioc, len);
        conn->params.peer->last_tx_at = ktime_get_real();
        if (ret < 0)
-               trace_rxrpc_tx_fail(conn->debug_id, serial, ret,
-                                   rxrpc_tx_fail_call_final_resend);
+               trace_rxrpc_tx_fail(chan->call_debug_id, serial, ret,
+                                   rxrpc_tx_point_call_final_resend);
+       else
+               trace_rxrpc_tx_packet(chan->call_debug_id, &pkt.whdr,
+                                     rxrpc_tx_point_call_final_resend);
 
        _leave("");
 }
@@ -240,11 +245,13 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn,
        ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len);
        if (ret < 0) {
                trace_rxrpc_tx_fail(conn->debug_id, serial, ret,
-                                   rxrpc_tx_fail_conn_abort);
+                                   rxrpc_tx_point_conn_abort);
                _debug("sendmsg failed: %d", ret);
                return -EAGAIN;
        }
 
+       trace_rxrpc_tx_packet(conn->debug_id, &whdr, rxrpc_tx_point_conn_abort);
+
        conn->params.peer->last_tx_at = ktime_get_real();
 
        _leave(" = 0");
index 608d078a4981fd78d42b1e3c87b983a7ec0be0db..cfdc199c63510255c1d8cd60baed3c5f66b93d28 100644 (file)
@@ -496,7 +496,7 @@ next_subpacket:
                        return rxrpc_proto_abort("LSA", call, seq);
        }
 
-       trace_rxrpc_rx_data(call, seq, serial, flags, annotation);
+       trace_rxrpc_rx_data(call->debug_id, seq, serial, flags, annotation);
        if (before_eq(seq, hard_ack)) {
                ack = RXRPC_ACK_DUPLICATE;
                ack_serial = serial;
@@ -592,9 +592,15 @@ ack:
                rxrpc_propose_ACK(call, ack, skew, ack_serial,
                                  immediate_ack, true,
                                  rxrpc_propose_ack_input_data);
+       else
+               rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, skew, serial,
+                                 false, true,
+                                 rxrpc_propose_ack_input_data);
 
-       if (sp->hdr.seq == READ_ONCE(call->rx_hard_ack) + 1)
+       if (sp->hdr.seq == READ_ONCE(call->rx_hard_ack) + 1) {
+               trace_rxrpc_notify_socket(call->debug_id, serial);
                rxrpc_notify_socket(call);
+       }
        _leave(" [queued]");
 }
 
@@ -1262,6 +1268,11 @@ void rxrpc_data_ready(struct sock *udp_sk)
                        /* But otherwise we need to retransmit the final packet
                         * from data cached in the connection record.
                         */
+                       if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA)
+                               trace_rxrpc_rx_data(chan->call_debug_id,
+                                                   sp->hdr.seq,
+                                                   sp->hdr.serial,
+                                                   sp->hdr.flags, 0);
                        rxrpc_post_packet_to_conn(conn, skb);
                        goto out_unlock;
                }
index 8325f1b868404690d54a6e98d70b6896c3537f72..13bd8a4dfac7126effa56e4a5766634e8278e0cc 100644 (file)
@@ -72,7 +72,10 @@ static void rxrpc_send_version_request(struct rxrpc_local *local,
        ret = kernel_sendmsg(local->socket, &msg, iov, 2, len);
        if (ret < 0)
                trace_rxrpc_tx_fail(local->debug_id, 0, ret,
-                                   rxrpc_tx_fail_version_reply);
+                                   rxrpc_tx_point_version_reply);
+       else
+               trace_rxrpc_tx_packet(local->debug_id, &whdr,
+                                     rxrpc_tx_point_version_reply);
 
        _leave("");
 }
index f03de1c59ba37678f36f3a5c0778f3f3f9274757..801dbf3d34789268f78f607574913f08bffc99cc 100644 (file)
@@ -183,7 +183,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping,
 
        serial = atomic_inc_return(&conn->serial);
        pkt->whdr.serial = htonl(serial);
-       trace_rxrpc_tx_ack(call, serial,
+       trace_rxrpc_tx_ack(call->debug_id, serial,
                           ntohl(pkt->ack.firstPacket),
                           ntohl(pkt->ack.serial),
                           pkt->ack.reason, pkt->ack.nAcks);
@@ -212,7 +212,10 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping,
        conn->params.peer->last_tx_at = ktime_get_real();
        if (ret < 0)
                trace_rxrpc_tx_fail(call->debug_id, serial, ret,
-                                   rxrpc_tx_fail_call_ack);
+                                   rxrpc_tx_point_call_ack);
+       else
+               trace_rxrpc_tx_packet(call->debug_id, &pkt->whdr,
+                                     rxrpc_tx_point_call_ack);
 
        if (call->state < RXRPC_CALL_COMPLETE) {
                if (ret < 0) {
@@ -299,7 +302,10 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call)
        conn->params.peer->last_tx_at = ktime_get_real();
        if (ret < 0)
                trace_rxrpc_tx_fail(call->debug_id, serial, ret,
-                                   rxrpc_tx_fail_call_abort);
+                                   rxrpc_tx_point_call_abort);
+       else
+               trace_rxrpc_tx_packet(call->debug_id, &pkt.whdr,
+                                     rxrpc_tx_point_call_abort);
 
 
        rxrpc_put_connection(conn);
@@ -396,7 +402,10 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
        up_read(&conn->params.local->defrag_sem);
        if (ret < 0)
                trace_rxrpc_tx_fail(call->debug_id, serial, ret,
-                                   rxrpc_tx_fail_call_data_nofrag);
+                                   rxrpc_tx_point_call_data_nofrag);
+       else
+               trace_rxrpc_tx_packet(call->debug_id, &whdr,
+                                     rxrpc_tx_point_call_data_nofrag);
        if (ret == -EMSGSIZE)
                goto send_fragmentable;
 
@@ -488,7 +497,10 @@ send_fragmentable:
 
        if (ret < 0)
                trace_rxrpc_tx_fail(call->debug_id, serial, ret,
-                                   rxrpc_tx_fail_call_data_frag);
+                                   rxrpc_tx_point_call_data_frag);
+       else
+               trace_rxrpc_tx_packet(call->debug_id, &whdr,
+                                     rxrpc_tx_point_call_data_frag);
 
        up_write(&conn->params.local->defrag_sem);
        goto done;
@@ -545,7 +557,10 @@ void rxrpc_reject_packets(struct rxrpc_local *local)
                        ret = kernel_sendmsg(local->socket, &msg, iov, 2, size);
                        if (ret < 0)
                                trace_rxrpc_tx_fail(local->debug_id, 0, ret,
-                                                   rxrpc_tx_fail_reject);
+                                                   rxrpc_tx_point_reject);
+                       else
+                               trace_rxrpc_tx_packet(local->debug_id, &whdr,
+                                                     rxrpc_tx_point_reject);
                }
 
                rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
@@ -597,7 +612,10 @@ void rxrpc_send_keepalive(struct rxrpc_peer *peer)
        ret = kernel_sendmsg(peer->local->socket, &msg, iov, 2, len);
        if (ret < 0)
                trace_rxrpc_tx_fail(peer->debug_id, 0, ret,
-                                   rxrpc_tx_fail_version_keepalive);
+                                   rxrpc_tx_point_version_keepalive);
+       else
+               trace_rxrpc_tx_packet(peer->debug_id, &whdr,
+                                     rxrpc_tx_point_version_keepalive);
 
        peer->last_tx_at = ktime_get_real();
        _leave("");
index d9fca8c4bcdc27288ab9f1fc3c433d722ab1495f..9805e3b85c3610d453931ea7a2ca46ed286f9dc5 100644 (file)
@@ -63,6 +63,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
        struct rxrpc_peer *peer;
        struct rxrpc_call *call;
        struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
+       unsigned long timeout = 0;
        rxrpc_seq_t tx_hard_ack, rx_hard_ack;
        char lbuff[50], rbuff[50];
 
@@ -71,7 +72,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
                         "Proto Local                                          "
                         " Remote                                         "
                         " SvID ConnID   CallID   End Use State    Abort   "
-                        " UserID\n");
+                        " UserID           TxSeq    TW RxSeq    RW RxSerial RxTimo\n");
                return 0;
        }
 
@@ -94,11 +95,16 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
        else
                strcpy(rbuff, "no_connection");
 
+       if (call->state != RXRPC_CALL_SERVER_PREALLOC) {
+               timeout = READ_ONCE(call->expect_rx_by);
+               timeout -= jiffies;
+       }
+
        tx_hard_ack = READ_ONCE(call->tx_hard_ack);
        rx_hard_ack = READ_ONCE(call->rx_hard_ack);
        seq_printf(seq,
                   "UDP   %-47.47s %-47.47s %4x %08x %08x %s %3u"
-                  " %-8.8s %08x %lx %08x %02x %08x %02x\n",
+                  " %-8.8s %08x %lx %08x %02x %08x %02x %08x %06lx\n",
                   lbuff,
                   rbuff,
                   call->service_id,
@@ -110,7 +116,9 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
                   call->abort_code,
                   call->user_call_ID,
                   tx_hard_ack, READ_ONCE(call->tx_top) - tx_hard_ack,
-                  rx_hard_ack, READ_ONCE(call->rx_top) - rx_hard_ack);
+                  rx_hard_ack, READ_ONCE(call->rx_top) - rx_hard_ack,
+                  call->rx_serial,
+                  timeout);
 
        return 0;
 }
@@ -179,7 +187,7 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v)
 print:
        seq_printf(seq,
                   "UDP   %-47.47s %-47.47s %4x %08x %s %3u"
-                  " %s %08x %08x %08x\n",
+                  " %s %08x %08x %08x %08x %08x %08x %08x\n",
                   lbuff,
                   rbuff,
                   conn->service_id,
@@ -189,7 +197,11 @@ print:
                   rxrpc_conn_states[conn->state],
                   key_serial(conn->params.key),
                   atomic_read(&conn->serial),
-                  conn->hi_serial);
+                  conn->hi_serial,
+                  conn->channels[0].call_id,
+                  conn->channels[1].call_id,
+                  conn->channels[2].call_id,
+                  conn->channels[3].call_id);
 
        return 0;
 }
index 7bff716e911ea6708fe23b6075323fd4371d5c76..816b19a78809349984f3fee0fbccbc7a32cda031 100644 (file)
@@ -144,13 +144,11 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial)
        trace_rxrpc_receive(call, rxrpc_receive_end, 0, call->rx_top);
        ASSERTCMP(call->rx_hard_ack, ==, call->rx_top);
 
-#if 0 // TODO: May want to transmit final ACK under some circumstances anyway
        if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY) {
-               rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, serial, true, false,
+               rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, serial, false, true,
                                  rxrpc_propose_ack_terminal_ack);
-               rxrpc_send_ack_packet(call, false, NULL);
+               //rxrpc_send_ack_packet(call, false, NULL);
        }
-#endif
 
        write_lock_bh(&call->state_lock);
 
@@ -315,6 +313,10 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call,
        unsigned int rx_pkt_offset, rx_pkt_len;
        int ix, copy, ret = -EAGAIN, ret2;
 
+       if (test_and_clear_bit(RXRPC_CALL_RX_UNDERRUN, &call->flags) &&
+           call->ackr_reason)
+               rxrpc_send_ack_packet(call, false, NULL);
+
        rx_pkt_offset = call->rx_pkt_offset;
        rx_pkt_len = call->rx_pkt_len;
 
@@ -414,6 +416,8 @@ out:
 done:
        trace_rxrpc_recvmsg(call, rxrpc_recvmsg_data_return, seq,
                            rx_pkt_offset, rx_pkt_len, ret);
+       if (ret == -EAGAIN)
+               set_bit(RXRPC_CALL_RX_UNDERRUN, &call->flags);
        return ret;
 }
 
@@ -607,9 +611,7 @@ wait_error:
  * rxrpc_kernel_recv_data - Allow a kernel service to receive data/info
  * @sock: The socket that the call exists on
  * @call: The call to send data through
- * @buf: The buffer to receive into
- * @size: The size of the buffer, including data already read
- * @_offset: The running offset into the buffer.
+ * @iter: The buffer to receive into
  * @want_more: True if more data is expected to be read
  * @_abort: Where the abort code is stored if -ECONNABORTED is returned
  * @_service: Where to store the actual service ID (may be upgraded)
@@ -622,39 +624,30 @@ wait_error:
  * Note that we may return -EAGAIN to drain empty packets at the end of the
  * data, even if we've already copied over the requested data.
  *
- * This function adds the amount it transfers to *_offset, so this should be
- * precleared as appropriate.  Note that the amount remaining in the buffer is
- * taken to be size - *_offset.
- *
  * *_abort should also be initialised to 0.
  */
 int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call,
-                          void *buf, size_t size, size_t *_offset,
+                          struct iov_iter *iter,
                           bool want_more, u32 *_abort, u16 *_service)
 {
-       struct iov_iter iter;
-       struct kvec iov;
+       size_t offset = 0;
        int ret;
 
-       _enter("{%d,%s},%zu/%zu,%d",
+       _enter("{%d,%s},%zu,%d",
               call->debug_id, rxrpc_call_states[call->state],
-              *_offset, size, want_more);
+              iov_iter_count(iter), want_more);
 
-       ASSERTCMP(*_offset, <=, size);
        ASSERTCMP(call->state, !=, RXRPC_CALL_SERVER_ACCEPTING);
 
-       iov.iov_base = buf + *_offset;
-       iov.iov_len = size - *_offset;
-       iov_iter_kvec(&iter, ITER_KVEC | READ, &iov, 1, size - *_offset);
-
        mutex_lock(&call->user_mutex);
 
        switch (READ_ONCE(call->state)) {
        case RXRPC_CALL_CLIENT_RECV_REPLY:
        case RXRPC_CALL_SERVER_RECV_REQUEST:
        case RXRPC_CALL_SERVER_ACK_REQUEST:
-               ret = rxrpc_recvmsg_data(sock, call, NULL, &iter, size, 0,
-                                        _offset);
+               ret = rxrpc_recvmsg_data(sock, call, NULL, iter,
+                                        iov_iter_count(iter), 0,
+                                        &offset);
                if (ret < 0)
                        goto out;
 
@@ -663,7 +656,7 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call,
                 * full buffer or have been given -EAGAIN.
                 */
                if (ret == 1) {
-                       if (*_offset < size)
+                       if (iov_iter_count(iter) > 0)
                                goto short_data;
                        if (!want_more)
                                goto read_phase_complete;
@@ -686,10 +679,21 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call,
 read_phase_complete:
        ret = 1;
 out:
+       switch (call->ackr_reason) {
+       case RXRPC_ACK_IDLE:
+               break;
+       case RXRPC_ACK_DELAY:
+               if (ret != -EAGAIN)
+                       break;
+               /* Fall through */
+       default:
+               rxrpc_send_ack_packet(call, false, NULL);
+       }
+
        if (_service)
                *_service = call->service_id;
        mutex_unlock(&call->user_mutex);
-       _leave(" = %d [%zu,%d]", ret, *_offset, *_abort);
+       _leave(" = %d [%zu,%d]", ret, iov_iter_count(iter), *_abort);
        return ret;
 
 short_data:
@@ -705,7 +709,7 @@ call_complete:
        ret = call->error;
        if (call->completion == RXRPC_CALL_SUCCEEDED) {
                ret = 1;
-               if (size > 0)
+               if (iov_iter_count(iter) > 0)
                        ret = -ECONNRESET;
        }
        goto out;
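
rxrpc_kernel_recv_data() now takes a caller-supplied iov_iter instead of a buffer/size/offset triple, so the iterator construction that the removed lines performed internally moves out to the kernel-service caller. A minimal sketch of the new calling convention, using the same iov_iter_kvec() form the old code used (the wrapper and buffer names are illustrative, not from this patch):

static int example_recv(struct socket *sock, struct rxrpc_call *call,
                        void *buf, size_t buf_len, bool want_more)
{
        struct kvec iov = { .iov_base = buf, .iov_len = buf_len };
        struct iov_iter iter;
        u32 abort_code = 0;

        /* The caller now owns the iterator; rxrpc advances it as it copies
         * data in, and iov_iter_count() afterwards gives the space left.
         */
        iov_iter_kvec(&iter, ITER_KVEC | READ, &iov, 1, buf_len);

        return rxrpc_kernel_recv_data(sock, call, &iter, want_more,
                                      &abort_code, NULL);
}
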
index 278ac0807a60a8664bbb825ccd06737a595d8631..eaf8f4f446b02f54f0d12418c3988ca30b07459d 100644 (file)
@@ -146,10 +146,10 @@ static int rxkad_prime_packet_security(struct rxrpc_connection *conn)
 static int rxkad_secure_packet_auth(const struct rxrpc_call *call,
                                    struct sk_buff *skb,
                                    u32 data_size,
-                                   void *sechdr)
+                                   void *sechdr,
+                                   struct skcipher_request *req)
 {
        struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
-       SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
        struct rxkad_level1_hdr hdr;
        struct rxrpc_crypt iv;
        struct scatterlist sg;
@@ -183,12 +183,12 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call,
 static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call,
                                       struct sk_buff *skb,
                                       u32 data_size,
-                                      void *sechdr)
+                                      void *sechdr,
+                                      struct skcipher_request *req)
 {
        const struct rxrpc_key_token *token;
        struct rxkad_level2_hdr rxkhdr;
        struct rxrpc_skb_priv *sp;
-       SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
        struct rxrpc_crypt iv;
        struct scatterlist sg[16];
        struct sk_buff *trailer;
@@ -296,11 +296,12 @@ static int rxkad_secure_packet(struct rxrpc_call *call,
                ret = 0;
                break;
        case RXRPC_SECURITY_AUTH:
-               ret = rxkad_secure_packet_auth(call, skb, data_size, sechdr);
+               ret = rxkad_secure_packet_auth(call, skb, data_size, sechdr,
+                                              req);
                break;
        case RXRPC_SECURITY_ENCRYPT:
                ret = rxkad_secure_packet_encrypt(call, skb, data_size,
-                                                 sechdr);
+                                                 sechdr, req);
                break;
        default:
                ret = -EPERM;
@@ -316,10 +317,10 @@ static int rxkad_secure_packet(struct rxrpc_call *call,
  */
 static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb,
                                 unsigned int offset, unsigned int len,
-                                rxrpc_seq_t seq)
+                                rxrpc_seq_t seq,
+                                struct skcipher_request *req)
 {
        struct rxkad_level1_hdr sechdr;
-       SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
        struct rxrpc_crypt iv;
        struct scatterlist sg[16];
        struct sk_buff *trailer;
@@ -402,11 +403,11 @@ nomem:
  */
 static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
                                 unsigned int offset, unsigned int len,
-                                rxrpc_seq_t seq)
+                                rxrpc_seq_t seq,
+                                struct skcipher_request *req)
 {
        const struct rxrpc_key_token *token;
        struct rxkad_level2_hdr sechdr;
-       SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
        struct rxrpc_crypt iv;
        struct scatterlist _sg[4], *sg;
        struct sk_buff *trailer;
@@ -549,9 +550,9 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb,
        case RXRPC_SECURITY_PLAIN:
                return 0;
        case RXRPC_SECURITY_AUTH:
-               return rxkad_verify_packet_1(call, skb, offset, len, seq);
+               return rxkad_verify_packet_1(call, skb, offset, len, seq, req);
        case RXRPC_SECURITY_ENCRYPT:
-               return rxkad_verify_packet_2(call, skb, offset, len, seq);
+               return rxkad_verify_packet_2(call, skb, offset, len, seq, req);
        default:
                return -ENOANO;
        }
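
These hunks hoist the VLA-backed SKCIPHER_REQUEST_ON_STACK() out of the per-level helpers so the request is declared once and threaded down as a parameter. An outline of the resulting shape, mirroring the switch above (the exact declaration point in the real entry function is assumed):

static int rxkad_verify_packet_outline(struct rxrpc_call *call,
                                       struct sk_buff *skb,
                                       unsigned int offset, unsigned int len,
                                       rxrpc_seq_t seq)
{
        /* Declared once at the entry point instead of in each helper. */
        SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);

        switch (call->conn->params.security_level) {
        case RXRPC_SECURITY_PLAIN:
                return 0;
        case RXRPC_SECURITY_AUTH:
                return rxkad_verify_packet_1(call, skb, offset, len, seq, req);
        case RXRPC_SECURITY_ENCRYPT:
                return rxkad_verify_packet_2(call, skb, offset, len, seq, req);
        default:
                return -ENOANO;
        }
}
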
@@ -665,11 +666,13 @@ static int rxkad_issue_challenge(struct rxrpc_connection *conn)
        ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len);
        if (ret < 0) {
                trace_rxrpc_tx_fail(conn->debug_id, serial, ret,
-                                   rxrpc_tx_fail_conn_challenge);
+                                   rxrpc_tx_point_rxkad_challenge);
                return -EAGAIN;
        }
 
        conn->params.peer->last_tx_at = ktime_get_real();
+       trace_rxrpc_tx_packet(conn->debug_id, &whdr,
+                             rxrpc_tx_point_rxkad_challenge);
        _leave(" = 0");
        return 0;
 }
@@ -721,11 +724,12 @@ static int rxkad_send_response(struct rxrpc_connection *conn,
        ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 3, len);
        if (ret < 0) {
                trace_rxrpc_tx_fail(conn->debug_id, serial, ret,
-                                   rxrpc_tx_fail_conn_response);
+                                   rxrpc_tx_point_rxkad_response);
                return -EAGAIN;
        }
 
        conn->params.peer->last_tx_at = ktime_get_real();
+       trace_rxrpc_tx_packet(0, &whdr, rxrpc_tx_point_rxkad_response);
        _leave(" = 0");
        return 0;
 }
index a01169fb5325754c13c8b2b18facc29a1e24f243..e9574138831116a59db2c39aaa2cd4ee4f28e06b 100644 (file)
@@ -1,6 +1,6 @@
 #
 # Traffic control configuration.
-# 
+#
 
 menuconfig NET_SCHED
        bool "QoS and/or fair queueing"
@@ -183,6 +183,17 @@ config NET_SCH_CBS
          To compile this code as a module, choose M here: the
          module will be called sch_cbs.
 
+config NET_SCH_ETF
+       tristate "Earliest TxTime First (ETF)"
+       help
+         Say Y here if you want to use the Earliest TxTime First (ETF) packet
+         scheduling algorithm.
+
+         See the top of <file:net/sched/sch_etf.c> for more details.
+
+         To compile this code as a module, choose M here: the
+         module will be called sch_etf.
+
 config NET_SCH_GRED
        tristate "Generic Random Early Detection (GRED)"
        ---help---
@@ -240,6 +251,19 @@ config NET_SCH_MQPRIO
 
          If unsure, say N.
 
+config NET_SCH_SKBPRIO
+       tristate "SKB priority queue scheduler (SKBPRIO)"
+       help
+         Say Y here if you want to use the SKB priority queue
+         scheduler. This schedules packets according to skb->priority,
+         which is useful for request packets in DoS mitigation systems such
+         as Gatekeeper.
+
+         To compile this driver as a module, choose M here: the module will
+         be called sch_skbprio.
+
+         If unsure, say N.
+
 config NET_SCH_CHOKE
        tristate "CHOose and Keep responsive flow scheduler (CHOKE)"
        help
@@ -284,6 +308,17 @@ config NET_SCH_FQ_CODEL
 
          If unsure, say N.
 
+config NET_SCH_CAKE
+       tristate "Common Applications Kept Enhanced (CAKE)"
+       help
+         Say Y here if you want to use the Common Applications Kept Enhanced
+         (CAKE) queue management algorithm.
+
+         To compile this driver as a module, choose M here: the module
+         will be called sch_cake.
+
+         If unsure, say N.
+
 config NET_SCH_FQ
        tristate "Fair Queue"
        help
@@ -684,7 +719,7 @@ config NET_CLS_ACT
 
 config NET_ACT_POLICE
        tristate "Traffic Policing"
-        depends on NET_CLS_ACT 
+        depends on NET_CLS_ACT
         ---help---
          Say Y here if you want to do traffic policing, i.e. strict
          bandwidth limiting. This action replaces the existing policing
index 8811d38048785f43334da160226709217d72ea97..f0403f49edcbd50e27d9ea450c2e46b5b4727b8e 100644 (file)
@@ -33,7 +33,7 @@ obj-$(CONFIG_NET_SCH_HTB)     += sch_htb.o
 obj-$(CONFIG_NET_SCH_HFSC)     += sch_hfsc.o
 obj-$(CONFIG_NET_SCH_RED)      += sch_red.o
 obj-$(CONFIG_NET_SCH_GRED)     += sch_gred.o
-obj-$(CONFIG_NET_SCH_INGRESS)  += sch_ingress.o 
+obj-$(CONFIG_NET_SCH_INGRESS)  += sch_ingress.o
 obj-$(CONFIG_NET_SCH_DSMARK)   += sch_dsmark.o
 obj-$(CONFIG_NET_SCH_SFB)      += sch_sfb.o
 obj-$(CONFIG_NET_SCH_SFQ)      += sch_sfq.o
@@ -46,14 +46,17 @@ obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o
 obj-$(CONFIG_NET_SCH_DRR)      += sch_drr.o
 obj-$(CONFIG_NET_SCH_PLUG)     += sch_plug.o
 obj-$(CONFIG_NET_SCH_MQPRIO)   += sch_mqprio.o
+obj-$(CONFIG_NET_SCH_SKBPRIO)  += sch_skbprio.o
 obj-$(CONFIG_NET_SCH_CHOKE)    += sch_choke.o
 obj-$(CONFIG_NET_SCH_QFQ)      += sch_qfq.o
 obj-$(CONFIG_NET_SCH_CODEL)    += sch_codel.o
 obj-$(CONFIG_NET_SCH_FQ_CODEL) += sch_fq_codel.o
+obj-$(CONFIG_NET_SCH_CAKE)     += sch_cake.o
 obj-$(CONFIG_NET_SCH_FQ)       += sch_fq.o
 obj-$(CONFIG_NET_SCH_HHF)      += sch_hhf.o
 obj-$(CONFIG_NET_SCH_PIE)      += sch_pie.o
 obj-$(CONFIG_NET_SCH_CBS)      += sch_cbs.o
+obj-$(CONFIG_NET_SCH_ETF)      += sch_etf.o
 
 obj-$(CONFIG_NET_CLS_U32)      += cls_u32.o
 obj-$(CONFIG_NET_CLS_ROUTE4)   += cls_route.o
index 3f4cf930f809bbaca12a84caabc31f5c2b8d769c..229d63c99be23b2329caa84b6912c8f770ccca64 100644 (file)
@@ -36,7 +36,7 @@ static int tcf_action_goto_chain_init(struct tc_action *a, struct tcf_proto *tp)
 
        if (!tp)
                return -EINVAL;
-       a->goto_chain = tcf_chain_get(tp->chain->block, chain_index, true);
+       a->goto_chain = tcf_chain_get_by_act(tp->chain->block, chain_index);
        if (!a->goto_chain)
                return -ENOMEM;
        return 0;
@@ -44,7 +44,7 @@ static int tcf_action_goto_chain_init(struct tc_action *a, struct tcf_proto *tp)
 
 static void tcf_action_goto_chain_fini(struct tc_action *a)
 {
-       tcf_chain_put(a->goto_chain);
+       tcf_chain_put_by_act(a->goto_chain);
 }
 
 static void tcf_action_goto_chain_exec(const struct tc_action *a,
@@ -55,6 +55,24 @@ static void tcf_action_goto_chain_exec(const struct tc_action *a,
        res->goto_tp = rcu_dereference_bh(chain->filter_chain);
 }
 
+static void tcf_free_cookie_rcu(struct rcu_head *p)
+{
+       struct tc_cookie *cookie = container_of(p, struct tc_cookie, rcu);
+
+       kfree(cookie->data);
+       kfree(cookie);
+}
+
+static void tcf_set_action_cookie(struct tc_cookie __rcu **old_cookie,
+                                 struct tc_cookie *new_cookie)
+{
+       struct tc_cookie *old;
+
+       old = xchg((__force struct tc_cookie **)old_cookie, new_cookie);
+       if (old)
+               call_rcu(&old->rcu, tcf_free_cookie_rcu);
+}
+
 /* XXX: For standalone actions, we don't need a RCU grace period either, because
  * actions are always connected to filters and filters are already destroyed in
  * RCU callbacks, so after a RCU grace period actions are already disconnected
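
tcf_set_action_cookie() above is the usual publish-then-defer-free idiom: xchg() atomically installs the new cookie, and call_rcu() frees the displaced one only after every reader that might still see it has left its critical section. The matching reader side, as the dump and attribute-size paths later in this patch use it (a condensed sketch; the helper name is illustrative):

static u32 tcf_cookie_len_sketch(const struct tc_action *a)
{
        struct tc_cookie *cookie;
        u32 len = 0;

        /* Pin the cookie for the duration of the read; the writer's
         * call_rcu() guarantees it stays valid until we unlock.
         */
        rcu_read_lock();
        cookie = rcu_dereference(a->act_cookie);
        if (cookie)
                len = cookie->len;
        rcu_read_unlock();

        return len;
}
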
@@ -65,44 +83,64 @@ static void free_tcf(struct tc_action *p)
        free_percpu(p->cpu_bstats);
        free_percpu(p->cpu_qstats);
 
-       if (p->act_cookie) {
-               kfree(p->act_cookie->data);
-               kfree(p->act_cookie);
-       }
+       tcf_set_action_cookie(&p->act_cookie, NULL);
        if (p->goto_chain)
                tcf_action_goto_chain_fini(p);
 
        kfree(p);
 }
 
-static void tcf_idr_remove(struct tcf_idrinfo *idrinfo, struct tc_action *p)
+static void tcf_action_cleanup(struct tc_action *p)
 {
-       spin_lock(&idrinfo->lock);
-       idr_remove(&idrinfo->action_idr, p->tcfa_index);
-       spin_unlock(&idrinfo->lock);
+       if (p->ops->cleanup)
+               p->ops->cleanup(p);
+
        gen_kill_estimator(&p->tcfa_rate_est);
        free_tcf(p);
 }
 
+static int __tcf_action_put(struct tc_action *p, bool bind)
+{
+       struct tcf_idrinfo *idrinfo = p->idrinfo;
+
+       if (refcount_dec_and_lock(&p->tcfa_refcnt, &idrinfo->lock)) {
+               if (bind)
+                       atomic_dec(&p->tcfa_bindcnt);
+               idr_remove(&idrinfo->action_idr, p->tcfa_index);
+               spin_unlock(&idrinfo->lock);
+
+               tcf_action_cleanup(p);
+               return 1;
+       }
+
+       if (bind)
+               atomic_dec(&p->tcfa_bindcnt);
+
+       return 0;
+}
+
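
__tcf_action_put() builds on refcount_dec_and_lock(), which decrements the count and returns true with the spinlock held only when the count hits zero, so unpublishing from the IDR and the final free cannot race a concurrent lookup that is about to take a new reference. The generic shape of the idiom (hypothetical types, not from this patch):

struct obj_table {
        spinlock_t lock;        /* also taken by lookups */
        struct idr idr;
};

struct obj {
        refcount_t refcnt;
        u32 index;
};

static void obj_put(struct obj_table *tbl, struct obj *p)
{
        /* Returns true, with tbl->lock held, only for the final reference,
         * so removal from the table and the free are atomic with respect
         * to a concurrent idr_find()/refcount_inc().
         */
        if (refcount_dec_and_lock(&p->refcnt, &tbl->lock)) {
                idr_remove(&tbl->idr, p->index);
                spin_unlock(&tbl->lock);
                kfree(p);
        }
}
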
 int __tcf_idr_release(struct tc_action *p, bool bind, bool strict)
 {
        int ret = 0;
 
-       ASSERT_RTNL();
-
+       /* Release with strict==1 and bind==0 is only called through the act
+        * API (classifiers always bind). The only way an action can exist
+        * with a positive reference count and a zero bind count is if it was
+        * also created via the act API (unbinding the last classifier
+        * destroys an action that a classifier created). So the only way the
+        * bind count can change after the initial check is if an unbound
+        * action is destroyed by the act API while a classifier concurrently
+        * binds to an action with the same id. This results either in the
+        * creation of a new action (same behavior as before) or in reuse of
+        * the existing action, if the concurrent process increments the
+        * reference count before the action is deleted. Both scenarios are
+        * acceptable.
+        */
        if (p) {
-               if (bind)
-                       p->tcfa_bindcnt--;
-               else if (strict && p->tcfa_bindcnt > 0)
+               if (!bind && strict && atomic_read(&p->tcfa_bindcnt) > 0)
                        return -EPERM;
 
-               p->tcfa_refcnt--;
-               if (p->tcfa_bindcnt <= 0 && p->tcfa_refcnt <= 0) {
-                       if (p->ops->cleanup)
-                               p->ops->cleanup(p);
-                       tcf_idr_remove(p->idrinfo, p);
+               if (__tcf_action_put(p, bind))
                        ret = ACT_P_DELETED;
-               }
        }
 
        return ret;
@@ -111,10 +149,15 @@ EXPORT_SYMBOL(__tcf_idr_release);
 
 static size_t tcf_action_shared_attrs_size(const struct tc_action *act)
 {
+       struct tc_cookie *act_cookie;
        u32 cookie_len = 0;
 
-       if (act->act_cookie)
-               cookie_len = nla_total_size(act->act_cookie->len);
+       rcu_read_lock();
+       act_cookie = rcu_dereference(act->act_cookie);
+
+       if (act_cookie)
+               cookie_len = nla_total_size(act_cookie->len);
+       rcu_read_unlock();
 
        return  nla_total_size(0) /* action number nested */
                + nla_total_size(IFNAMSIZ) /* TCA_ACT_KIND */
@@ -257,46 +300,77 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb,
 }
 EXPORT_SYMBOL(tcf_generic_walker);
 
-static struct tc_action *tcf_idr_lookup(u32 index, struct tcf_idrinfo *idrinfo)
+static bool __tcf_idr_check(struct tc_action_net *tn, u32 index,
+                           struct tc_action **a, int bind)
 {
-       struct tc_action *p = NULL;
+       struct tcf_idrinfo *idrinfo = tn->idrinfo;
+       struct tc_action *p;
 
        spin_lock(&idrinfo->lock);
        p = idr_find(&idrinfo->action_idr, index);
+       if (IS_ERR(p)) {
+               p = NULL;
+       } else if (p) {
+               refcount_inc(&p->tcfa_refcnt);
+               if (bind)
+                       atomic_inc(&p->tcfa_bindcnt);
+       }
        spin_unlock(&idrinfo->lock);
 
-       return p;
+       if (p) {
+               *a = p;
+               return true;
+       }
+       return false;
 }
 
 int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index)
 {
-       struct tcf_idrinfo *idrinfo = tn->idrinfo;
-       struct tc_action *p = tcf_idr_lookup(index, idrinfo);
-
-       if (p) {
-               *a = p;
-               return 1;
-       }
-       return 0;
+       return __tcf_idr_check(tn, index, a, 0);
 }
 EXPORT_SYMBOL(tcf_idr_search);
 
 bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a,
                   int bind)
+{
+       return __tcf_idr_check(tn, index, a, bind);
+}
+EXPORT_SYMBOL(tcf_idr_check);
+
+int tcf_idr_delete_index(struct tc_action_net *tn, u32 index)
 {
        struct tcf_idrinfo *idrinfo = tn->idrinfo;
-       struct tc_action *p = tcf_idr_lookup(index, idrinfo);
+       struct tc_action *p;
+       int ret = 0;
 
-       if (index && p) {
-               if (bind)
-                       p->tcfa_bindcnt++;
-               p->tcfa_refcnt++;
-               *a = p;
-               return true;
+       spin_lock(&idrinfo->lock);
+       p = idr_find(&idrinfo->action_idr, index);
+       if (!p) {
+               spin_unlock(&idrinfo->lock);
+               return -ENOENT;
        }
-       return false;
+
+       if (!atomic_read(&p->tcfa_bindcnt)) {
+               if (refcount_dec_and_test(&p->tcfa_refcnt)) {
+                       struct module *owner = p->ops->owner;
+
+                       WARN_ON(p != idr_remove(&idrinfo->action_idr,
+                                               p->tcfa_index));
+                       spin_unlock(&idrinfo->lock);
+
+                       tcf_action_cleanup(p);
+                       module_put(owner);
+                       return 0;
+               }
+               ret = 0;
+       } else {
+               ret = -EPERM;
+       }
+
+       spin_unlock(&idrinfo->lock);
+       return ret;
 }
-EXPORT_SYMBOL(tcf_idr_check);
+EXPORT_SYMBOL(tcf_idr_delete_index);
 
 int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
                   struct tc_action **a, const struct tc_action_ops *ops,
@@ -304,14 +378,13 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
 {
        struct tc_action *p = kzalloc(ops->size, GFP_KERNEL);
        struct tcf_idrinfo *idrinfo = tn->idrinfo;
-       struct idr *idr = &idrinfo->action_idr;
        int err = -ENOMEM;
 
        if (unlikely(!p))
                return -ENOMEM;
-       p->tcfa_refcnt = 1;
+       refcount_set(&p->tcfa_refcnt, 1);
        if (bind)
-               p->tcfa_bindcnt = 1;
+               atomic_set(&p->tcfa_bindcnt, 1);
 
        if (cpustats) {
                p->cpu_bstats = netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
@@ -322,20 +395,6 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
                        goto err2;
        }
        spin_lock_init(&p->tcfa_lock);
-       idr_preload(GFP_KERNEL);
-       spin_lock(&idrinfo->lock);
-       /* user doesn't specify an index */
-       if (!index) {
-               index = 1;
-               err = idr_alloc_u32(idr, NULL, &index, UINT_MAX, GFP_ATOMIC);
-       } else {
-               err = idr_alloc_u32(idr, NULL, &index, index, GFP_ATOMIC);
-       }
-       spin_unlock(&idrinfo->lock);
-       idr_preload_end();
-       if (err)
-               goto err3;
-
        p->tcfa_index = index;
        p->tcfa_tm.install = jiffies;
        p->tcfa_tm.lastuse = jiffies;
@@ -345,7 +404,7 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
                                        &p->tcfa_rate_est,
                                        &p->tcfa_lock, NULL, est);
                if (err)
-                       goto err4;
+                       goto err3;
        }
 
        p->idrinfo = idrinfo;
@@ -353,8 +412,6 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
        INIT_LIST_HEAD(&p->list);
        *a = p;
        return 0;
-err4:
-       idr_remove(idr, index);
 err3:
        free_percpu(p->cpu_qstats);
 err2:
@@ -370,11 +427,78 @@ void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a)
        struct tcf_idrinfo *idrinfo = tn->idrinfo;
 
        spin_lock(&idrinfo->lock);
-       idr_replace(&idrinfo->action_idr, a, a->tcfa_index);
+       /* Replace ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc */
+       WARN_ON(!IS_ERR(idr_replace(&idrinfo->action_idr, a, a->tcfa_index)));
        spin_unlock(&idrinfo->lock);
 }
 EXPORT_SYMBOL(tcf_idr_insert);
 
+/* Clean up an idr index that was allocated but not initialized. */
+
+void tcf_idr_cleanup(struct tc_action_net *tn, u32 index)
+{
+       struct tcf_idrinfo *idrinfo = tn->idrinfo;
+
+       spin_lock(&idrinfo->lock);
+       /* Remove ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc */
+       WARN_ON(!IS_ERR(idr_remove(&idrinfo->action_idr, index)));
+       spin_unlock(&idrinfo->lock);
+}
+EXPORT_SYMBOL(tcf_idr_cleanup);
+
+/* Check whether an action with the specified index exists. If the action is
+ * found, increment its reference and bind counters and return 1. Otherwise,
+ * insert a temporary error pointer (to prevent concurrent users from
+ * inserting actions with the same index) and return 0.
+ */
+
+int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index,
+                       struct tc_action **a, int bind)
+{
+       struct tcf_idrinfo *idrinfo = tn->idrinfo;
+       struct tc_action *p;
+       int ret;
+
+again:
+       spin_lock(&idrinfo->lock);
+       if (*index) {
+               p = idr_find(&idrinfo->action_idr, *index);
+               if (IS_ERR(p)) {
+                       /* This means that another process allocated the
+                        * index but has not yet assigned the pointer.
+                        */
+                       spin_unlock(&idrinfo->lock);
+                       goto again;
+               }
+
+               if (p) {
+                       refcount_inc(&p->tcfa_refcnt);
+                       if (bind)
+                               atomic_inc(&p->tcfa_bindcnt);
+                       *a = p;
+                       ret = 1;
+               } else {
+                       *a = NULL;
+                       ret = idr_alloc_u32(&idrinfo->action_idr, NULL, index,
+                                           *index, GFP_ATOMIC);
+                       if (!ret)
+                               idr_replace(&idrinfo->action_idr,
+                                           ERR_PTR(-EBUSY), *index);
+               }
+       } else {
+               *index = 1;
+               *a = NULL;
+               ret = idr_alloc_u32(&idrinfo->action_idr, NULL, index,
+                                   UINT_MAX, GFP_ATOMIC);
+               if (!ret)
+                       idr_replace(&idrinfo->action_idr, ERR_PTR(-EBUSY),
+                                   *index);
+       }
+       spin_unlock(&idrinfo->lock);
+       return ret;
+}
+EXPORT_SYMBOL(tcf_idr_check_alloc);
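
The ERR_PTR(-EBUSY) sentinel is what lets an index be reserved without holding the lock across the whole action setup: the slot is claimed immediately, concurrent lookups treat the error pointer as not-yet-ready (the IS_ERR() checks above), and tcf_idr_insert() or tcf_idr_cleanup() later publishes or releases it. The reserve/publish/abort life cycle, condensed (hypothetical wrapper names; the real code additionally takes idrinfo->lock around each step):

/* Reserve: claim the index with a sentinel so no one else can take it. */
static int slot_reserve(struct idr *idr, u32 *index)
{
        int ret = idr_alloc_u32(idr, NULL, index, UINT_MAX, GFP_ATOMIC);

        if (!ret)
                idr_replace(idr, ERR_PTR(-EBUSY), *index);
        return ret;
}

/* Publish: swap the sentinel for the fully initialised object. */
static void slot_publish(struct idr *idr, void *obj, u32 index)
{
        WARN_ON(!IS_ERR(idr_replace(idr, obj, index)));
}

/* Abort: setup failed, drop the sentinel so the index becomes reusable. */
static void slot_abort(struct idr *idr, u32 index)
{
        WARN_ON(!IS_ERR(idr_remove(idr, index)));
}
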
+
 void tcf_idrinfo_destroy(const struct tc_action_ops *ops,
                         struct tcf_idrinfo *idrinfo)
 {
@@ -538,13 +662,15 @@ repeat:
 }
 EXPORT_SYMBOL(tcf_action_exec);
 
-int tcf_action_destroy(struct list_head *actions, int bind)
+int tcf_action_destroy(struct tc_action *actions[], int bind)
 {
        const struct tc_action_ops *ops;
-       struct tc_action *a, *tmp;
-       int ret = 0;
+       struct tc_action *a;
+       int ret = 0, i;
 
-       list_for_each_entry_safe(a, tmp, actions, list) {
+       for (i = 0; i < TCA_ACT_MAX_PRIO && actions[i]; i++) {
+               a = actions[i];
+               actions[i] = NULL;
                ops = a->ops;
                ret = __tcf_idr_release(a, bind, true);
                if (ret == ACT_P_DELETED)
@@ -555,6 +681,24 @@ int tcf_action_destroy(struct list_head *actions, int bind)
        return ret;
 }
 
+static int tcf_action_put(struct tc_action *p)
+{
+       return __tcf_action_put(p, false);
+}
+
+static void tcf_action_put_many(struct tc_action *actions[])
+{
+       int i;
+
+       for (i = 0; i < TCA_ACT_MAX_PRIO && actions[i]; i++) {
+               struct tc_action *a = actions[i];
+               const struct tc_action_ops *ops = a->ops;
+
+               if (tcf_action_put(a))
+                       module_put(ops->owner);
+       }
+}
+
 int
 tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
 {
@@ -567,16 +711,22 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
        int err = -EINVAL;
        unsigned char *b = skb_tail_pointer(skb);
        struct nlattr *nest;
+       struct tc_cookie *cookie;
 
        if (nla_put_string(skb, TCA_KIND, a->ops->kind))
                goto nla_put_failure;
        if (tcf_action_copy_stats(skb, a, 0))
                goto nla_put_failure;
-       if (a->act_cookie) {
-               if (nla_put(skb, TCA_ACT_COOKIE, a->act_cookie->len,
-                           a->act_cookie->data))
+
+       rcu_read_lock();
+       cookie = rcu_dereference(a->act_cookie);
+       if (cookie) {
+               if (nla_put(skb, TCA_ACT_COOKIE, cookie->len, cookie->data)) {
+                       rcu_read_unlock();
                        goto nla_put_failure;
+               }
        }
+       rcu_read_unlock();
 
        nest = nla_nest_start(skb, TCA_OPTIONS);
        if (nest == NULL)
@@ -593,14 +743,15 @@ nla_put_failure:
 }
 EXPORT_SYMBOL(tcf_action_dump_1);
 
-int tcf_action_dump(struct sk_buff *skb, struct list_head *actions,
+int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[],
                    int bind, int ref)
 {
        struct tc_action *a;
-       int err = -EINVAL;
+       int err = -EINVAL, i;
        struct nlattr *nest;
 
-       list_for_each_entry(a, actions, list) {
+       for (i = 0; i < TCA_ACT_MAX_PRIO && actions[i]; i++) {
+               a = actions[i];
                nest = nla_nest_start(skb, a->order);
                if (nest == NULL)
                        goto nla_put_failure;
@@ -635,9 +786,19 @@ static struct tc_cookie *nla_memdup_cookie(struct nlattr **tb)
        return c;
 }
 
+static bool tcf_action_valid(int action)
+{
+       int opcode = TC_ACT_EXT_OPCODE(action);
+
+       if (!opcode)
+               return action <= TC_ACT_VALUE_MAX;
+       return opcode <= TC_ACT_EXT_OPCODE_MAX || action == TC_ACT_UNSPEC;
+}
+
 struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
                                    struct nlattr *nla, struct nlattr *est,
                                    char *name, int ovr, int bind,
+                                   bool rtnl_held,
                                    struct netlink_ext_ack *extack)
 {
        struct tc_action *a;
@@ -688,9 +849,11 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
        a_o = tc_lookup_action_n(act_name);
        if (a_o == NULL) {
 #ifdef CONFIG_MODULES
-               rtnl_unlock();
+               if (rtnl_held)
+                       rtnl_unlock();
                request_module("act_%s", act_name);
-               rtnl_lock();
+               if (rtnl_held)
+                       rtnl_lock();
 
                a_o = tc_lookup_action_n(act_name);
 
@@ -713,19 +876,15 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
        /* backward compatibility for policer */
        if (name == NULL)
                err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, ovr, bind,
-                               extack);
+                               rtnl_held, extack);
        else
-               err = a_o->init(net, nla, est, &a, ovr, bind, extack);
+               err = a_o->init(net, nla, est, &a, ovr, bind, rtnl_held,
+                               extack);
        if (err < 0)
                goto err_mod;
 
-       if (name == NULL && tb[TCA_ACT_COOKIE]) {
-               if (a->act_cookie) {
-                       kfree(a->act_cookie->data);
-                       kfree(a->act_cookie);
-               }
-               a->act_cookie = cookie;
-       }
+       if (!name && tb[TCA_ACT_COOKIE])
+               tcf_set_action_cookie(&a->act_cookie, cookie);
 
        /* module count goes up only when brand new policy is created
         * if it exists and is only bound to in a_o->init() then
@@ -737,15 +896,19 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
        if (TC_ACT_EXT_CMP(a->tcfa_action, TC_ACT_GOTO_CHAIN)) {
                err = tcf_action_goto_chain_init(a, tp);
                if (err) {
-                       LIST_HEAD(actions);
+                       struct tc_action *actions[] = { a, NULL };
 
-                       list_add_tail(&a->list, &actions);
-                       tcf_action_destroy(&actions, bind);
+                       tcf_action_destroy(actions, bind);
                        NL_SET_ERR_MSG(extack, "Failed to init TC action chain");
                        return ERR_PTR(err);
                }
        }
 
+       if (!tcf_action_valid(a->tcfa_action)) {
+               NL_SET_ERR_MSG(extack, "invalid action value, using TC_ACT_UNSPEC instead");
+               a->tcfa_action = TC_ACT_UNSPEC;
+       }
+
        return a;
 
 err_mod:
@@ -758,21 +921,12 @@ err_out:
        return ERR_PTR(err);
 }
 
-static void cleanup_a(struct list_head *actions, int ovr)
-{
-       struct tc_action *a;
-
-       if (!ovr)
-               return;
-
-       list_for_each_entry(a, actions, list)
-               a->tcfa_refcnt--;
-}
+/* Returns the number of initialized actions or a negative error. */
 
 int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
                    struct nlattr *est, char *name, int ovr, int bind,
-                   struct list_head *actions, size_t *attr_size,
-                   struct netlink_ext_ack *extack)
+                   struct tc_action *actions[], size_t *attr_size,
+                   bool rtnl_held, struct netlink_ext_ack *extack)
 {
        struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
        struct tc_action *act;
@@ -786,25 +940,19 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
 
        for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
                act = tcf_action_init_1(net, tp, tb[i], est, name, ovr, bind,
-                                       extack);
+                                       rtnl_held, extack);
                if (IS_ERR(act)) {
                        err = PTR_ERR(act);
                        goto err;
                }
                act->order = i;
                sz += tcf_action_fill_size(act);
-               if (ovr)
-                       act->tcfa_refcnt++;
-               list_add_tail(&act->list, actions);
+               /* Start from index 0 */
+               actions[i - 1] = act;
        }
 
        *attr_size = tcf_action_full_attrs_size(sz);
-
-       /* Remove the temp refcnt which was necessary to protect against
-        * destroying an existing action which was being replaced
-        */
-       cleanup_a(actions, ovr);
-       return 0;
+       return i - 1;
 
 err:
        tcf_action_destroy(actions, bind);
@@ -855,7 +1003,7 @@ errout:
        return -1;
 }
 
-static int tca_get_fill(struct sk_buff *skb, struct list_head *actions,
+static int tca_get_fill(struct sk_buff *skb, struct tc_action *actions[],
                        u32 portid, u32 seq, u16 flags, int event, int bind,
                        int ref)
 {
@@ -891,7 +1039,7 @@ out_nlmsg_trim:
 
 static int
 tcf_get_notify(struct net *net, u32 portid, struct nlmsghdr *n,
-              struct list_head *actions, int event,
+              struct tc_action *actions[], int event,
               struct netlink_ext_ack *extack)
 {
        struct sk_buff *skb;
@@ -900,7 +1048,7 @@ tcf_get_notify(struct net *net, u32 portid, struct nlmsghdr *n,
        if (!skb)
                return -ENOBUFS;
        if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, event,
-                        0, 0) <= 0) {
+                        0, 1) <= 0) {
                NL_SET_ERR_MSG(extack, "Failed to fill netlink attributes while adding TC action");
                kfree_skb(skb);
                return -EINVAL;
@@ -1027,9 +1175,41 @@ err_out:
        return err;
 }
 
+static int tcf_action_delete(struct net *net, struct tc_action *actions[],
+                            int *acts_deleted, struct netlink_ext_ack *extack)
+{
+       u32 act_index;
+       int ret, i;
+
+       for (i = 0; i < TCA_ACT_MAX_PRIO && actions[i]; i++) {
+               struct tc_action *a = actions[i];
+               const struct tc_action_ops *ops = a->ops;
+
+               /* Actions can be deleted concurrently, so we must save the
+                * index here and search by it again after our reference has
+                * been released.
+                */
+               act_index = a->tcfa_index;
+
+               if (tcf_action_put(a)) {
+                       /* last reference, action was deleted concurrently */
+                       module_put(ops->owner);
+               } else {
+                       /* now do the delete */
+                       ret = ops->delete(net, act_index);
+                       if (ret < 0) {
+                               *acts_deleted = i + 1;
+                               return ret;
+                       }
+               }
+       }
+       *acts_deleted = i;
+       return 0;
+}
+
 static int
-tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
-              u32 portid, size_t attr_size, struct netlink_ext_ack *extack)
+tcf_del_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[],
+              int *acts_deleted, u32 portid, size_t attr_size,
+              struct netlink_ext_ack *extack)
 {
        int ret;
        struct sk_buff *skb;
@@ -1040,14 +1220,14 @@ tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
                return -ENOBUFS;
 
        if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, RTM_DELACTION,
-                        0, 1) <= 0) {
+                        0, 2) <= 0) {
                NL_SET_ERR_MSG(extack, "Failed to fill netlink TC action attributes");
                kfree_skb(skb);
                return -EINVAL;
        }
 
        /* now do the delete */
-       ret = tcf_action_destroy(actions, 0);
+       ret = tcf_action_delete(net, actions, acts_deleted, extack);
        if (ret < 0) {
                NL_SET_ERR_MSG(extack, "Failed to delete TC action");
                kfree_skb(skb);
@@ -1069,7 +1249,8 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
        struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
        struct tc_action *act;
        size_t attr_size = 0;
-       LIST_HEAD(actions);
+       struct tc_action *actions[TCA_ACT_MAX_PRIO + 1] = {};
+       int acts_deleted = 0;
 
        ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, extack);
        if (ret < 0)
@@ -1091,27 +1272,27 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
                }
                act->order = i;
                attr_size += tcf_action_fill_size(act);
-               list_add_tail(&act->list, &actions);
+               actions[i - 1] = act;
        }
 
        attr_size = tcf_action_full_attrs_size(attr_size);
 
        if (event == RTM_GETACTION)
-               ret = tcf_get_notify(net, portid, n, &actions, event, extack);
+               ret = tcf_get_notify(net, portid, n, actions, event, extack);
        else { /* delete */
-               ret = tcf_del_notify(net, n, &actions, portid, attr_size, extack);
+               ret = tcf_del_notify(net, n, actions, &acts_deleted, portid,
+                                    attr_size, extack);
                if (ret)
                        goto err;
                return ret;
        }
 err:
-       if (event != RTM_GETACTION)
-               tcf_action_destroy(&actions, 0);
+       tcf_action_put_many(&actions[acts_deleted]);
        return ret;
 }
 
 static int
-tcf_add_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
+tcf_add_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[],
               u32 portid, size_t attr_size, struct netlink_ext_ack *extack)
 {
        struct sk_buff *skb;
@@ -1142,14 +1323,17 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
 {
        size_t attr_size = 0;
        int ret = 0;
-       LIST_HEAD(actions);
+       struct tc_action *actions[TCA_ACT_MAX_PRIO] = {};
 
-       ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, &actions,
-                             &attr_size, extack);
-       if (ret)
+       ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, actions,
+                             &attr_size, true, extack);
+       if (ret < 0)
                return ret;
+       ret = tcf_add_notify(net, n, actions, portid, attr_size, extack);
+       if (ovr)
+               tcf_action_put_many(actions);
 
-       return tcf_add_notify(net, n, &actions, portid, attr_size, extack);
+       return ret;
 }
 
 static u32 tcaa_root_flags_allowed = TCA_FLAG_LARGE_DUMP_ON;
index 18089c02e55719d9818842f8cd3b35fa6cf94497..6203eb075c9aab06fc9ff30d762b774d2d9d2e71 100644 (file)
@@ -141,8 +141,8 @@ static int tcf_bpf_dump(struct sk_buff *skb, struct tc_action *act,
        struct tcf_bpf *prog = to_bpf(act);
        struct tc_act_bpf opt = {
                .index   = prog->tcf_index,
-               .refcnt  = prog->tcf_refcnt - ref,
-               .bindcnt = prog->tcf_bindcnt - bind,
+               .refcnt  = refcount_read(&prog->tcf_refcnt) - ref,
+               .bindcnt = atomic_read(&prog->tcf_bindcnt) - bind,
                .action  = prog->tcf_action,
        };
        struct tcf_t tm;
@@ -196,12 +196,10 @@ static int tcf_bpf_init_from_ops(struct nlattr **tb, struct tcf_bpf_cfg *cfg)
        if (bpf_size != nla_len(tb[TCA_ACT_BPF_OPS]))
                return -EINVAL;
 
-       bpf_ops = kzalloc(bpf_size, GFP_KERNEL);
+       bpf_ops = kmemdup(nla_data(tb[TCA_ACT_BPF_OPS]), bpf_size, GFP_KERNEL);
        if (bpf_ops == NULL)
                return -ENOMEM;
 
-       memcpy(bpf_ops, nla_data(tb[TCA_ACT_BPF_OPS]), bpf_size);
-
        fprog_tmp.len = bpf_num_ops;
        fprog_tmp.filter = bpf_ops;
 
@@ -276,7 +274,8 @@ static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog,
 
 static int tcf_bpf_init(struct net *net, struct nlattr *nla,
                        struct nlattr *est, struct tc_action **act,
-                       int replace, int bind, struct netlink_ext_ack *extack)
+                       int replace, int bind, bool rtnl_held,
+                       struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, bpf_net_id);
        struct nlattr *tb[TCA_ACT_BPF_MAX + 1];
@@ -298,21 +297,27 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
 
        parm = nla_data(tb[TCA_ACT_BPF_PARMS]);
 
-       if (!tcf_idr_check(tn, parm->index, act, bind)) {
+       ret = tcf_idr_check_alloc(tn, &parm->index, act, bind);
+       if (!ret) {
                ret = tcf_idr_create(tn, parm->index, est, act,
                                     &act_bpf_ops, bind, true);
-               if (ret < 0)
+               if (ret < 0) {
+                       tcf_idr_cleanup(tn, parm->index);
                        return ret;
+               }
 
                res = ACT_P_CREATED;
-       } else {
+       } else if (ret > 0) {
                /* Don't override defaults. */
                if (bind)
                        return 0;
 
-               tcf_idr_release(*act, bind);
-               if (!replace)
+               if (!replace) {
+                       tcf_idr_release(*act, bind);
                        return -EEXIST;
+               }
+       } else {
+               return ret;
        }
 
        is_bpf = tb[TCA_ACT_BPF_OPS_LEN] && tb[TCA_ACT_BPF_OPS];
@@ -355,8 +360,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
 
        return res;
 out:
-       if (res == ACT_P_CREATED)
-               tcf_idr_release(*act, bind);
+       tcf_idr_release(*act, bind);
 
        return ret;
 }
@@ -387,6 +391,13 @@ static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index,
        return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_bpf_delete(struct net *net, u32 index)
+{
+       struct tc_action_net *tn = net_generic(net, bpf_net_id);
+
+       return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_bpf_ops __read_mostly = {
        .kind           =       "bpf",
        .type           =       TCA_ACT_BPF,
@@ -397,6 +408,7 @@ static struct tc_action_ops act_bpf_ops __read_mostly = {
        .init           =       tcf_bpf_init,
        .walk           =       tcf_bpf_walker,
        .lookup         =       tcf_bpf_search,
+       .delete         =       tcf_bpf_delete,
        .size           =       sizeof(struct tcf_bpf),
 };
 
index e4b880fa51fec90fa1a1d92c11f8a337637d3509..2f9bc833d0467029f8e854e6a036583836d24567 100644 (file)
@@ -96,7 +96,7 @@ static const struct nla_policy connmark_policy[TCA_CONNMARK_MAX + 1] = {
 
 static int tcf_connmark_init(struct net *net, struct nlattr *nla,
                             struct nlattr *est, struct tc_action **a,
-                            int ovr, int bind,
+                            int ovr, int bind, bool rtnl_held,
                             struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, connmark_net_id);
@@ -118,11 +118,14 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
 
        parm = nla_data(tb[TCA_CONNMARK_PARMS]);
 
-       if (!tcf_idr_check(tn, parm->index, a, bind)) {
+       ret = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+       if (!ret) {
                ret = tcf_idr_create(tn, parm->index, est, a,
                                     &act_connmark_ops, bind, false);
-               if (ret)
+               if (ret) {
+                       tcf_idr_cleanup(tn, parm->index);
                        return ret;
+               }
 
                ci = to_connmark(*a);
                ci->tcf_action = parm->action;
@@ -131,16 +134,18 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
 
                tcf_idr_insert(tn, *a);
                ret = ACT_P_CREATED;
-       } else {
+       } else if (ret > 0) {
                ci = to_connmark(*a);
                if (bind)
                        return 0;
-               tcf_idr_release(*a, bind);
-               if (!ovr)
+               if (!ovr) {
+                       tcf_idr_release(*a, bind);
                        return -EEXIST;
+               }
                /* replacing action and zone */
                ci->tcf_action = parm->action;
                ci->zone = parm->zone;
+               ret = 0;
        }
 
        return ret;
@@ -154,8 +159,8 @@ static inline int tcf_connmark_dump(struct sk_buff *skb, struct tc_action *a,
 
        struct tc_connmark opt = {
                .index   = ci->tcf_index,
-               .refcnt  = ci->tcf_refcnt - ref,
-               .bindcnt = ci->tcf_bindcnt - bind,
+               .refcnt  = refcount_read(&ci->tcf_refcnt) - ref,
+               .bindcnt = atomic_read(&ci->tcf_bindcnt) - bind,
                .action  = ci->tcf_action,
                .zone   = ci->zone,
        };
@@ -193,6 +198,13 @@ static int tcf_connmark_search(struct net *net, struct tc_action **a, u32 index,
        return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_connmark_delete(struct net *net, u32 index)
+{
+       struct tc_action_net *tn = net_generic(net, connmark_net_id);
+
+       return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_connmark_ops = {
        .kind           =       "connmark",
        .type           =       TCA_ACT_CONNMARK,
@@ -202,6 +214,7 @@ static struct tc_action_ops act_connmark_ops = {
        .init           =       tcf_connmark_init,
        .walk           =       tcf_connmark_walker,
        .lookup         =       tcf_connmark_search,
+       .delete         =       tcf_connmark_delete,
        .size           =       sizeof(struct tcf_connmark_info),
 };
 
@@ -239,4 +252,3 @@ module_exit(connmark_cleanup_module);
 MODULE_AUTHOR("Felix Fietkau <nbd@openwrt.org>");
 MODULE_DESCRIPTION("Connection tracking mark restoring");
 MODULE_LICENSE("GPL");
-
index 6e7124e57918e98433f0d3302565ae4e0b9eaaf4..648a3a35b720112eec3b46bf9aada9a8e772e796 100644 (file)
@@ -46,7 +46,8 @@ static struct tc_action_ops act_csum_ops;
 
 static int tcf_csum_init(struct net *net, struct nlattr *nla,
                         struct nlattr *est, struct tc_action **a, int ovr,
-                        int bind, struct netlink_ext_ack *extack)
+                        int bind, bool rtnl_held,
+                        struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, csum_net_id);
        struct tcf_csum_params *params_old, *params_new;
@@ -66,18 +67,24 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
                return -EINVAL;
        parm = nla_data(tb[TCA_CSUM_PARMS]);
 
-       if (!tcf_idr_check(tn, parm->index, a, bind)) {
+       err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+       if (!err) {
                ret = tcf_idr_create(tn, parm->index, est, a,
                                     &act_csum_ops, bind, true);
-               if (ret)
+               if (ret) {
+                       tcf_idr_cleanup(tn, parm->index);
                        return ret;
+               }
                ret = ACT_P_CREATED;
-       } else {
+       } else if (err > 0) {
                if (bind) /* don't override defaults */
                        return 0;
-               tcf_idr_release(*a, bind);
-               if (!ovr)
+               if (!ovr) {
+                       tcf_idr_release(*a, bind);
                        return -EEXIST;
+               }
+       } else {
+               return err;
        }
 
        p = to_tcf_csum(*a);
@@ -85,8 +92,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
 
        params_new = kzalloc(sizeof(*params_new), GFP_KERNEL);
        if (unlikely(!params_new)) {
-               if (ret == ACT_P_CREATED)
-                       tcf_idr_release(*a, bind);
+               tcf_idr_release(*a, bind);
                return -ENOMEM;
        }
        params_old = rtnl_dereference(p->params);
@@ -555,15 +561,14 @@ static int tcf_csum(struct sk_buff *skb, const struct tc_action *a,
        u32 update_flags;
        int action;
 
-       rcu_read_lock();
-       params = rcu_dereference(p->params);
+       params = rcu_dereference_bh(p->params);
 
        tcf_lastuse_update(&p->tcf_tm);
        bstats_cpu_update(this_cpu_ptr(p->common.cpu_bstats), skb);
 
        action = READ_ONCE(p->tcf_action);
        if (unlikely(action == TC_ACT_SHOT))
-               goto drop_stats;
+               goto drop;
 
        update_flags = params->update_flags;
        switch (tc_skb_protocol(skb)) {
@@ -577,16 +582,11 @@ static int tcf_csum(struct sk_buff *skb, const struct tc_action *a,
                break;
        }
 
-unlock:
-       rcu_read_unlock();
        return action;
 
 drop:
-       action = TC_ACT_SHOT;
-
-drop_stats:
        qstats_drop_inc(this_cpu_ptr(p->common.cpu_qstats));
-       goto unlock;
+       return TC_ACT_SHOT;
 }
 
 static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind,
@@ -597,8 +597,8 @@ static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind,
        struct tcf_csum_params *params;
        struct tc_csum opt = {
                .index   = p->tcf_index,
-               .refcnt  = p->tcf_refcnt - ref,
-               .bindcnt = p->tcf_bindcnt - bind,
+               .refcnt  = refcount_read(&p->tcf_refcnt) - ref,
+               .bindcnt = atomic_read(&p->tcf_bindcnt) - bind,
                .action  = p->tcf_action,
        };
        struct tcf_t t;
@@ -653,6 +653,13 @@ static size_t tcf_csum_get_fill_size(const struct tc_action *act)
        return nla_total_size(sizeof(struct tc_csum));
 }
 
+static int tcf_csum_delete(struct net *net, u32 index)
+{
+       struct tc_action_net *tn = net_generic(net, csum_net_id);
+
+       return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_csum_ops = {
        .kind           = "csum",
        .type           = TCA_ACT_CSUM,
@@ -664,6 +671,7 @@ static struct tc_action_ops act_csum_ops = {
        .walk           = tcf_csum_walker,
        .lookup         = tcf_csum_search,
        .get_fill_size  = tcf_csum_get_fill_size,
+       .delete         = tcf_csum_delete,
        .size           = sizeof(struct tcf_csum),
 };
 
index 4dc4f153cad80861d38f975f7f70b7ce433dbc80..661b72b9147d52d320f094b91b7392488c1f25c2 100644 (file)
@@ -56,7 +56,8 @@ static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = {
 
 static int tcf_gact_init(struct net *net, struct nlattr *nla,
                         struct nlattr *est, struct tc_action **a,
-                        int ovr, int bind, struct netlink_ext_ack *extack)
+                        int ovr, int bind, bool rtnl_held,
+                        struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, gact_net_id);
        struct nlattr *tb[TCA_GACT_MAX + 1];
@@ -90,18 +91,24 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
        }
 #endif
 
-       if (!tcf_idr_check(tn, parm->index, a, bind)) {
+       err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+       if (!err) {
                ret = tcf_idr_create(tn, parm->index, est, a,
                                     &act_gact_ops, bind, true);
-               if (ret)
+               if (ret) {
+                       tcf_idr_cleanup(tn, parm->index);
                        return ret;
+               }
                ret = ACT_P_CREATED;
-       } else {
+       } else if (err > 0) {
                if (bind) /* don't override defaults */
                        return 0;
-               tcf_idr_release(*a, bind);
-               if (!ovr)
+               if (!ovr) {
+                       tcf_idr_release(*a, bind);
                        return -EEXIST;
+               }
+       } else {
+               return err;
        }
 
        gact = to_gact(*a);
@@ -169,8 +176,8 @@ static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a,
        struct tcf_gact *gact = to_gact(a);
        struct tc_gact opt = {
                .index   = gact->tcf_index,
-               .refcnt  = gact->tcf_refcnt - ref,
-               .bindcnt = gact->tcf_bindcnt - bind,
+               .refcnt  = refcount_read(&gact->tcf_refcnt) - ref,
+               .bindcnt = atomic_read(&gact->tcf_bindcnt) - bind,
                .action  = gact->tcf_action,
        };
        struct tcf_t t;
@@ -230,6 +237,13 @@ static size_t tcf_gact_get_fill_size(const struct tc_action *act)
        return sz;
 }
 
+static int tcf_gact_delete(struct net *net, u32 index)
+{
+       struct tc_action_net *tn = net_generic(net, gact_net_id);
+
+       return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_gact_ops = {
        .kind           =       "gact",
        .type           =       TCA_ACT_GACT,
@@ -241,6 +255,7 @@ static struct tc_action_ops act_gact_ops = {
        .walk           =       tcf_gact_walker,
        .lookup         =       tcf_gact_search,
        .get_fill_size  =       tcf_gact_get_fill_size,
+       .delete         =       tcf_gact_delete,
        .size           =       sizeof(struct tcf_gact),
 };
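
Every dump() hunk in this merge swaps plain reads of tcf_refcnt and tcf_bindcnt for refcount_read() and atomic_read(): the counters become refcount_t and atomic_t so actions can be created and destroyed without RTNL serializing them. A minimal sketch of the dump-side snapshot; the struct below is an assumed stand-in for the common action header, not the kernel's real layout:

#include <linux/atomic.h>
#include <linux/refcount.h>

struct common_sketch {			/* assumed stand-in, see above */
	refcount_t tcf_refcnt;		/* was a plain int before this series */
	atomic_t   tcf_bindcnt;		/* likewise */
};

/* As in the .refcnt/.bindcnt initializers above: subtract the caller's
 * own reference and binding so userspace sees the counts as if the
 * dump request itself were not holding the action. */
static u32 dump_refcnt(const struct common_sketch *c, int ref)
{
	return refcount_read(&c->tcf_refcnt) - ref;
}

static u32 dump_bindcnt(const struct common_sketch *c, int bind)
{
	return atomic_read(&c->tcf_bindcnt) - bind;
}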
 
index 20d7d36b2fc9b9d3af256f48795da6e387f7f781..df4060e32d43e85708678ebb283319a5344db481 100644 (file)
@@ -448,7 +448,8 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb,
 
 static int tcf_ife_init(struct net *net, struct nlattr *nla,
                        struct nlattr *est, struct tc_action **a,
-                       int ovr, int bind, struct netlink_ext_ack *extack)
+                       int ovr, int bind, bool rtnl_held,
+                       struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, ife_net_id);
        struct nlattr *tb[TCA_IFE_MAX + 1];
@@ -483,7 +484,12 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
        if (!p)
                return -ENOMEM;
 
-       exists = tcf_idr_check(tn, parm->index, a, bind);
+       err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+       if (err < 0) {
+               kfree(p);
+               return err;
+       }
+       exists = err;
        if (exists && bind) {
                kfree(p);
                return 0;
@@ -493,16 +499,15 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
                ret = tcf_idr_create(tn, parm->index, est, a, &act_ife_ops,
                                     bind, true);
                if (ret) {
+                       tcf_idr_cleanup(tn, parm->index);
                        kfree(p);
                        return ret;
                }
                ret = ACT_P_CREATED;
-       } else {
+       } else if (!ovr) {
                tcf_idr_release(*a, bind);
-               if (!ovr) {
-                       kfree(p);
-                       return -EEXIST;
-               }
+               kfree(p);
+               return -EEXIST;
        }
 
        ife = to_ife(*a);
@@ -547,6 +552,8 @@ metadata_parse_err:
 
                        if (exists)
                                spin_unlock_bh(&ife->tcf_lock);
+                       tcf_idr_release(*a, bind);
+
                        kfree(p);
                        return err;
                }
@@ -596,8 +603,8 @@ static int tcf_ife_dump(struct sk_buff *skb, struct tc_action *a, int bind,
        struct tcf_ife_params *p = rtnl_dereference(ife->params);
        struct tc_ife opt = {
                .index = ife->tcf_index,
-               .refcnt = ife->tcf_refcnt - ref,
-               .bindcnt = ife->tcf_bindcnt - bind,
+               .refcnt = refcount_read(&ife->tcf_refcnt) - ref,
+               .bindcnt = atomic_read(&ife->tcf_bindcnt) - bind,
                .action = ife->tcf_action,
                .flags = p->flags,
        };
@@ -813,14 +820,11 @@ static int tcf_ife_act(struct sk_buff *skb, const struct tc_action *a,
        struct tcf_ife_params *p;
        int ret;
 
-       rcu_read_lock();
-       p = rcu_dereference(ife->params);
+       p = rcu_dereference_bh(ife->params);
        if (p->flags & IFE_ENCODE) {
                ret = tcf_ife_encode(skb, a, res, p);
-               rcu_read_unlock();
                return ret;
        }
-       rcu_read_unlock();
 
        return tcf_ife_decode(skb, a, res);
 }
@@ -843,6 +847,13 @@ static int tcf_ife_search(struct net *net, struct tc_action **a, u32 index,
        return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_ife_delete(struct net *net, u32 index)
+{
+       struct tc_action_net *tn = net_generic(net, ife_net_id);
+
+       return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_ife_ops = {
        .kind = "ife",
        .type = TCA_ACT_IFE,
@@ -853,6 +864,7 @@ static struct tc_action_ops act_ife_ops = {
        .init = tcf_ife_init,
        .walk = tcf_ife_walker,
        .lookup = tcf_ife_search,
+       .delete = tcf_ife_delete,
        .size = sizeof(struct tcf_ife_info),
 };
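
Note what the tcf_ife_act() hunk (and the matching hunks in act_csum, act_mirred, act_sample and act_skbmod) removes: the explicit rcu_read_lock()/rcu_read_unlock() pair. The series assumes the action datapath is only entered from the TC hooks with softirqs disabled, so BH context itself is the RCU read-side critical section; rcu_dereference_bh() both documents and lockdep-checks that assumption. Reassembled from the hunk, the function reduces to:

static int tcf_ife_act(struct sk_buff *skb, const struct tc_action *a,
		       struct tcf_result *res)
{
	struct tcf_ife_info *ife = to_ife(a);
	struct tcf_ife_params *p;

	/* No rcu_read_lock(): we rely on running in BH context. */
	p = rcu_dereference_bh(ife->params);
	if (p->flags & IFE_ENCODE)
		return tcf_ife_encode(skb, a, res, p);

	return tcf_ife_decode(skb, a, res);
}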
 
index 14c312d7908f535cb4f43d8d0a73e4cbe445d362..0dc787a57798292be40ba1f66c16d2affd31a046 100644 (file)
@@ -119,13 +119,18 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
        if (tb[TCA_IPT_INDEX] != NULL)
                index = nla_get_u32(tb[TCA_IPT_INDEX]);
 
-       exists = tcf_idr_check(tn, index, a, bind);
+       err = tcf_idr_check_alloc(tn, &index, a, bind);
+       if (err < 0)
+               return err;
+       exists = err;
        if (exists && bind)
                return 0;
 
        if (tb[TCA_IPT_HOOK] == NULL || tb[TCA_IPT_TARG] == NULL) {
                if (exists)
                        tcf_idr_release(*a, bind);
+               else
+                       tcf_idr_cleanup(tn, index);
                return -EINVAL;
        }
 
@@ -133,22 +138,27 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
        if (nla_len(tb[TCA_IPT_TARG]) < td->u.target_size) {
                if (exists)
                        tcf_idr_release(*a, bind);
+               else
+                       tcf_idr_cleanup(tn, index);
                return -EINVAL;
        }
 
        if (!exists) {
                ret = tcf_idr_create(tn, index, est, a, ops, bind,
                                     false);
-               if (ret)
+               if (ret) {
+                       tcf_idr_cleanup(tn, index);
                        return ret;
+               }
                ret = ACT_P_CREATED;
        } else {
                if (bind) /* don't override defaults */
                        return 0;
-               tcf_idr_release(*a, bind);
 
-               if (!ovr)
+               if (!ovr) {
+                       tcf_idr_release(*a, bind);
                        return -EEXIST;
+               }
        }
        hook = nla_get_u32(tb[TCA_IPT_HOOK]);
 
@@ -196,7 +206,8 @@ err1:
 
 static int tcf_ipt_init(struct net *net, struct nlattr *nla,
                        struct nlattr *est, struct tc_action **a, int ovr,
-                       int bind, struct netlink_ext_ack *extack)
+                       int bind, bool rtnl_held,
+                       struct netlink_ext_ack *extack)
 {
        return __tcf_ipt_init(net, ipt_net_id, nla, est, a, &act_ipt_ops, ovr,
                              bind);
@@ -204,7 +215,8 @@ static int tcf_ipt_init(struct net *net, struct nlattr *nla,
 
 static int tcf_xt_init(struct net *net, struct nlattr *nla,
                       struct nlattr *est, struct tc_action **a, int ovr,
-                      int bind, struct netlink_ext_ack *extack)
+                      int bind, bool rtnl_held,
+                      struct netlink_ext_ack *extack)
 {
        return __tcf_ipt_init(net, xt_net_id, nla, est, a, &act_xt_ops, ovr,
                              bind);
@@ -280,8 +292,8 @@ static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind,
        if (unlikely(!t))
                goto nla_put_failure;
 
-       c.bindcnt = ipt->tcf_bindcnt - bind;
-       c.refcnt = ipt->tcf_refcnt - ref;
+       c.bindcnt = atomic_read(&ipt->tcf_bindcnt) - bind;
+       c.refcnt = refcount_read(&ipt->tcf_refcnt) - ref;
        strcpy(t->u.user.name, ipt->tcfi_t->u.kernel.target->name);
 
        if (nla_put(skb, TCA_IPT_TARG, ipt->tcfi_t->u.user.target_size, t) ||
@@ -322,6 +334,13 @@ static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index,
        return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_ipt_delete(struct net *net, u32 index)
+{
+       struct tc_action_net *tn = net_generic(net, ipt_net_id);
+
+       return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_ipt_ops = {
        .kind           =       "ipt",
        .type           =       TCA_ACT_IPT,
@@ -332,6 +351,7 @@ static struct tc_action_ops act_ipt_ops = {
        .init           =       tcf_ipt_init,
        .walk           =       tcf_ipt_walker,
        .lookup         =       tcf_ipt_search,
+       .delete         =       tcf_ipt_delete,
        .size           =       sizeof(struct tcf_ipt),
 };
 
@@ -372,6 +392,13 @@ static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index,
        return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_xt_delete(struct net *net, u32 index)
+{
+       struct tc_action_net *tn = net_generic(net, xt_net_id);
+
+       return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_xt_ops = {
        .kind           =       "xt",
        .type           =       TCA_ACT_XT,
@@ -382,6 +409,7 @@ static struct tc_action_ops act_xt_ops = {
        .init           =       tcf_xt_init,
        .walk           =       tcf_xt_walker,
        .lookup         =       tcf_xt_search,
+       .delete         =       tcf_xt_delete,
        .size           =       sizeof(struct tcf_ipt),
 };
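
The __tcf_ipt_init() hunks show the rule applied on every attribute-validation failure in this series: the error path must mirror what tcf_idr_check_alloc() did. A short sketch; 'bad_attributes' is a hypothetical placeholder for whatever validation the action performs:

	if (bad_attributes) {			/* hypothetical check */
		if (exists)
			tcf_idr_release(*a, bind);	/* drop the reference */
		else
			tcf_idr_cleanup(tn, index);	/* free the reserved index */
		return -EINVAL;
	}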
 
index fd34015331ab86c395a2e599546a51b64efb8625..b26d060da08ea350019c21e72c1494744a82f01f 100644 (file)
@@ -25,6 +25,7 @@
 #include <net/net_namespace.h>
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
 #include <linux/tc_act/tc_mirred.h>
 #include <net/tc_act/tc_mirred.h>
 
@@ -49,6 +50,18 @@ static bool tcf_mirred_act_wants_ingress(int action)
        }
 }
 
+static bool tcf_mirred_can_reinsert(int action)
+{
+       switch (action) {
+       case TC_ACT_SHOT:
+       case TC_ACT_STOLEN:
+       case TC_ACT_QUEUED:
+       case TC_ACT_TRAP:
+               return true;
+       }
+       return false;
+}
+
 static void tcf_mirred_release(struct tc_action *a)
 {
        struct tcf_mirred *m = to_mirred(a);
@@ -68,8 +81,9 @@ static unsigned int mirred_net_id;
 static struct tc_action_ops act_mirred_ops;
 
 static int tcf_mirred_init(struct net *net, struct nlattr *nla,
-                          struct nlattr *est, struct tc_action **a, int ovr,
-                          int bind, struct netlink_ext_ack *extack)
+                          struct nlattr *est, struct tc_action **a,
+                          int ovr, int bind, bool rtnl_held,
+                          struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, mirred_net_id);
        struct nlattr *tb[TCA_MIRRED_MAX + 1];
@@ -78,7 +92,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
        struct tcf_mirred *m;
        struct net_device *dev;
        bool exists = false;
-       int ret;
+       int ret, err;
 
        if (!nla) {
                NL_SET_ERR_MSG_MOD(extack, "Mirred requires attributes to be passed");
@@ -93,7 +107,10 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
        }
        parm = nla_data(tb[TCA_MIRRED_PARMS]);
 
-       exists = tcf_idr_check(tn, parm->index, a, bind);
+       err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+       if (err < 0)
+               return err;
+       exists = err;
        if (exists && bind)
                return 0;
 
@@ -106,6 +123,8 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
        default:
                if (exists)
                        tcf_idr_release(*a, bind);
+               else
+                       tcf_idr_cleanup(tn, parm->index);
                NL_SET_ERR_MSG_MOD(extack, "Unknown mirred option");
                return -EINVAL;
        }
@@ -114,6 +133,8 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
                if (dev == NULL) {
                        if (exists)
                                tcf_idr_release(*a, bind);
+                       else
+                               tcf_idr_cleanup(tn, parm->index);
                        return -ENODEV;
                }
                mac_header_xmit = dev_is_mac_header_xmit(dev);
@@ -123,18 +144,20 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 
        if (!exists) {
                if (!dev) {
+                       tcf_idr_cleanup(tn, parm->index);
                        NL_SET_ERR_MSG_MOD(extack, "Specified device does not exist");
                        return -EINVAL;
                }
                ret = tcf_idr_create(tn, parm->index, est, a,
                                     &act_mirred_ops, bind, true);
-               if (ret)
+               if (ret) {
+                       tcf_idr_cleanup(tn, parm->index);
                        return ret;
+               }
                ret = ACT_P_CREATED;
-       } else {
+       } else if (!ovr) {
                tcf_idr_release(*a, bind);
-               if (!ovr)
-                       return -EEXIST;
+               return -EEXIST;
        }
        m = to_mirred(*a);
 
@@ -161,21 +184,23 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
                      struct tcf_result *res)
 {
        struct tcf_mirred *m = to_mirred(a);
+       struct sk_buff *skb2 = skb;
        bool m_mac_header_xmit;
        struct net_device *dev;
-       struct sk_buff *skb2;
        int retval, err = 0;
+       bool use_reinsert;
+       bool want_ingress;
+       bool is_redirect;
        int m_eaction;
        int mac_len;
 
        tcf_lastuse_update(&m->tcf_tm);
        bstats_cpu_update(this_cpu_ptr(m->common.cpu_bstats), skb);
 
-       rcu_read_lock();
        m_mac_header_xmit = READ_ONCE(m->tcfm_mac_header_xmit);
        m_eaction = READ_ONCE(m->tcfm_eaction);
        retval = READ_ONCE(m->tcf_action);
-       dev = rcu_dereference(m->tcfm_dev);
+       dev = rcu_dereference_bh(m->tcfm_dev);
        if (unlikely(!dev)) {
                pr_notice_once("tc mirred: target device is gone\n");
                goto out;
@@ -187,16 +212,25 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
                goto out;
        }
 
-       skb2 = skb_clone(skb, GFP_ATOMIC);
-       if (!skb2)
-               goto out;
+       /* We could avoid the clone only when called from ingress or clsact;
+        * since we can't easily detect a clsact caller, skip the clone only
+        * for ingress - that covers the TC S/W datapath.
+        */
+       is_redirect = tcf_mirred_is_act_redirect(m_eaction);
+       use_reinsert = skb_at_tc_ingress(skb) && is_redirect &&
+                      tcf_mirred_can_reinsert(retval);
+       if (!use_reinsert) {
+               skb2 = skb_clone(skb, GFP_ATOMIC);
+               if (!skb2)
+                       goto out;
+       }
 
        /* If the action's target direction differs from the filter's direction,
         * and devices expect a mac header on xmit, then mac push/pull is
         * needed.
         */
-       if (skb_at_tc_ingress(skb) != tcf_mirred_act_wants_ingress(m_eaction) &&
-           m_mac_header_xmit) {
+       want_ingress = tcf_mirred_act_wants_ingress(m_eaction);
+       if (skb_at_tc_ingress(skb) != want_ingress && m_mac_header_xmit) {
                if (!skb_at_tc_ingress(skb)) {
                        /* caught at egress, act ingress: pull mac */
                        mac_len = skb_network_header(skb) - skb_mac_header(skb);
@@ -207,15 +241,23 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
                }
        }
 
+       skb2->skb_iif = skb->dev->ifindex;
+       skb2->dev = dev;
+
        /* mirror is always swallowed */
-       if (tcf_mirred_is_act_redirect(m_eaction)) {
+       if (is_redirect) {
                skb2->tc_redirected = 1;
                skb2->tc_from_ingress = skb2->tc_at_ingress;
+
+               /* let the caller reinsert the packet, if possible */
+               if (use_reinsert) {
+                       res->ingress = want_ingress;
+                       res->qstats = this_cpu_ptr(m->common.cpu_qstats);
+                       return TC_ACT_REINSERT;
+               }
        }
 
-       skb2->skb_iif = skb->dev->ifindex;
-       skb2->dev = dev;
-       if (!tcf_mirred_act_wants_ingress(m_eaction))
+       if (!want_ingress)
                err = dev_queue_xmit(skb2);
        else
                err = netif_receive_skb(skb2);
@@ -226,7 +268,6 @@ out:
                if (tcf_mirred_is_act_redirect(m_eaction))
                        retval = TC_ACT_SHOT;
        }
-       rcu_read_unlock();
 
        return retval;
 }
@@ -250,8 +291,8 @@ static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind,
        struct tc_mirred opt = {
                .index   = m->tcf_index,
                .action  = m->tcf_action,
-               .refcnt  = m->tcf_refcnt - ref,
-               .bindcnt = m->tcf_bindcnt - bind,
+               .refcnt  = refcount_read(&m->tcf_refcnt) - ref,
+               .bindcnt = atomic_read(&m->tcf_bindcnt) - bind,
                .eaction = m->tcfm_eaction,
                .ifindex = dev ? dev->ifindex : 0,
        };
@@ -321,6 +362,13 @@ static struct net_device *tcf_mirred_get_dev(const struct tc_action *a)
        return rtnl_dereference(m->tcfm_dev);
 }
 
+static int tcf_mirred_delete(struct net *net, u32 index)
+{
+       struct tc_action_net *tn = net_generic(net, mirred_net_id);
+
+       return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_mirred_ops = {
        .kind           =       "mirred",
        .type           =       TCA_ACT_MIRRED,
@@ -334,6 +382,7 @@ static struct tc_action_ops act_mirred_ops = {
        .lookup         =       tcf_mirred_search,
        .size           =       sizeof(struct tcf_mirred),
        .get_dev        =       tcf_mirred_get_dev,
+       .delete         =       tcf_mirred_delete,
 };
 
 static __net_init int mirred_init_net(struct net *net)
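
The tcf_mirred() rewrite is the one substantive datapath change in this batch: for a redirect evaluated at ingress, it can skip skb_clone() entirely and hand the original skb back to the caller by returning TC_ACT_REINSERT with res->ingress and res->qstats filled in. Condensed from the hunks (surrounding header push/pull elided):

	/* Reinsertion is safe only when (a) we are on the ingress TC
	 * software datapath, (b) the action is a redirect, so the packet
	 * is swallowed either way, and (c) the overall verdict stops any
	 * further processing of the original skb. */
	is_redirect  = tcf_mirred_is_act_redirect(m_eaction);
	use_reinsert = skb_at_tc_ingress(skb) && is_redirect &&
		       tcf_mirred_can_reinsert(retval);

	if (!use_reinsert) {			/* classic path: clone */
		skb2 = skb_clone(skb, GFP_ATOMIC);
		if (!skb2)
			goto out;
	}

	/* ... mac push/pull and skb2 setup as in the hunks above ... */

	if (use_reinsert) {			/* skb2 == skb here */
		res->ingress = want_ingress;
		res->qstats  = this_cpu_ptr(m->common.cpu_qstats);
		return TC_ACT_REINSERT;		/* caller re-injects skb */
	}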
index 4b5848b6c25207ac74b0508259f9f3019020d3c9..4dd9188a72fddd9ebb7ccf87950d1068b61837af 100644 (file)
@@ -38,7 +38,7 @@ static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = {
 
 static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
                        struct tc_action **a, int ovr, int bind,
-                       struct netlink_ext_ack *extack)
+                       bool rtnl_held, struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, nat_net_id);
        struct nlattr *tb[TCA_NAT_MAX + 1];
@@ -57,18 +57,24 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
                return -EINVAL;
        parm = nla_data(tb[TCA_NAT_PARMS]);
 
-       if (!tcf_idr_check(tn, parm->index, a, bind)) {
+       err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+       if (!err) {
                ret = tcf_idr_create(tn, parm->index, est, a,
                                     &act_nat_ops, bind, false);
-               if (ret)
+               if (ret) {
+                       tcf_idr_cleanup(tn, parm->index);
                        return ret;
+               }
                ret = ACT_P_CREATED;
-       } else {
+       } else if (err > 0) {
                if (bind)
                        return 0;
-               tcf_idr_release(*a, bind);
-               if (!ovr)
+               if (!ovr) {
+                       tcf_idr_release(*a, bind);
                        return -EEXIST;
+               }
+       } else {
+               return err;
        }
        p = to_tcf_nat(*a);
 
@@ -257,8 +263,8 @@ static int tcf_nat_dump(struct sk_buff *skb, struct tc_action *a,
 
                .index    = p->tcf_index,
                .action   = p->tcf_action,
-               .refcnt   = p->tcf_refcnt - ref,
-               .bindcnt  = p->tcf_bindcnt - bind,
+               .refcnt   = refcount_read(&p->tcf_refcnt) - ref,
+               .bindcnt  = atomic_read(&p->tcf_bindcnt) - bind,
        };
        struct tcf_t t;
 
@@ -294,6 +300,13 @@ static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index,
        return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_nat_delete(struct net *net, u32 index)
+{
+       struct tc_action_net *tn = net_generic(net, nat_net_id);
+
+       return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_nat_ops = {
        .kind           =       "nat",
        .type           =       TCA_ACT_NAT,
@@ -303,6 +316,7 @@ static struct tc_action_ops act_nat_ops = {
        .init           =       tcf_nat_init,
        .walk           =       tcf_nat_walker,
        .lookup         =       tcf_nat_search,
+       .delete         =       tcf_nat_delete,
        .size           =       sizeof(struct tcf_nat),
 };
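
Each action in this merge also grows an identical .delete callback: a one-liner forwarding to tcf_idr_delete_index(), which lets the action core delete by index through the ops table instead of reaching into the IDR itself. The boilerplate, with 'foo' as a hypothetical action name standing in for csum/gact/ife and the rest:

static int tcf_foo_delete(struct net *net, u32 index)
{
	struct tc_action_net *tn = net_generic(net, foo_net_id);

	return tcf_idr_delete_index(tn, index);
}

static struct tc_action_ops act_foo_ops = {
	/* ... */
	.delete		= tcf_foo_delete,
	/* ... */
};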
 
index 8a925c72db5fe413eaf4db3ac231f26484b049cb..43ba999b2d2322c7abb1063ceebe6af10d80c6b7 100644 (file)
@@ -132,20 +132,23 @@ static int tcf_pedit_key_ex_dump(struct sk_buff *skb,
 
 static int tcf_pedit_init(struct net *net, struct nlattr *nla,
                          struct nlattr *est, struct tc_action **a,
-                         int ovr, int bind, struct netlink_ext_ack *extack)
+                         int ovr, int bind, bool rtnl_held,
+                         struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, pedit_net_id);
        struct nlattr *tb[TCA_PEDIT_MAX + 1];
-       struct nlattr *pattr;
-       struct tc_pedit *parm;
-       int ret = 0, err;
-       struct tcf_pedit *p;
        struct tc_pedit_key *keys = NULL;
        struct tcf_pedit_key_ex *keys_ex;
+       struct tc_pedit *parm;
+       struct nlattr *pattr;
+       struct tcf_pedit *p;
+       int ret = 0, err;
        int ksize;
 
-       if (nla == NULL)
+       if (!nla) {
+               NL_SET_ERR_MSG_MOD(extack, "Pedit requires attributes to be passed");
                return -EINVAL;
+       }
 
        err = nla_parse_nested(tb, TCA_PEDIT_MAX, nla, pedit_policy, NULL);
        if (err < 0)
@@ -154,47 +157,62 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
        pattr = tb[TCA_PEDIT_PARMS];
        if (!pattr)
                pattr = tb[TCA_PEDIT_PARMS_EX];
-       if (!pattr)
+       if (!pattr) {
+               NL_SET_ERR_MSG_MOD(extack, "Missing required TCA_PEDIT_PARMS or TCA_PEDIT_PARMS_EX pedit attribute");
                return -EINVAL;
+       }
 
        parm = nla_data(pattr);
        ksize = parm->nkeys * sizeof(struct tc_pedit_key);
-       if (nla_len(pattr) < sizeof(*parm) + ksize)
+       if (nla_len(pattr) < sizeof(*parm) + ksize) {
+               NL_SET_ERR_MSG_ATTR(extack, pattr, "Length of TCA_PEDIT_PARMS or TCA_PEDIT_PARMS_EX pedit attribute is invalid");
                return -EINVAL;
+       }
 
        keys_ex = tcf_pedit_keys_ex_parse(tb[TCA_PEDIT_KEYS_EX], parm->nkeys);
        if (IS_ERR(keys_ex))
                return PTR_ERR(keys_ex);
 
-       if (!tcf_idr_check(tn, parm->index, a, bind)) {
-               if (!parm->nkeys)
-                       return -EINVAL;
+       err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+       if (!err) {
+               if (!parm->nkeys) {
+                       tcf_idr_cleanup(tn, parm->index);
+                       NL_SET_ERR_MSG_MOD(extack, "Pedit requires keys to be passed");
+                       ret = -EINVAL;
+                       goto out_free;
+               }
                ret = tcf_idr_create(tn, parm->index, est, a,
                                     &act_pedit_ops, bind, false);
-               if (ret)
-                       return ret;
+               if (ret) {
+                       tcf_idr_cleanup(tn, parm->index);
+                       goto out_free;
+               }
                p = to_pedit(*a);
                keys = kmalloc(ksize, GFP_KERNEL);
-               if (keys == NULL) {
+               if (!keys) {
                        tcf_idr_release(*a, bind);
-                       kfree(keys_ex);
-                       return -ENOMEM;
+                       ret = -ENOMEM;
+                       goto out_free;
                }
                ret = ACT_P_CREATED;
-       } else {
+       } else if (err > 0) {
                if (bind)
-                       return 0;
-               tcf_idr_release(*a, bind);
-               if (!ovr)
-                       return -EEXIST;
+                       goto out_free;
+               if (!ovr) {
+                       tcf_idr_release(*a, bind);
+                       ret = -EEXIST;
+                       goto out_free;
+               }
                p = to_pedit(*a);
                if (p->tcfp_nkeys && p->tcfp_nkeys != parm->nkeys) {
                        keys = kmalloc(ksize, GFP_KERNEL);
                        if (!keys) {
-                               kfree(keys_ex);
-                               return -ENOMEM;
+                               ret = -ENOMEM;
+                               goto out_free;
                        }
                }
+       } else {
+               ret = err;
+               goto out_free;
+       }
 
        spin_lock_bh(&p->tcf_lock);
@@ -214,12 +232,17 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
        if (ret == ACT_P_CREATED)
                tcf_idr_insert(tn, *a);
        return ret;
+out_free:
+       kfree(keys_ex);
+       return ret;
 }
 
 static void tcf_pedit_cleanup(struct tc_action *a)
 {
        struct tcf_pedit *p = to_pedit(a);
        struct tc_pedit_key *keys = p->tcfp_keys;
+
        kfree(keys);
        kfree(p->tcfp_keys_ex);
 }
@@ -263,7 +286,7 @@ static int pedit_skb_hdr_offset(struct sk_buff *skb,
        default:
                ret = -EINVAL;
                break;
-       };
+       }
 
        return ret;
 }
@@ -284,11 +307,12 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a,
        if (p->tcfp_nkeys > 0) {
                struct tc_pedit_key *tkey = p->tcfp_keys;
                struct tcf_pedit_key_ex *tkey_ex = p->tcfp_keys_ex;
-               enum pedit_header_type htype = TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK;
+               enum pedit_header_type htype =
+                       TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK;
                enum pedit_cmd cmd = TCA_PEDIT_KEY_EX_CMD_SET;
 
                for (i = p->tcfp_nkeys; i > 0; i--, tkey++) {
-                       u32 *ptr, _data;
+                       u32 *ptr, hdata;
                        int offset = tkey->off;
                        int hoffset;
                        u32 val;
@@ -303,39 +327,39 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a,
 
                        rc = pedit_skb_hdr_offset(skb, htype, &hoffset);
                        if (rc) {
-                               pr_info("tc filter pedit bad header type specified (0x%x)\n",
+                               pr_info("tc action pedit bad header type specified (0x%x)\n",
                                        htype);
                                goto bad;
                        }
 
                        if (tkey->offmask) {
-                               char *d, _d;
+                               u8 *d, _d;
 
                                if (!offset_valid(skb, hoffset + tkey->at)) {
-                                       pr_info("tc filter pedit 'at' offset %d out of bounds\n",
+                                       pr_info("tc action pedit 'at' offset %d out of bounds\n",
                                                hoffset + tkey->at);
                                        goto bad;
                                }
-                               d = skb_header_pointer(skb, hoffset + tkey->at, 1,
-                                                      &_d);
+                               d = skb_header_pointer(skb, hoffset + tkey->at,
+                                                      sizeof(_d), &_d);
                                if (!d)
                                        goto bad;
                                offset += (*d & tkey->offmask) >> tkey->shift;
                        }
 
                        if (offset % 4) {
-                               pr_info("tc filter pedit"
-                                       " offset must be on 32 bit boundaries\n");
+                               pr_info("tc action pedit offset must be on 32 bit boundaries\n");
                                goto bad;
                        }
 
                        if (!offset_valid(skb, hoffset + offset)) {
-                               pr_info("tc filter pedit offset %d out of bounds\n",
+                               pr_info("tc action pedit offset %d out of bounds\n",
                                        hoffset + offset);
                                goto bad;
                        }
 
-                       ptr = skb_header_pointer(skb, hoffset + offset, 4, &_data);
+                       ptr = skb_header_pointer(skb, hoffset + offset,
+                                                sizeof(hdata), &hdata);
                        if (!ptr)
                                goto bad;
                        /* just do it, baby */
@@ -347,19 +371,20 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a,
                                val = (*ptr + tkey->val) & ~tkey->mask;
                                break;
                        default:
-                               pr_info("tc filter pedit bad command (%d)\n",
+                               pr_info("tc action pedit bad command (%d)\n",
                                        cmd);
                                goto bad;
                        }
 
                        *ptr = ((*ptr & tkey->mask) ^ val);
-                       if (ptr == &_data)
+                       if (ptr == &hdata)
                                skb_store_bits(skb, hoffset + offset, ptr, 4);
                }
 
                goto done;
-       } else
+       } else {
                WARN(1, "pedit BUG: index %d\n", p->tcf_index);
+       }
 
 bad:
        p->tcf_qstats.overlimits++;
@@ -391,8 +416,8 @@ static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,
        opt->nkeys = p->tcfp_nkeys;
        opt->flags = p->tcfp_flags;
        opt->action = p->tcf_action;
-       opt->refcnt = p->tcf_refcnt - ref;
-       opt->bindcnt = p->tcf_bindcnt - bind;
+       opt->refcnt = refcount_read(&p->tcf_refcnt) - ref;
+       opt->bindcnt = atomic_read(&p->tcf_bindcnt) - bind;
 
        if (p->tcfp_keys_ex) {
                tcf_pedit_key_ex_dump(skb, p->tcfp_keys_ex, p->tcfp_nkeys);
@@ -435,6 +460,13 @@ static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index,
        return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_pedit_delete(struct net *net, u32 index)
+{
+       struct tc_action_net *tn = net_generic(net, pedit_net_id);
+
+       return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_pedit_ops = {
        .kind           =       "pedit",
        .type           =       TCA_ACT_PEDIT,
@@ -445,6 +477,7 @@ static struct tc_action_ops act_pedit_ops = {
        .init           =       tcf_pedit_init,
        .walk           =       tcf_pedit_walker,
        .lookup         =       tcf_pedit_search,
+       .delete         =       tcf_pedit_delete,
        .size           =       sizeof(struct tcf_pedit),
 };
 
@@ -483,4 +516,3 @@ static void __exit pedit_cleanup_module(void)
 
 module_init(pedit_init_module);
 module_exit(pedit_cleanup_module);
-
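
Beyond the error-path rework, the tcf_pedit() hunks tighten the header accesses: the magic lengths 1 and 4 passed to skb_header_pointer() become sizeof() of the local copy, and the offmask byte is read through u8 rather than char so the mask-and-shift is well defined. The access pattern, condensed (tkey fields as in the hunks; the final store still uses a literal 4 in the patch, sizeof(hdata) is this sketch's choice):

	u32 *ptr, hdata;
	u8 *d, _d;

	/* skb_header_pointer() returns a pointer into the skb when the
	 * requested bytes are linear, otherwise copies them into the
	 * local buffer and returns that. */
	d = skb_header_pointer(skb, hoffset + tkey->at, sizeof(_d), &_d);
	if (!d)
		goto bad;
	offset += (*d & tkey->offmask) >> tkey->shift;

	ptr = skb_header_pointer(skb, hoffset + offset, sizeof(hdata), &hdata);
	if (!ptr)
		goto bad;
	*ptr = ((*ptr & tkey->mask) ^ val);
	if (ptr == &hdata)		/* we edited the local copy */
		skb_store_bits(skb, hoffset + offset, ptr, sizeof(hdata));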
index 4e72bc2a0dfb525df3cc4ac582f417ce5c537af3..1f3192ea8df7a5f4de28c297cac3337cfccfe601 100644 (file)
@@ -75,7 +75,7 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
 
 static int tcf_act_police_init(struct net *net, struct nlattr *nla,
                               struct nlattr *est, struct tc_action **a,
-                              int ovr, int bind,
+                              int ovr, int bind, bool rtnl_held,
                               struct netlink_ext_ack *extack)
 {
        int ret = 0, err;
@@ -101,20 +101,24 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla,
                return -EINVAL;
 
        parm = nla_data(tb[TCA_POLICE_TBF]);
-       exists = tcf_idr_check(tn, parm->index, a, bind);
+       err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+       if (err < 0)
+               return err;
+       exists = err;
        if (exists && bind)
                return 0;
 
        if (!exists) {
                ret = tcf_idr_create(tn, parm->index, NULL, a,
                                     &act_police_ops, bind, false);
-               if (ret)
+               if (ret) {
+                       tcf_idr_cleanup(tn, parm->index);
                        return ret;
+               }
                ret = ACT_P_CREATED;
-       } else {
+       } else if (!ovr) {
                tcf_idr_release(*a, bind);
-               if (!ovr)
-                       return -EEXIST;
+               return -EEXIST;
        }
 
        police = to_police(*a);
@@ -195,8 +199,7 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla,
 failure:
        qdisc_put_rtab(P_tab);
        qdisc_put_rtab(R_tab);
-       if (ret == ACT_P_CREATED)
-               tcf_idr_release(*a, bind);
+       tcf_idr_release(*a, bind);
        return err;
 }
 
@@ -274,8 +277,8 @@ static int tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a,
                .action = police->tcf_action,
                .mtu = police->tcfp_mtu,
                .burst = PSCHED_NS2TICKS(police->tcfp_burst),
-               .refcnt = police->tcf_refcnt - ref,
-               .bindcnt = police->tcf_bindcnt - bind,
+               .refcnt = refcount_read(&police->tcf_refcnt) - ref,
+               .bindcnt = atomic_read(&police->tcf_bindcnt) - bind,
        };
        struct tcf_t t;
 
@@ -314,6 +317,13 @@ static int tcf_police_search(struct net *net, struct tc_action **a, u32 index,
        return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_police_delete(struct net *net, u32 index)
+{
+       struct tc_action_net *tn = net_generic(net, police_net_id);
+
+       return tcf_idr_delete_index(tn, index);
+}
+
 MODULE_AUTHOR("Alexey Kuznetsov");
 MODULE_DESCRIPTION("Policing actions");
 MODULE_LICENSE("GPL");
@@ -327,6 +337,7 @@ static struct tc_action_ops act_police_ops = {
        .init           =       tcf_act_police_init,
        .walk           =       tcf_act_police_walker,
        .lookup         =       tcf_police_search,
+       .delete         =       tcf_police_delete,
        .size           =       sizeof(struct tcf_police),
 };
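
The 'failure:' hunk here, like the matching ones in act_csum, act_sample and act_skbmod, drops the 'if (ret == ACT_P_CREATED)' guard in front of tcf_idr_release(). With the counters now reference-counted, release is correct in both cases, so the guard is dead weight:

	/* Before: only a freshly created action was released on error. */
	if (ret == ACT_P_CREATED)
		tcf_idr_release(*a, bind);

	/* After: tcf_idr_release() just drops one reference, destroying a
	 * newly created action (last reference) or merely putting the
	 * reference taken on an existing one. */
	tcf_idr_release(*a, bind);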
 
index 5db358497c9ee610c499c88d0dd6eff463ccd70d..2608ccc83e5e7796d565438d6d0b739654695297 100644 (file)
@@ -37,7 +37,8 @@ static const struct nla_policy sample_policy[TCA_SAMPLE_MAX + 1] = {
 
 static int tcf_sample_init(struct net *net, struct nlattr *nla,
                           struct nlattr *est, struct tc_action **a, int ovr,
-                          int bind, struct netlink_ext_ack *extack)
+                          int bind, bool rtnl_held,
+                          struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, sample_net_id);
        struct nlattr *tb[TCA_SAMPLE_MAX + 1];
@@ -45,7 +46,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
        struct tc_sample *parm;
        struct tcf_sample *s;
        bool exists = false;
-       int ret;
+       int ret, err;
 
        if (!nla)
                return -EINVAL;
@@ -58,20 +59,24 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
 
        parm = nla_data(tb[TCA_SAMPLE_PARMS]);
 
-       exists = tcf_idr_check(tn, parm->index, a, bind);
+       err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+       if (err < 0)
+               return err;
+       exists = err;
        if (exists && bind)
                return 0;
 
        if (!exists) {
                ret = tcf_idr_create(tn, parm->index, est, a,
                                     &act_sample_ops, bind, false);
-               if (ret)
+               if (ret) {
+                       tcf_idr_cleanup(tn, parm->index);
                        return ret;
+               }
                ret = ACT_P_CREATED;
-       } else {
+       } else if (!ovr) {
                tcf_idr_release(*a, bind);
-               if (!ovr)
-                       return -EEXIST;
+               return -EEXIST;
        }
        s = to_sample(*a);
 
@@ -80,8 +85,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
        s->psample_group_num = nla_get_u32(tb[TCA_SAMPLE_PSAMPLE_GROUP]);
        psample_group = psample_group_get(net, s->psample_group_num);
        if (!psample_group) {
-               if (ret == ACT_P_CREATED)
-                       tcf_idr_release(*a, bind);
+               tcf_idr_release(*a, bind);
                return -ENOMEM;
        }
        RCU_INIT_POINTER(s->psample_group, psample_group);
@@ -136,8 +140,7 @@ static int tcf_sample_act(struct sk_buff *skb, const struct tc_action *a,
        bstats_cpu_update(this_cpu_ptr(s->common.cpu_bstats), skb);
        retval = READ_ONCE(s->tcf_action);
 
-       rcu_read_lock();
-       psample_group = rcu_dereference(s->psample_group);
+       psample_group = rcu_dereference_bh(s->psample_group);
 
        /* randomly sample packets according to rate */
        if (psample_group && (prandom_u32() % s->rate == 0)) {
@@ -161,7 +164,6 @@ static int tcf_sample_act(struct sk_buff *skb, const struct tc_action *a,
                        skb_pull(skb, skb->mac_len);
        }
 
-       rcu_read_unlock();
        return retval;
 }
 
@@ -173,8 +175,8 @@ static int tcf_sample_dump(struct sk_buff *skb, struct tc_action *a,
        struct tc_sample opt = {
                .index      = s->tcf_index,
                .action     = s->tcf_action,
-               .refcnt     = s->tcf_refcnt - ref,
-               .bindcnt    = s->tcf_bindcnt - bind,
+               .refcnt     = refcount_read(&s->tcf_refcnt) - ref,
+               .bindcnt    = atomic_read(&s->tcf_bindcnt) - bind,
        };
        struct tcf_t t;
 
@@ -219,6 +221,13 @@ static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index,
        return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_sample_delete(struct net *net, u32 index)
+{
+       struct tc_action_net *tn = net_generic(net, sample_net_id);
+
+       return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_sample_ops = {
        .kind     = "sample",
        .type     = TCA_ACT_SAMPLE,
@@ -229,6 +238,7 @@ static struct tc_action_ops act_sample_ops = {
        .cleanup  = tcf_sample_cleanup,
        .walk     = tcf_sample_walker,
        .lookup   = tcf_sample_search,
+       .delete   = tcf_sample_delete,
        .size     = sizeof(struct tcf_sample),
 };
 
index 98c4afe7c15b29a99a3d18e06934e34f0732110d..aa51152e00668e76cccc08e3592769f1f7874915 100644 (file)
@@ -79,7 +79,8 @@ static const struct nla_policy simple_policy[TCA_DEF_MAX + 1] = {
 
 static int tcf_simp_init(struct net *net, struct nlattr *nla,
                         struct nlattr *est, struct tc_action **a,
-                        int ovr, int bind, struct netlink_ext_ack *extack)
+                        int ovr, int bind, bool rtnl_held,
+                        struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, simp_net_id);
        struct nlattr *tb[TCA_DEF_MAX + 1];
@@ -99,21 +100,28 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
                return -EINVAL;
 
        parm = nla_data(tb[TCA_DEF_PARMS]);
-       exists = tcf_idr_check(tn, parm->index, a, bind);
+       err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+       if (err < 0)
+               return err;
+       exists = err;
        if (exists && bind)
                return 0;
 
        if (tb[TCA_DEF_DATA] == NULL) {
                if (exists)
                        tcf_idr_release(*a, bind);
+               else
+                       tcf_idr_cleanup(tn, parm->index);
                return -EINVAL;
        }
 
        if (!exists) {
                ret = tcf_idr_create(tn, parm->index, est, a,
                                     &act_simp_ops, bind, false);
-               if (ret)
+               if (ret) {
+                       tcf_idr_cleanup(tn, parm->index);
                        return ret;
+               }
 
                d = to_defact(*a);
                ret = alloc_defdata(d, tb[TCA_DEF_DATA]);
@@ -126,9 +134,10 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
        } else {
                d = to_defact(*a);
 
-               tcf_idr_release(*a, bind);
-               if (!ovr)
+               if (!ovr) {
+                       tcf_idr_release(*a, bind);
                        return -EEXIST;
+               }
 
                reset_policy(d, tb[TCA_DEF_DATA], parm);
        }
@@ -145,8 +154,8 @@ static int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a,
        struct tcf_defact *d = to_defact(a);
        struct tc_defact opt = {
                .index   = d->tcf_index,
-               .refcnt  = d->tcf_refcnt - ref,
-               .bindcnt = d->tcf_bindcnt - bind,
+               .refcnt  = refcount_read(&d->tcf_refcnt) - ref,
+               .bindcnt = atomic_read(&d->tcf_bindcnt) - bind,
                .action  = d->tcf_action,
        };
        struct tcf_t t;
@@ -183,6 +192,13 @@ static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index,
        return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_simp_delete(struct net *net, u32 index)
+{
+       struct tc_action_net *tn = net_generic(net, simp_net_id);
+
+       return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_simp_ops = {
        .kind           =       "simple",
        .type           =       TCA_ACT_SIMP,
@@ -193,6 +209,7 @@ static struct tc_action_ops act_simp_ops = {
        .init           =       tcf_simp_init,
        .walk           =       tcf_simp_walker,
        .lookup         =       tcf_simp_search,
+       .delete         =       tcf_simp_delete,
        .size           =       sizeof(struct tcf_defact),
 };
 
index 6138d1d71900b561f50578bf22110902bb488bf4..a6db47ebec112ad79413acf102f6593b5863f50c 100644 (file)
@@ -23,6 +23,9 @@
 #include <linux/rtnetlink.h>
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/dsfield.h>
 
 #include <linux/tc_act/tc_skbedit.h>
 #include <net/tc_act/tc_skbedit.h>
@@ -34,25 +37,50 @@ static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a,
                       struct tcf_result *res)
 {
        struct tcf_skbedit *d = to_skbedit(a);
+       struct tcf_skbedit_params *params;
+       int action;
 
-       spin_lock(&d->tcf_lock);
        tcf_lastuse_update(&d->tcf_tm);
-       bstats_update(&d->tcf_bstats, skb);
-
-       if (d->flags & SKBEDIT_F_PRIORITY)
-               skb->priority = d->priority;
-       if (d->flags & SKBEDIT_F_QUEUE_MAPPING &&
-           skb->dev->real_num_tx_queues > d->queue_mapping)
-               skb_set_queue_mapping(skb, d->queue_mapping);
-       if (d->flags & SKBEDIT_F_MARK) {
-               skb->mark &= ~d->mask;
-               skb->mark |= d->mark & d->mask;
+       bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb);
+
+       params = rcu_dereference_bh(d->params);
+       action = READ_ONCE(d->tcf_action);
+
+       if (params->flags & SKBEDIT_F_PRIORITY)
+               skb->priority = params->priority;
+       if (params->flags & SKBEDIT_F_INHERITDSFIELD) {
+               int wlen = skb_network_offset(skb);
+
+               switch (tc_skb_protocol(skb)) {
+               case htons(ETH_P_IP):
+                       wlen += sizeof(struct iphdr);
+                       if (!pskb_may_pull(skb, wlen))
+                               goto err;
+                       skb->priority = ipv4_get_dsfield(ip_hdr(skb)) >> 2;
+                       break;
+
+               case htons(ETH_P_IPV6):
+                       wlen += sizeof(struct ipv6hdr);
+                       if (!pskb_may_pull(skb, wlen))
+                               goto err;
+                       skb->priority = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2;
+                       break;
+               }
        }
-       if (d->flags & SKBEDIT_F_PTYPE)
-               skb->pkt_type = d->ptype;
+       if (params->flags & SKBEDIT_F_QUEUE_MAPPING &&
+           skb->dev->real_num_tx_queues > params->queue_mapping)
+               skb_set_queue_mapping(skb, params->queue_mapping);
+       if (params->flags & SKBEDIT_F_MARK) {
+               skb->mark &= ~params->mask;
+               skb->mark |= params->mark & params->mask;
+       }
+       if (params->flags & SKBEDIT_F_PTYPE)
+               skb->pkt_type = params->ptype;
+       return action;
 
-       spin_unlock(&d->tcf_lock);
-       return d->tcf_action;
+err:
+       qstats_drop_inc(this_cpu_ptr(d->common.cpu_qstats));
+       return TC_ACT_SHOT;
 }
 
 static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
@@ -62,13 +90,16 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
        [TCA_SKBEDIT_MARK]              = { .len = sizeof(u32) },
        [TCA_SKBEDIT_PTYPE]             = { .len = sizeof(u16) },
        [TCA_SKBEDIT_MASK]              = { .len = sizeof(u32) },
+       [TCA_SKBEDIT_FLAGS]             = { .len = sizeof(u64) },
 };
 
 static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
                            struct nlattr *est, struct tc_action **a,
-                           int ovr, int bind, struct netlink_ext_ack *extack)
+                           int ovr, int bind, bool rtnl_held,
+                           struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, skbedit_net_id);
+       struct tcf_skbedit_params *params_old, *params_new;
        struct nlattr *tb[TCA_SKBEDIT_MAX + 1];
        struct tc_skbedit *parm;
        struct tcf_skbedit *d;
@@ -114,52 +145,76 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
                mask = nla_data(tb[TCA_SKBEDIT_MASK]);
        }
 
+       if (tb[TCA_SKBEDIT_FLAGS] != NULL) {
+               u64 *pure_flags = nla_data(tb[TCA_SKBEDIT_FLAGS]);
+
+               if (*pure_flags & SKBEDIT_F_INHERITDSFIELD)
+                       flags |= SKBEDIT_F_INHERITDSFIELD;
+       }
+
        parm = nla_data(tb[TCA_SKBEDIT_PARMS]);
 
-       exists = tcf_idr_check(tn, parm->index, a, bind);
+       err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+       if (err < 0)
+               return err;
+       exists = err;
        if (exists && bind)
                return 0;
 
        if (!flags) {
                if (exists)
                        tcf_idr_release(*a, bind);
+               else
+                       tcf_idr_cleanup(tn, parm->index);
                return -EINVAL;
        }
 
        if (!exists) {
                ret = tcf_idr_create(tn, parm->index, est, a,
-                                    &act_skbedit_ops, bind, false);
-               if (ret)
+                                    &act_skbedit_ops, bind, true);
+               if (ret) {
+                       tcf_idr_cleanup(tn, parm->index);
                        return ret;
+               }
 
                d = to_skbedit(*a);
                ret = ACT_P_CREATED;
        } else {
                d = to_skbedit(*a);
-               tcf_idr_release(*a, bind);
-               if (!ovr)
+               if (!ovr) {
+                       tcf_idr_release(*a, bind);
                        return -EEXIST;
+               }
        }
 
-       spin_lock_bh(&d->tcf_lock);
+       ASSERT_RTNL();
+
+       params_new = kzalloc(sizeof(*params_new), GFP_KERNEL);
+       if (unlikely(!params_new)) {
+               if (ret == ACT_P_CREATED)
+                       tcf_idr_release(*a, bind);
+               return -ENOMEM;
+       }
 
-       d->flags = flags;
+       params_new->flags = flags;
        if (flags & SKBEDIT_F_PRIORITY)
-               d->priority = *priority;
+               params_new->priority = *priority;
        if (flags & SKBEDIT_F_QUEUE_MAPPING)
-               d->queue_mapping = *queue_mapping;
+               params_new->queue_mapping = *queue_mapping;
        if (flags & SKBEDIT_F_MARK)
-               d->mark = *mark;
+               params_new->mark = *mark;
        if (flags & SKBEDIT_F_PTYPE)
-               d->ptype = *ptype;
+               params_new->ptype = *ptype;
        /* default behaviour is to use all the bits */
-       d->mask = 0xffffffff;
+       params_new->mask = 0xffffffff;
        if (flags & SKBEDIT_F_MASK)
-               d->mask = *mask;
+               params_new->mask = *mask;
 
        d->tcf_action = parm->action;
-
-       spin_unlock_bh(&d->tcf_lock);
+       params_old = rtnl_dereference(d->params);
+       rcu_assign_pointer(d->params, params_new);
+       if (params_old)
+               kfree_rcu(params_old, rcu);
 
        if (ret == ACT_P_CREATED)
                tcf_idr_insert(tn, *a);
@@ -171,30 +226,39 @@ static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
 {
        unsigned char *b = skb_tail_pointer(skb);
        struct tcf_skbedit *d = to_skbedit(a);
+       struct tcf_skbedit_params *params;
        struct tc_skbedit opt = {
                .index   = d->tcf_index,
-               .refcnt  = d->tcf_refcnt - ref,
-               .bindcnt = d->tcf_bindcnt - bind,
+               .refcnt  = refcount_read(&d->tcf_refcnt) - ref,
+               .bindcnt = atomic_read(&d->tcf_bindcnt) - bind,
                .action  = d->tcf_action,
        };
+       u64 pure_flags = 0;
        struct tcf_t t;
 
+       params = rtnl_dereference(d->params);
+
        if (nla_put(skb, TCA_SKBEDIT_PARMS, sizeof(opt), &opt))
                goto nla_put_failure;
-       if ((d->flags & SKBEDIT_F_PRIORITY) &&
-           nla_put_u32(skb, TCA_SKBEDIT_PRIORITY, d->priority))
+       if ((params->flags & SKBEDIT_F_PRIORITY) &&
+           nla_put_u32(skb, TCA_SKBEDIT_PRIORITY, params->priority))
                goto nla_put_failure;
-       if ((d->flags & SKBEDIT_F_QUEUE_MAPPING) &&
-           nla_put_u16(skb, TCA_SKBEDIT_QUEUE_MAPPING, d->queue_mapping))
+       if ((params->flags & SKBEDIT_F_QUEUE_MAPPING) &&
+           nla_put_u16(skb, TCA_SKBEDIT_QUEUE_MAPPING, params->queue_mapping))
                goto nla_put_failure;
-       if ((d->flags & SKBEDIT_F_MARK) &&
-           nla_put_u32(skb, TCA_SKBEDIT_MARK, d->mark))
+       if ((params->flags & SKBEDIT_F_MARK) &&
+           nla_put_u32(skb, TCA_SKBEDIT_MARK, params->mark))
                goto nla_put_failure;
-       if ((d->flags & SKBEDIT_F_PTYPE) &&
-           nla_put_u16(skb, TCA_SKBEDIT_PTYPE, d->ptype))
+       if ((params->flags & SKBEDIT_F_PTYPE) &&
+           nla_put_u16(skb, TCA_SKBEDIT_PTYPE, params->ptype))
                goto nla_put_failure;
-       if ((d->flags & SKBEDIT_F_MASK) &&
-           nla_put_u32(skb, TCA_SKBEDIT_MASK, d->mask))
+       if ((params->flags & SKBEDIT_F_MASK) &&
+           nla_put_u32(skb, TCA_SKBEDIT_MASK, params->mask))
+               goto nla_put_failure;
+       if (params->flags & SKBEDIT_F_INHERITDSFIELD)
+               pure_flags |= SKBEDIT_F_INHERITDSFIELD;
+       if (pure_flags != 0 &&
+           nla_put(skb, TCA_SKBEDIT_FLAGS, sizeof(pure_flags), &pure_flags))
                goto nla_put_failure;
 
        tcf_tm_dump(&t, &d->tcf_tm);
@@ -207,6 +271,16 @@ nla_put_failure:
        return -1;
 }
 
+static void tcf_skbedit_cleanup(struct tc_action *a)
+{
+       struct tcf_skbedit *d = to_skbedit(a);
+       struct tcf_skbedit_params *params;
+
+       params = rcu_dereference_protected(d->params, 1);
+       if (params)
+               kfree_rcu(params, rcu);
+}
+
 static int tcf_skbedit_walker(struct net *net, struct sk_buff *skb,
                              struct netlink_callback *cb, int type,
                              const struct tc_action_ops *ops,
@@ -225,6 +299,13 @@ static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index,
        return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_skbedit_delete(struct net *net, u32 index)
+{
+       struct tc_action_net *tn = net_generic(net, skbedit_net_id);
+
+       return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_skbedit_ops = {
        .kind           =       "skbedit",
        .type           =       TCA_ACT_SKBEDIT,
@@ -232,8 +313,10 @@ static struct tc_action_ops act_skbedit_ops = {
        .act            =       tcf_skbedit,
        .dump           =       tcf_skbedit_dump,
        .init           =       tcf_skbedit_init,
+       .cleanup        =       tcf_skbedit_cleanup,
        .walk           =       tcf_skbedit_walker,
        .lookup         =       tcf_skbedit_search,
+       .delete         =       tcf_skbedit_delete,
        .size           =       sizeof(struct tcf_skbedit),
 };
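
act_skbedit's conversion follows the csum template at the top of this section: per-packet spinlocking is replaced by an RCU-managed params struct. The control path, with RTNL held, builds a fresh copy and publishes it; readers in BH context see either the old or the new copy, never a torn one. The publish step, reassembled from the hunk:

	params_new = kzalloc(sizeof(*params_new), GFP_KERNEL);
	if (unlikely(!params_new))
		return -ENOMEM;		/* after releasing the action, as above */

	/* ... fill flags/priority/queue_mapping/mark/ptype/mask ... */

	params_old = rtnl_dereference(d->params);	/* writer-side read */
	rcu_assign_pointer(d->params, params_new);	/* publish new copy */
	if (params_old)
		kfree_rcu(params_old, rcu);	/* free after a grace period */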
 
index ad050d7d4b46a2d45f85e15bb7e68d28915f1d54..c437c6d51a71763bf77e8350e41ccf6f5be000e3 100644 (file)
@@ -41,20 +41,14 @@ static int tcf_skbmod_run(struct sk_buff *skb, const struct tc_action *a,
         * then MAX_EDIT_LEN needs to change appropriately
        */
        err = skb_ensure_writable(skb, MAX_EDIT_LEN);
-       if (unlikely(err)) { /* best policy is to drop on the floor */
-               qstats_overlimit_inc(this_cpu_ptr(d->common.cpu_qstats));
-               return TC_ACT_SHOT;
-       }
+       if (unlikely(err)) /* best policy is to drop on the floor */
+               goto drop;
 
-       rcu_read_lock();
        action = READ_ONCE(d->tcf_action);
-       if (unlikely(action == TC_ACT_SHOT)) {
-               qstats_overlimit_inc(this_cpu_ptr(d->common.cpu_qstats));
-               rcu_read_unlock();
-               return action;
-       }
+       if (unlikely(action == TC_ACT_SHOT))
+               goto drop;
 
-       p = rcu_dereference(d->skbmod_p);
+       p = rcu_dereference_bh(d->skbmod_p);
        flags = p->flags;
        if (flags & SKBMOD_F_DMAC)
                ether_addr_copy(eth_hdr(skb)->h_dest, p->eth_dst);
@@ -62,7 +56,6 @@ static int tcf_skbmod_run(struct sk_buff *skb, const struct tc_action *a,
                ether_addr_copy(eth_hdr(skb)->h_source, p->eth_src);
        if (flags & SKBMOD_F_ETYPE)
                eth_hdr(skb)->h_proto = p->eth_type;
-       rcu_read_unlock();
 
        if (flags & SKBMOD_F_SWAPMAC) {
                u16 tmpaddr[ETH_ALEN / 2]; /* ether_addr_copy() requirement */
@@ -73,6 +66,10 @@ static int tcf_skbmod_run(struct sk_buff *skb, const struct tc_action *a,
        }
 
        return action;
+
+drop:
+       qstats_overlimit_inc(this_cpu_ptr(d->common.cpu_qstats));
+       return TC_ACT_SHOT;
 }
 
 static const struct nla_policy skbmod_policy[TCA_SKBMOD_MAX + 1] = {
@@ -84,7 +81,8 @@ static const struct nla_policy skbmod_policy[TCA_SKBMOD_MAX + 1] = {
 
 static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
                           struct nlattr *est, struct tc_action **a,
-                          int ovr, int bind, struct netlink_ext_ack *extack)
+                          int ovr, int bind, bool rtnl_held,
+                          struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, skbmod_net_id);
        struct nlattr *tb[TCA_SKBMOD_MAX + 1];
@@ -127,27 +125,33 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
        if (parm->flags & SKBMOD_F_SWAPMAC)
                lflags = SKBMOD_F_SWAPMAC;
 
-       exists = tcf_idr_check(tn, parm->index, a, bind);
+       err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+       if (err < 0)
+               return err;
+       exists = err;
        if (exists && bind)
                return 0;
 
        if (!lflags) {
                if (exists)
                        tcf_idr_release(*a, bind);
+               else
+                       tcf_idr_cleanup(tn, parm->index);
                return -EINVAL;
        }
 
        if (!exists) {
                ret = tcf_idr_create(tn, parm->index, est, a,
                                     &act_skbmod_ops, bind, true);
-               if (ret)
+               if (ret) {
+                       tcf_idr_cleanup(tn, parm->index);
                        return ret;
+               }
 
                ret = ACT_P_CREATED;
-       } else {
+       } else if (!ovr) {
                tcf_idr_release(*a, bind);
-               if (!ovr)
-                       return -EEXIST;
+               return -EEXIST;
        }
 
        d = to_skbmod(*a);
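tcf_idr_check_alloc() replaces tcf_idr_check(): besides looking the index up, it reserves a fresh index when none exists, so every early-exit error path must now release that reservation. The return-value convention as used above, sketched with invalid_config standing in for any parameter check:

	err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
	if (err < 0)
		return err;		/* lookup or reservation failed */
	exists = err;			/* 1: action found, 0: index reserved */
	if (exists && bind)
		return 0;

	if (invalid_config) {
		if (exists)
			tcf_idr_release(*a, bind);	  /* drop the reference */
		else
			tcf_idr_cleanup(tn, parm->index); /* drop the reservation */
		return -EINVAL;
	}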
@@ -155,8 +159,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
        ASSERT_RTNL();
        p = kzalloc(sizeof(struct tcf_skbmod_params), GFP_KERNEL);
        if (unlikely(!p)) {
-               if (ret == ACT_P_CREATED)
-                       tcf_idr_release(*a, bind);
+               tcf_idr_release(*a, bind);
                return -ENOMEM;
        }
 
@@ -205,8 +208,8 @@ static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a,
        struct tcf_skbmod_params  *p = rtnl_dereference(d->skbmod_p);
        struct tc_skbmod opt = {
                .index   = d->tcf_index,
-               .refcnt  = d->tcf_refcnt - ref,
-               .bindcnt = d->tcf_bindcnt - bind,
+               .refcnt  = refcount_read(&d->tcf_refcnt) - ref,
+               .bindcnt = atomic_read(&d->tcf_bindcnt) - bind,
                .action  = d->tcf_action,
        };
        struct tcf_t t;
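The dump side follows the series-wide conversion of the action reference counters from plain integers to refcount_t/atomic_t, so reads go through the dedicated accessors. Sketch (the field placement mirrors the common action struct only loosely):

	/* Illustrative: the shared action state now carries
	 *	refcount_t tcf_refcnt;	 (lifetime references)
	 *	atomic_t   tcf_bindcnt; (filter bindings)
	 * and dumps read them lock-free:
	 */
	opt.refcnt  = refcount_read(&d->tcf_refcnt) - ref;
	opt.bindcnt = atomic_read(&d->tcf_bindcnt) - bind;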
@@ -252,6 +255,13 @@ static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index,
        return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_skbmod_delete(struct net *net, u32 index)
+{
+       struct tc_action_net *tn = net_generic(net, skbmod_net_id);
+
+       return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_skbmod_ops = {
        .kind           =       "skbmod",
        .type           =       TCA_ACT_SKBMOD,
@@ -262,6 +272,7 @@ static struct tc_action_ops act_skbmod_ops = {
        .cleanup        =       tcf_skbmod_cleanup,
        .walk           =       tcf_skbmod_walker,
        .lookup         =       tcf_skbmod_search,
+       .delete         =       tcf_skbmod_delete,
        .size           =       sizeof(struct tcf_skbmod),
 };
 
index 9bc6c2ae98a56ceb2a4719be91a1937b5441a58d..d42d9e112789f3ca4d3037d395a8e30a1a31b989 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/kernel.h>
 #include <linux/skbuff.h>
 #include <linux/rtnetlink.h>
+#include <net/geneve.h>
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
 #include <net/dst.h>
@@ -30,9 +31,7 @@ static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a,
        struct tcf_tunnel_key_params *params;
        int action;
 
-       rcu_read_lock();
-
-       params = rcu_dereference(t->params);
+       params = rcu_dereference_bh(t->params);
 
        tcf_lastuse_update(&t->tcf_tm);
        bstats_cpu_update(this_cpu_ptr(t->common.cpu_bstats), skb);
@@ -52,11 +51,138 @@ static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a,
                break;
        }
 
-       rcu_read_unlock();
-
        return action;
 }
 
+static const struct nla_policy
+enc_opts_policy[TCA_TUNNEL_KEY_ENC_OPTS_MAX + 1] = {
+       [TCA_TUNNEL_KEY_ENC_OPTS_GENEVE]        = { .type = NLA_NESTED },
+};
+
+static const struct nla_policy
+geneve_opt_policy[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_MAX + 1] = {
+       [TCA_TUNNEL_KEY_ENC_OPT_GENEVE_CLASS]      = { .type = NLA_U16 },
+       [TCA_TUNNEL_KEY_ENC_OPT_GENEVE_TYPE]       = { .type = NLA_U8 },
+       [TCA_TUNNEL_KEY_ENC_OPT_GENEVE_DATA]       = { .type = NLA_BINARY,
+                                                      .len = 128 },
+};
+
+static int
+tunnel_key_copy_geneve_opt(const struct nlattr *nla, void *dst, int dst_len,
+                          struct netlink_ext_ack *extack)
+{
+       struct nlattr *tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_MAX + 1];
+       int err, data_len, opt_len;
+       u8 *data;
+
+       err = nla_parse_nested(tb, TCA_TUNNEL_KEY_ENC_OPT_GENEVE_MAX,
+                              nla, geneve_opt_policy, extack);
+       if (err < 0)
+               return err;
+
+       if (!tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_CLASS] ||
+           !tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_TYPE] ||
+           !tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_DATA]) {
+               NL_SET_ERR_MSG(extack, "Missing tunnel key geneve option class, type or data");
+               return -EINVAL;
+       }
+
+       data = nla_data(tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_DATA]);
+       data_len = nla_len(tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_DATA]);
+       if (data_len < 4) {
+               NL_SET_ERR_MSG(extack, "Tunnel key geneve option data is less than 4 bytes long");
+               return -ERANGE;
+       }
+       if (data_len % 4) {
+               NL_SET_ERR_MSG(extack, "Tunnel key geneve option data is not a multiple of 4 bytes long");
+               return -ERANGE;
+       }
+
+       opt_len = sizeof(struct geneve_opt) + data_len;
+       if (dst) {
+               struct geneve_opt *opt = dst;
+
+               WARN_ON(dst_len < opt_len);
+
+               opt->opt_class =
+                       nla_get_be16(tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_CLASS]);
+               opt->type = nla_get_u8(tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_TYPE]);
+               opt->length = data_len / 4; /* length is in units of 4 bytes */
+               opt->r1 = 0;
+               opt->r2 = 0;
+               opt->r3 = 0;
+
+               memcpy(opt + 1, data, data_len);
+       }
+
+       return opt_len;
+}
+
+static int tunnel_key_copy_opts(const struct nlattr *nla, u8 *dst,
+                               int dst_len, struct netlink_ext_ack *extack)
+{
+       int err, rem, opt_len, len = nla_len(nla), opts_len = 0;
+       const struct nlattr *attr, *head = nla_data(nla);
+
+       err = nla_validate(head, len, TCA_TUNNEL_KEY_ENC_OPTS_MAX,
+                          enc_opts_policy, extack);
+       if (err)
+               return err;
+
+       nla_for_each_attr(attr, head, len, rem) {
+               switch (nla_type(attr)) {
+               case TCA_TUNNEL_KEY_ENC_OPTS_GENEVE:
+                       opt_len = tunnel_key_copy_geneve_opt(attr, dst,
+                                                            dst_len, extack);
+                       if (opt_len < 0)
+                               return opt_len;
+                       opts_len += opt_len;
+                       if (dst) {
+                               dst_len -= opt_len;
+                               dst += opt_len;
+                       }
+                       break;
+               }
+       }
+
+       if (!opts_len) {
+               NL_SET_ERR_MSG(extack, "Empty list of tunnel options");
+               return -EINVAL;
+       }
+
+       if (rem > 0) {
+               NL_SET_ERR_MSG(extack, "Trailing data after parsing tunnel key options attributes");
+               return -EINVAL;
+       }
+
+       return opts_len;
+}
+
+static int tunnel_key_get_opts_len(struct nlattr *nla,
+                                  struct netlink_ext_ack *extack)
+{
+       return tunnel_key_copy_opts(nla, NULL, 0, extack);
+}
+
+static int tunnel_key_opts_set(struct nlattr *nla, struct ip_tunnel_info *info,
+                              int opts_len, struct netlink_ext_ack *extack)
+{
+       info->options_len = opts_len;
+       switch (nla_type(nla_data(nla))) {
+       case TCA_TUNNEL_KEY_ENC_OPTS_GENEVE:
+#if IS_ENABLED(CONFIG_INET)
+               info->key.tun_flags |= TUNNEL_GENEVE_OPT;
+               return tunnel_key_copy_opts(nla, ip_tunnel_info_opts(info),
+                                           opts_len, extack);
+#else
+               return -EAFNOSUPPORT;
+#endif
+       default:
+               NL_SET_ERR_MSG(extack, "Cannot set tunnel options for unknown tunnel type");
+               return -EINVAL;
+       }
+}
+
 static const struct nla_policy tunnel_key_policy[TCA_TUNNEL_KEY_MAX + 1] = {
        [TCA_TUNNEL_KEY_PARMS]      = { .len = sizeof(struct tc_tunnel_key) },
        [TCA_TUNNEL_KEY_ENC_IPV4_SRC] = { .type = NLA_U32 },
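tunnel_key_copy_geneve_opt() above is called twice: once with dst == NULL purely to size the buffer, then again to fill it. Each option is laid out as the fixed geneve_opt header immediately followed by the variable payload. A sketch of the fill step (cls, type, data and data_len are illustrative locals):

	/* data_len was already validated: >= 4 and a multiple of 4 */
	int opt_len = sizeof(struct geneve_opt) + data_len;

	opt->opt_class = cls;		/* __be16 option class */
	opt->type = type;		/* u8 option type */
	opt->length = data_len / 4;	/* header counts 4-byte words */
	opt->r1 = opt->r2 = opt->r3 = 0;	/* reserved bits */
	memcpy(opt + 1, data, data_len);	/* payload follows the header */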
@@ -66,11 +192,15 @@ static const struct nla_policy tunnel_key_policy[TCA_TUNNEL_KEY_MAX + 1] = {
        [TCA_TUNNEL_KEY_ENC_KEY_ID]   = { .type = NLA_U32 },
        [TCA_TUNNEL_KEY_ENC_DST_PORT] = {.type = NLA_U16},
        [TCA_TUNNEL_KEY_NO_CSUM]      = { .type = NLA_U8 },
+       [TCA_TUNNEL_KEY_ENC_OPTS]     = { .type = NLA_NESTED },
+       [TCA_TUNNEL_KEY_ENC_TOS]      = { .type = NLA_U8 },
+       [TCA_TUNNEL_KEY_ENC_TTL]      = { .type = NLA_U8 },
 };
 
 static int tunnel_key_init(struct net *net, struct nlattr *nla,
                           struct nlattr *est, struct tc_action **a,
-                          int ovr, int bind, struct netlink_ext_ack *extack)
+                          int ovr, int bind, bool rtnl_held,
+                          struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
        struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1];
@@ -81,24 +211,35 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
        struct tcf_tunnel_key *t;
        bool exists = false;
        __be16 dst_port = 0;
+       int opts_len = 0;
        __be64 key_id;
        __be16 flags;
+       u8 tos, ttl;
        int ret = 0;
        int err;
 
-       if (!nla)
+       if (!nla) {
+               NL_SET_ERR_MSG(extack, "Tunnel requires attributes to be passed");
                return -EINVAL;
+       }
 
        err = nla_parse_nested(tb, TCA_TUNNEL_KEY_MAX, nla, tunnel_key_policy,
-                              NULL);
-       if (err < 0)
+                              extack);
+       if (err < 0) {
+               NL_SET_ERR_MSG(extack, "Failed to parse nested tunnel key attributes");
                return err;
+       }
 
-       if (!tb[TCA_TUNNEL_KEY_PARMS])
+       if (!tb[TCA_TUNNEL_KEY_PARMS]) {
+               NL_SET_ERR_MSG(extack, "Missing tunnel key parameters");
                return -EINVAL;
+       }
 
        parm = nla_data(tb[TCA_TUNNEL_KEY_PARMS]);
-       exists = tcf_idr_check(tn, parm->index, a, bind);
+       err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+       if (err < 0)
+               return err;
+       exists = err;
        if (exists && bind)
                return 0;
 
@@ -107,6 +248,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
                break;
        case TCA_TUNNEL_KEY_ACT_SET:
                if (!tb[TCA_TUNNEL_KEY_ENC_KEY_ID]) {
+                       NL_SET_ERR_MSG(extack, "Missing tunnel key id");
                        ret = -EINVAL;
                        goto err_out;
                }
@@ -121,6 +263,22 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
                if (tb[TCA_TUNNEL_KEY_ENC_DST_PORT])
                        dst_port = nla_get_be16(tb[TCA_TUNNEL_KEY_ENC_DST_PORT]);
 
+               if (tb[TCA_TUNNEL_KEY_ENC_OPTS]) {
+                       opts_len = tunnel_key_get_opts_len(tb[TCA_TUNNEL_KEY_ENC_OPTS],
+                                                          extack);
+                       if (opts_len < 0) {
+                               ret = opts_len;
+                               goto err_out;
+                       }
+               }
+
+               tos = 0;
+               if (tb[TCA_TUNNEL_KEY_ENC_TOS])
+                       tos = nla_get_u8(tb[TCA_TUNNEL_KEY_ENC_TOS]);
+               ttl = 0;
+               if (tb[TCA_TUNNEL_KEY_ENC_TTL])
+                       ttl = nla_get_u8(tb[TCA_TUNNEL_KEY_ENC_TTL]);
+
                if (tb[TCA_TUNNEL_KEY_ENC_IPV4_SRC] &&
                    tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]) {
                        __be32 saddr;
@@ -129,9 +287,9 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
                        saddr = nla_get_in_addr(tb[TCA_TUNNEL_KEY_ENC_IPV4_SRC]);
                        daddr = nla_get_in_addr(tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]);
 
-                       metadata = __ip_tun_set_dst(saddr, daddr, 0, 0,
+                       metadata = __ip_tun_set_dst(saddr, daddr, tos, ttl,
                                                    dst_port, flags,
-                                                   key_id, 0);
+                                                   key_id, opts_len);
                } else if (tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC] &&
                           tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]) {
                        struct in6_addr saddr;
@@ -140,19 +298,33 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
                        saddr = nla_get_in6_addr(tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC]);
                        daddr = nla_get_in6_addr(tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]);
 
-                       metadata = __ipv6_tun_set_dst(&saddr, &daddr, 0, 0, dst_port,
+                       metadata = __ipv6_tun_set_dst(&saddr, &daddr, tos, ttl, dst_port,
                                                      0, flags,
                                                      key_id, 0);
+               } else {
+                       NL_SET_ERR_MSG(extack, "Missing either ipv4 or ipv6 src and dst");
+                       ret = -EINVAL;
+                       goto err_out;
                }
 
                if (!metadata) {
-                       ret = -EINVAL;
+                       NL_SET_ERR_MSG(extack, "Cannot allocate tunnel metadata dst");
+                       ret = -ENOMEM;
                        goto err_out;
                }
 
+               if (opts_len) {
+                       ret = tunnel_key_opts_set(tb[TCA_TUNNEL_KEY_ENC_OPTS],
+                                                 &metadata->u.tun_info,
+                                                 opts_len, extack);
+                       if (ret < 0)
+                               goto err_out;
+               }
+
                metadata->u.tun_info.mode |= IP_TUNNEL_INFO_TX;
                break;
        default:
+               NL_SET_ERR_MSG(extack, "Unknown tunnel key action");
                ret = -EINVAL;
                goto err_out;
        }
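Putting the new attributes together: the set path threads ToS, TTL and the pre-computed options length into the metadata dst at allocation time, then fills the options afterwards. Condensed sketch of the flow above, IPv4 branch only, with the err_out unwinding elided:

	metadata = __ip_tun_set_dst(saddr, daddr, tos, ttl,
				    dst_port, flags, key_id, opts_len);
	if (!metadata)
		return -ENOMEM;		/* allocation, not validation, failure */

	if (opts_len) {
		err = tunnel_key_opts_set(tb[TCA_TUNNEL_KEY_ENC_OPTS],
					  &metadata->u.tun_info,
					  opts_len, extack);
		if (err < 0)
			return err;	/* real code jumps to err_out */
	}
	metadata->u.tun_info.mode |= IP_TUNNEL_INFO_TX;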
@@ -160,14 +332,16 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
        if (!exists) {
                ret = tcf_idr_create(tn, parm->index, est, a,
                                     &act_tunnel_key_ops, bind, true);
-               if (ret)
-                       return ret;
+               if (ret) {
+                       NL_SET_ERR_MSG(extack, "Cannot create TC IDR");
+                       goto err_out;
+               }
 
                ret = ACT_P_CREATED;
-       } else {
+       } else if (!ovr) {
                tcf_idr_release(*a, bind);
-               if (!ovr)
-                       return -EEXIST;
+               NL_SET_ERR_MSG(extack, "TC IDR already exists");
+               return -EEXIST;
        }
 
        t = to_tunnel_key(*a);
@@ -175,8 +349,8 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
        ASSERT_RTNL();
        params_new = kzalloc(sizeof(*params_new), GFP_KERNEL);
        if (unlikely(!params_new)) {
-               if (ret == ACT_P_CREATED)
-                       tcf_idr_release(*a, bind);
+               tcf_idr_release(*a, bind);
+               NL_SET_ERR_MSG(extack, "Cannot allocate tunnel key parameters");
                return -ENOMEM;
        }
 
@@ -199,6 +373,8 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
 err_out:
        if (exists)
                tcf_idr_release(*a, bind);
+       else
+               tcf_idr_cleanup(tn, parm->index);
        return ret;
 }
 
@@ -216,6 +392,61 @@ static void tunnel_key_release(struct tc_action *a)
        }
 }
 
+static int tunnel_key_geneve_opts_dump(struct sk_buff *skb,
+                                      const struct ip_tunnel_info *info)
+{
+       int len = info->options_len;
+       u8 *src = (u8 *)(info + 1);
+       struct nlattr *start;
+
+       start = nla_nest_start(skb, TCA_TUNNEL_KEY_ENC_OPTS_GENEVE);
+       if (!start)
+               return -EMSGSIZE;
+
+       while (len > 0) {
+               struct geneve_opt *opt = (struct geneve_opt *)src;
+
+               if (nla_put_be16(skb, TCA_TUNNEL_KEY_ENC_OPT_GENEVE_CLASS,
+                                opt->opt_class) ||
+                   nla_put_u8(skb, TCA_TUNNEL_KEY_ENC_OPT_GENEVE_TYPE,
+                              opt->type) ||
+                   nla_put(skb, TCA_TUNNEL_KEY_ENC_OPT_GENEVE_DATA,
+                           opt->length * 4, opt + 1))
+                       return -EMSGSIZE;
+
+               len -= sizeof(struct geneve_opt) + opt->length * 4;
+               src += sizeof(struct geneve_opt) + opt->length * 4;
+       }
+
+       nla_nest_end(skb, start);
+       return 0;
+}
+
+static int tunnel_key_opts_dump(struct sk_buff *skb,
+                               const struct ip_tunnel_info *info)
+{
+       struct nlattr *start;
+       int err;
+
+       if (!info->options_len)
+               return 0;
+
+       start = nla_nest_start(skb, TCA_TUNNEL_KEY_ENC_OPTS);
+       if (!start)
+               return -EMSGSIZE;
+
+       if (info->key.tun_flags & TUNNEL_GENEVE_OPT) {
+               err = tunnel_key_geneve_opts_dump(skb, info);
+               if (err)
+                       return err;
+       } else {
+               return -EINVAL;
+       }
+
+       nla_nest_end(skb, start);
+       return 0;
+}
+
 static int tunnel_key_dump_addresses(struct sk_buff *skb,
                                     const struct ip_tunnel_info *info)
 {
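tunnel_key_opts_dump() above uses the standard nested-attribute pattern: open a nest, emit the members, close the nest. A generic sketch (MY_ATTR_* names are illustrative), shown with the explicit nla_nest_cancel() variant of the error path:

	static int dump_opts(struct sk_buff *skb, u8 type)
	{
		struct nlattr *start;

		start = nla_nest_start(skb, MY_ATTR_OPTS);
		if (!start)
			return -EMSGSIZE;

		if (nla_put_u8(skb, MY_ATTR_OPT_TYPE, type))
			goto cancel;

		nla_nest_end(skb, start);	/* patches the nest length */
		return 0;

	cancel:
		nla_nest_cancel(skb, start);	/* rolls back the partial nest */
		return -EMSGSIZE;
	}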
@@ -252,8 +483,8 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a,
        struct tcf_tunnel_key_params *params;
        struct tc_tunnel_key opt = {
                .index    = t->tcf_index,
-               .refcnt   = t->tcf_refcnt - ref,
-               .bindcnt  = t->tcf_bindcnt - bind,
+               .refcnt   = refcount_read(&t->tcf_refcnt) - ref,
+               .bindcnt  = atomic_read(&t->tcf_bindcnt) - bind,
                .action   = t->tcf_action,
        };
        struct tcf_t tm;
@@ -266,8 +497,9 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a,
                goto nla_put_failure;
 
        if (params->tcft_action == TCA_TUNNEL_KEY_ACT_SET) {
-               struct ip_tunnel_key *key =
-                       &params->tcft_enc_metadata->u.tun_info.key;
+               struct ip_tunnel_info *info =
+                       &params->tcft_enc_metadata->u.tun_info;
+               struct ip_tunnel_key *key = &info->key;
                __be32 key_id = tunnel_id_to_key32(key->tun_id);
 
                if (nla_put_be32(skb, TCA_TUNNEL_KEY_ENC_KEY_ID, key_id) ||
@@ -275,7 +507,14 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a,
                                              &params->tcft_enc_metadata->u.tun_info) ||
                    nla_put_be16(skb, TCA_TUNNEL_KEY_ENC_DST_PORT, key->tp_dst) ||
                    nla_put_u8(skb, TCA_TUNNEL_KEY_NO_CSUM,
-                              !(key->tun_flags & TUNNEL_CSUM)))
+                              !(key->tun_flags & TUNNEL_CSUM)) ||
+                   tunnel_key_opts_dump(skb, info))
+                       goto nla_put_failure;
+
+               if (key->tos && nla_put_u8(skb, TCA_TUNNEL_KEY_ENC_TOS, key->tos))
+                       goto nla_put_failure;
+
+               if (key->ttl && nla_put_u8(skb, TCA_TUNNEL_KEY_ENC_TTL, key->ttl))
                        goto nla_put_failure;
        }
 
@@ -309,6 +548,13 @@ static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index,
        return tcf_idr_search(tn, a, index);
 }
 
+static int tunnel_key_delete(struct net *net, u32 index)
+{
+       struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
+
+       return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_tunnel_key_ops = {
        .kind           =       "tunnel_key",
        .type           =       TCA_ACT_TUNNEL_KEY,
@@ -319,6 +565,7 @@ static struct tc_action_ops act_tunnel_key_ops = {
        .cleanup        =       tunnel_key_release,
        .walk           =       tunnel_key_walker,
        .lookup         =       tunnel_key_search,
+       .delete         =       tunnel_key_delete,
        .size           =       sizeof(struct tcf_tunnel_key),
 };
 
index 1fb39e1f9d077beb4fdb440459f18116b561f334..15a0ee214c9cfc171300dbe2266569f7a4387d3c 100644 (file)
@@ -40,11 +40,9 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
        if (skb_at_tc_ingress(skb))
                skb_push_rcsum(skb, skb->mac_len);
 
-       rcu_read_lock();
-
        action = READ_ONCE(v->tcf_action);
 
-       p = rcu_dereference(v->vlan_p);
+       p = rcu_dereference_bh(v->vlan_p);
 
        switch (p->tcfv_action) {
        case TCA_VLAN_ACT_POP:
@@ -61,7 +59,7 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
        case TCA_VLAN_ACT_MODIFY:
                /* No-op if no vlan tag (either hw-accel or in-payload) */
                if (!skb_vlan_tagged(skb))
-                       goto unlock;
+                       goto out;
                /* extract existing tag (and guarantee no hw-accel tag) */
                if (skb_vlan_tag_present(skb)) {
                        tci = skb_vlan_tag_get(skb);
@@ -86,18 +84,15 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
                BUG();
        }
 
-       goto unlock;
-
-drop:
-       action = TC_ACT_SHOT;
-       qstats_drop_inc(this_cpu_ptr(v->common.cpu_qstats));
-
-unlock:
-       rcu_read_unlock();
+out:
        if (skb_at_tc_ingress(skb))
                skb_pull_rcsum(skb, skb->mac_len);
 
        return action;
+
+drop:
+       qstats_drop_inc(this_cpu_ptr(v->common.cpu_qstats));
+       return TC_ACT_SHOT;
 }
 
 static const struct nla_policy vlan_policy[TCA_VLAN_MAX + 1] = {
@@ -109,7 +104,8 @@ static const struct nla_policy vlan_policy[TCA_VLAN_MAX + 1] = {
 
 static int tcf_vlan_init(struct net *net, struct nlattr *nla,
                         struct nlattr *est, struct tc_action **a,
-                        int ovr, int bind, struct netlink_ext_ack *extack)
+                        int ovr, int bind, bool rtnl_held,
+                        struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, vlan_net_id);
        struct nlattr *tb[TCA_VLAN_MAX + 1];
@@ -133,7 +129,10 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
        if (!tb[TCA_VLAN_PARMS])
                return -EINVAL;
        parm = nla_data(tb[TCA_VLAN_PARMS]);
-       exists = tcf_idr_check(tn, parm->index, a, bind);
+       err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+       if (err < 0)
+               return err;
+       exists = err;
        if (exists && bind)
                return 0;
 
@@ -145,12 +144,16 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
                if (!tb[TCA_VLAN_PUSH_VLAN_ID]) {
                        if (exists)
                                tcf_idr_release(*a, bind);
+                       else
+                               tcf_idr_cleanup(tn, parm->index);
                        return -EINVAL;
                }
                push_vid = nla_get_u16(tb[TCA_VLAN_PUSH_VLAN_ID]);
                if (push_vid >= VLAN_VID_MASK) {
                        if (exists)
                                tcf_idr_release(*a, bind);
+                       else
+                               tcf_idr_cleanup(tn, parm->index);
                        return -ERANGE;
                }
 
@@ -163,6 +166,8 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
                        default:
                                if (exists)
                                        tcf_idr_release(*a, bind);
+                               else
+                                       tcf_idr_cleanup(tn, parm->index);
                                return -EPROTONOSUPPORT;
                        }
                } else {
@@ -175,6 +180,8 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
        default:
                if (exists)
                        tcf_idr_release(*a, bind);
+               else
+                       tcf_idr_cleanup(tn, parm->index);
                return -EINVAL;
        }
        action = parm->v_action;
@@ -182,14 +189,15 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
        if (!exists) {
                ret = tcf_idr_create(tn, parm->index, est, a,
                                     &act_vlan_ops, bind, true);
-               if (ret)
+               if (ret) {
+                       tcf_idr_cleanup(tn, parm->index);
                        return ret;
+               }
 
                ret = ACT_P_CREATED;
-       } else {
+       } else if (!ovr) {
                tcf_idr_release(*a, bind);
-               if (!ovr)
-                       return -EEXIST;
+               return -EEXIST;
        }
 
        v = to_vlan(*a);
@@ -197,8 +205,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
        ASSERT_RTNL();
        p = kzalloc(sizeof(*p), GFP_KERNEL);
        if (!p) {
-               if (ret == ACT_P_CREATED)
-                       tcf_idr_release(*a, bind);
+               tcf_idr_release(*a, bind);
                return -ENOMEM;
        }
 
@@ -239,8 +246,8 @@ static int tcf_vlan_dump(struct sk_buff *skb, struct tc_action *a,
        struct tcf_vlan_params *p = rtnl_dereference(v->vlan_p);
        struct tc_vlan opt = {
                .index    = v->tcf_index,
-               .refcnt   = v->tcf_refcnt - ref,
-               .bindcnt  = v->tcf_bindcnt - bind,
+               .refcnt   = refcount_read(&v->tcf_refcnt) - ref,
+               .bindcnt  = atomic_read(&v->tcf_bindcnt) - bind,
                .action   = v->tcf_action,
                .v_action = p->tcfv_action,
        };
@@ -286,6 +293,13 @@ static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index,
        return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_vlan_delete(struct net *net, u32 index)
+{
+       struct tc_action_net *tn = net_generic(net, vlan_net_id);
+
+       return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_vlan_ops = {
        .kind           =       "vlan",
        .type           =       TCA_ACT_VLAN,
@@ -296,6 +310,7 @@ static struct tc_action_ops act_vlan_ops = {
        .cleanup        =       tcf_vlan_cleanup,
        .walk           =       tcf_vlan_walker,
        .lookup         =       tcf_vlan_search,
+       .delete         =       tcf_vlan_delete,
        .size           =       sizeof(struct tcf_vlan),
 };
 
index f74513a7c7a8ed179bfbeabb17fe60dd2f9b6eb2..194c2e0b27372878448979bfdeee3fadb932cd52 100644 (file)
@@ -39,7 +39,7 @@ static DEFINE_RWLOCK(cls_mod_lock);
 
 /* Find classifier type by string name */
 
-static const struct tcf_proto_ops *tcf_proto_lookup_ops(const char *kind)
+static const struct tcf_proto_ops *__tcf_proto_lookup_ops(const char *kind)
 {
        const struct tcf_proto_ops *t, *res = NULL;
 
@@ -57,6 +57,33 @@ static const struct tcf_proto_ops *tcf_proto_lookup_ops(const char *kind)
        return res;
 }
 
+static const struct tcf_proto_ops *
+tcf_proto_lookup_ops(const char *kind, struct netlink_ext_ack *extack)
+{
+       const struct tcf_proto_ops *ops;
+
+       ops = __tcf_proto_lookup_ops(kind);
+       if (ops)
+               return ops;
+#ifdef CONFIG_MODULES
+       rtnl_unlock();
+       request_module("cls_%s", kind);
+       rtnl_lock();
+       ops = __tcf_proto_lookup_ops(kind);
+       /* We dropped the RTNL semaphore in order to perform
+        * the module load. So, even if we succeeded in loading
+        * the module we have to replay the request. We indicate
+        * this using -EAGAIN.
+        */
+       if (ops) {
+               module_put(ops->owner);
+               return ERR_PTR(-EAGAIN);
+       }
+#endif
+       NL_SET_ERR_MSG(extack, "TC classifier not found");
+       return ERR_PTR(-ENOENT);
+}
+
 /* Register(unregister) new classifier type */
 
 int register_tcf_proto_ops(struct tcf_proto_ops *ops)
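Factoring the lookup out lets both filter creation and the new chain-template code share the module-autoload dance. The subtlety is preserved verbatim: RTNL must be dropped around request_module(), and even a successful load fails the call with -EAGAIN so the caller replays the whole request under a consistent lock. Caller-side sketch:

	ops = tcf_proto_lookup_ops(kind, extack);
	if (IS_ERR(ops)) {
		err = PTR_ERR(ops);
		/* -EAGAIN: the module was loaded while RTNL was dropped;
		 * the netlink request must be replayed from the top.
		 */
		goto errout;
	}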
@@ -133,27 +160,9 @@ static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
        if (!tp)
                return ERR_PTR(-ENOBUFS);
 
-       err = -ENOENT;
-       tp->ops = tcf_proto_lookup_ops(kind);
-       if (!tp->ops) {
-#ifdef CONFIG_MODULES
-               rtnl_unlock();
-               request_module("cls_%s", kind);
-               rtnl_lock();
-               tp->ops = tcf_proto_lookup_ops(kind);
-               /* We dropped the RTNL semaphore in order to perform
-                * the module load. So, even if we succeeded in loading
-                * the module we have to replay the request. We indicate
-                * this using -EAGAIN.
-                */
-               if (tp->ops) {
-                       module_put(tp->ops->owner);
-                       err = -EAGAIN;
-               } else {
-                       NL_SET_ERR_MSG(extack, "TC classifier not found");
-                       err = -ENOENT;
-               }
-#endif
+       tp->ops = tcf_proto_lookup_ops(kind, extack);
+       if (IS_ERR(tp->ops)) {
+               err = PTR_ERR(tp->ops);
                goto errout;
        }
        tp->classify = tp->ops->classify;
@@ -195,11 +204,12 @@ static struct tcf_chain *tcf_chain_create(struct tcf_block *block,
        chain = kzalloc(sizeof(*chain), GFP_KERNEL);
        if (!chain)
                return NULL;
-       INIT_LIST_HEAD(&chain->filter_chain_list);
        list_add_tail(&chain->list, &block->chain_list);
        chain->block = block;
        chain->index = chain_index;
        chain->refcnt = 1;
+       if (!chain->index)
+               block->chain0.chain = chain;
        return chain;
 }
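Chain 0 is no longer pre-created together with the block; instead the block lazily caches a pointer to it whenever it comes into existence, and the head-change callback list moves from the chain to block->chain0. Sketch of the bookkeeping:

	if (!chain->index)
		block->chain0.chain = chain;	/* on create */
	/* ... */
	if (!chain->index)
		block->chain0.chain = NULL;	/* on destroy */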
 
@@ -209,35 +219,28 @@ static void tcf_chain_head_change_item(struct tcf_filter_chain_list_item *item,
        if (item->chain_head_change)
                item->chain_head_change(tp_head, item->chain_head_change_priv);
 }
-static void tcf_chain_head_change(struct tcf_chain *chain,
-                                 struct tcf_proto *tp_head)
+
+static void tcf_chain0_head_change(struct tcf_chain *chain,
+                                  struct tcf_proto *tp_head)
 {
        struct tcf_filter_chain_list_item *item;
+       struct tcf_block *block = chain->block;
 
-       list_for_each_entry(item, &chain->filter_chain_list, list)
+       if (chain->index)
+               return;
+       list_for_each_entry(item, &block->chain0.filter_chain_list, list)
                tcf_chain_head_change_item(item, tp_head);
 }
 
-static void tcf_chain_flush(struct tcf_chain *chain)
-{
-       struct tcf_proto *tp = rtnl_dereference(chain->filter_chain);
-
-       tcf_chain_head_change(chain, NULL);
-       while (tp) {
-               RCU_INIT_POINTER(chain->filter_chain, tp->next);
-               tcf_proto_destroy(tp, NULL);
-               tp = rtnl_dereference(chain->filter_chain);
-               tcf_chain_put(chain);
-       }
-}
-
 static void tcf_chain_destroy(struct tcf_chain *chain)
 {
        struct tcf_block *block = chain->block;
 
        list_del(&chain->list);
+       if (!chain->index)
+               block->chain0.chain = NULL;
        kfree(chain);
-       if (list_empty(&block->chain_list))
+       if (list_empty(&block->chain_list) && block->refcnt == 0)
                kfree(block);
 }
 
@@ -246,28 +249,119 @@ static void tcf_chain_hold(struct tcf_chain *chain)
        ++chain->refcnt;
 }
 
-struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
-                               bool create)
+static bool tcf_chain_held_by_acts_only(struct tcf_chain *chain)
+{
+       /* In case all the references are action references, this
+        * chain should not be shown to the user.
+        */
+       return chain->refcnt == chain->action_refcnt;
+}
+
+static struct tcf_chain *tcf_chain_lookup(struct tcf_block *block,
+                                         u32 chain_index)
 {
        struct tcf_chain *chain;
 
        list_for_each_entry(chain, &block->chain_list, list) {
-               if (chain->index == chain_index) {
-                       tcf_chain_hold(chain);
+               if (chain->index == chain_index)
                        return chain;
-               }
        }
+       return NULL;
+}
 
-       return create ? tcf_chain_create(block, chain_index) : NULL;
+static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
+                          u32 seq, u16 flags, int event, bool unicast);
+
+static struct tcf_chain *__tcf_chain_get(struct tcf_block *block,
+                                        u32 chain_index, bool create,
+                                        bool by_act)
+{
+       struct tcf_chain *chain = tcf_chain_lookup(block, chain_index);
+
+       if (chain) {
+               tcf_chain_hold(chain);
+       } else {
+               if (!create)
+                       return NULL;
+               chain = tcf_chain_create(block, chain_index);
+               if (!chain)
+                       return NULL;
+       }
+
+       if (by_act)
+               ++chain->action_refcnt;
+
+       /* Send notification only in case we got the first
+        * non-action reference. Until then, the chain acts only as
+        * a placeholder for actions pointing to it and user ought
+        * not know about them.
+        */
+       if (chain->refcnt - chain->action_refcnt == 1 && !by_act)
+               tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL,
+                               RTM_NEWCHAIN, false);
+
+       return chain;
 }
-EXPORT_SYMBOL(tcf_chain_get);
 
-void tcf_chain_put(struct tcf_chain *chain)
+static struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
+                                      bool create)
 {
-       if (--chain->refcnt == 0)
+       return __tcf_chain_get(block, chain_index, create, false);
+}
+
+struct tcf_chain *tcf_chain_get_by_act(struct tcf_block *block, u32 chain_index)
+{
+       return __tcf_chain_get(block, chain_index, true, true);
+}
+EXPORT_SYMBOL(tcf_chain_get_by_act);
+
+static void tc_chain_tmplt_del(struct tcf_chain *chain);
+
+static void __tcf_chain_put(struct tcf_chain *chain, bool by_act)
+{
+       if (by_act)
+               chain->action_refcnt--;
+       chain->refcnt--;
+
+       /* The last dropped non-action reference will trigger notification. */
+       if (chain->refcnt - chain->action_refcnt == 0 && !by_act)
+               tc_chain_notify(chain, NULL, 0, 0, RTM_DELCHAIN, false);
+
+       if (chain->refcnt == 0) {
+               tc_chain_tmplt_del(chain);
                tcf_chain_destroy(chain);
+       }
+}
+
+static void tcf_chain_put(struct tcf_chain *chain)
+{
+       __tcf_chain_put(chain, false);
+}
+
+void tcf_chain_put_by_act(struct tcf_chain *chain)
+{
+       __tcf_chain_put(chain, true);
+}
+EXPORT_SYMBOL(tcf_chain_put_by_act);
+
+static void tcf_chain_put_explicitly_created(struct tcf_chain *chain)
+{
+       if (chain->explicitly_created)
+               tcf_chain_put(chain);
+}
+
+static void tcf_chain_flush(struct tcf_chain *chain)
+{
+       struct tcf_proto *tp = rtnl_dereference(chain->filter_chain);
+
+       tcf_chain0_head_change(chain, NULL);
+       while (tp) {
+               RCU_INIT_POINTER(chain->filter_chain, tp->next);
+               tcf_proto_destroy(tp, NULL);
+               tp = rtnl_dereference(chain->filter_chain);
+               tcf_chain_put(chain);
+       }
 }
-EXPORT_SYMBOL(tcf_chain_put);
 
 static bool tcf_block_offload_in_use(struct tcf_block *block)
 {
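The chain reference counting now distinguishes holders: every holder bumps refcnt, and holders that are tc actions additionally bump action_refcnt. A chain whose two counters are equal is held by actions only and stays hidden from the user; RTM_NEWCHAIN/RTM_DELCHAIN notifications fire on the first and last non-action reference. The invariant in one line:

	/* user-visible iff it has at least one non-action reference */
	return chain->refcnt == chain->action_refcnt;	/* acts-only? */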
@@ -277,18 +371,21 @@ static bool tcf_block_offload_in_use(struct tcf_block *block)
 static int tcf_block_offload_cmd(struct tcf_block *block,
                                 struct net_device *dev,
                                 struct tcf_block_ext_info *ei,
-                                enum tc_block_command command)
+                                enum tc_block_command command,
+                                struct netlink_ext_ack *extack)
 {
        struct tc_block_offload bo = {};
 
        bo.command = command;
        bo.binder_type = ei->binder_type;
        bo.block = block;
+       bo.extack = extack;
        return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
 }
 
 static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
-                                 struct tcf_block_ext_info *ei)
+                                 struct tcf_block_ext_info *ei,
+                                 struct netlink_ext_ack *extack)
 {
        struct net_device *dev = q->dev_queue->dev;
        int err;
@@ -299,10 +396,12 @@ static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
        /* If tc offload feature is disabled and the block we try to bind
         * to already has some offloaded filters, forbid to bind.
         */
-       if (!tc_can_offload(dev) && tcf_block_offload_in_use(block))
+       if (!tc_can_offload(dev) && tcf_block_offload_in_use(block)) {
+               NL_SET_ERR_MSG(extack, "Bind to offloaded block failed as dev has offload disabled");
                return -EOPNOTSUPP;
+       }
 
-       err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_BIND);
+       err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_BIND, extack);
        if (err == -EOPNOTSUPP)
                goto no_offload_dev_inc;
        return err;
@@ -322,7 +421,7 @@ static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q,
 
        if (!dev->netdev_ops->ndo_setup_tc)
                goto no_offload_dev_dec;
-       err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_UNBIND);
+       err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_UNBIND, NULL);
        if (err == -EOPNOTSUPP)
                goto no_offload_dev_dec;
        return;
@@ -332,10 +431,11 @@ no_offload_dev_dec:
 }
 
 static int
-tcf_chain_head_change_cb_add(struct tcf_chain *chain,
-                            struct tcf_block_ext_info *ei,
-                            struct netlink_ext_ack *extack)
+tcf_chain0_head_change_cb_add(struct tcf_block *block,
+                             struct tcf_block_ext_info *ei,
+                             struct netlink_ext_ack *extack)
 {
+       struct tcf_chain *chain0 = block->chain0.chain;
        struct tcf_filter_chain_list_item *item;
 
        item = kmalloc(sizeof(*item), GFP_KERNEL);
@@ -345,23 +445,25 @@ tcf_chain_head_change_cb_add(struct tcf_chain *chain,
        }
        item->chain_head_change = ei->chain_head_change;
        item->chain_head_change_priv = ei->chain_head_change_priv;
-       if (chain->filter_chain)
-               tcf_chain_head_change_item(item, chain->filter_chain);
-       list_add(&item->list, &chain->filter_chain_list);
+       if (chain0 && chain0->filter_chain)
+               tcf_chain_head_change_item(item, chain0->filter_chain);
+       list_add(&item->list, &block->chain0.filter_chain_list);
        return 0;
 }
 
 static void
-tcf_chain_head_change_cb_del(struct tcf_chain *chain,
-                            struct tcf_block_ext_info *ei)
+tcf_chain0_head_change_cb_del(struct tcf_block *block,
+                             struct tcf_block_ext_info *ei)
 {
+       struct tcf_chain *chain0 = block->chain0.chain;
        struct tcf_filter_chain_list_item *item;
 
-       list_for_each_entry(item, &chain->filter_chain_list, list) {
+       list_for_each_entry(item, &block->chain0.filter_chain_list, list) {
                if ((!ei->chain_head_change && !ei->chain_head_change_priv) ||
                    (item->chain_head_change == ei->chain_head_change &&
                     item->chain_head_change_priv == ei->chain_head_change_priv)) {
-                       tcf_chain_head_change_item(item, NULL);
+                       if (chain0)
+                               tcf_chain_head_change_item(item, NULL);
                        list_del(&item->list);
                        kfree(item);
                        return;
@@ -397,8 +499,6 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
                                          struct netlink_ext_ack *extack)
 {
        struct tcf_block *block;
-       struct tcf_chain *chain;
-       int err;
 
        block = kzalloc(sizeof(*block), GFP_KERNEL);
        if (!block) {
@@ -408,14 +508,8 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
        INIT_LIST_HEAD(&block->chain_list);
        INIT_LIST_HEAD(&block->cb_list);
        INIT_LIST_HEAD(&block->owner_list);
+       INIT_LIST_HEAD(&block->chain0.filter_chain_list);
 
-       /* Create chain 0 by default, it has to be always present. */
-       chain = tcf_chain_create(block, 0);
-       if (!chain) {
-               NL_SET_ERR_MSG(extack, "Failed to create new tcf chain");
-               err = -ENOMEM;
-               goto err_chain_create;
-       }
        block->refcnt = 1;
        block->net = net;
        block->index = block_index;
@@ -424,10 +518,6 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
        if (!tcf_block_shared(block))
                block->q = q;
        return block;
-
-err_chain_create:
-       kfree(block);
-       return ERR_PTR(err);
 }
 
 static struct tcf_block *tcf_block_lookup(struct net *net, u32 block_index)
@@ -509,11 +599,6 @@ static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q,
        return block;
 }
 
-static struct tcf_chain *tcf_block_chain_zero(struct tcf_block *block)
-{
-       return list_first_entry(&block->chain_list, struct tcf_chain, list);
-}
-
 struct tcf_block_owner_item {
        struct list_head list;
        struct Qdisc *q;
@@ -607,12 +692,11 @@ int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
 
        tcf_block_owner_netif_keep_dst(block, q, ei->binder_type);
 
-       err = tcf_chain_head_change_cb_add(tcf_block_chain_zero(block),
-                                          ei, extack);
+       err = tcf_chain0_head_change_cb_add(block, ei, extack);
        if (err)
-               goto err_chain_head_change_cb_add;
+               goto err_chain0_head_change_cb_add;
 
-       err = tcf_block_offload_bind(block, q, ei);
+       err = tcf_block_offload_bind(block, q, ei, extack);
        if (err)
                goto err_block_offload_bind;
 
@@ -620,15 +704,14 @@ int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
        return 0;
 
 err_block_offload_bind:
-       tcf_chain_head_change_cb_del(tcf_block_chain_zero(block), ei);
-err_chain_head_change_cb_add:
+       tcf_chain0_head_change_cb_del(block, ei);
+err_chain0_head_change_cb_add:
        tcf_block_owner_del(block, q, ei->binder_type);
 err_block_owner_add:
        if (created) {
                if (tcf_block_shared(block))
                        tcf_block_remove(block, net);
 err_block_insert:
-               kfree(tcf_block_chain_zero(block));
                kfree(block);
        } else {
                block->refcnt--;
@@ -668,10 +751,10 @@ void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
 
        if (!block)
                return;
-       tcf_chain_head_change_cb_del(tcf_block_chain_zero(block), ei);
+       tcf_chain0_head_change_cb_del(block, ei);
        tcf_block_owner_del(block, q, ei->binder_type);
 
-       if (--block->refcnt == 0) {
+       if (block->refcnt == 1) {
                if (tcf_block_shared(block))
                        tcf_block_remove(block, block->net);
 
@@ -687,13 +770,16 @@ void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
 
        tcf_block_offload_unbind(block, q, ei);
 
-       if (block->refcnt == 0) {
+       if (block->refcnt == 1) {
                /* At this point, all the chains should have refcnt >= 1. */
-               list_for_each_entry_safe(chain, tmp, &block->chain_list, list)
+               list_for_each_entry_safe(chain, tmp, &block->chain_list, list) {
+                       tcf_chain_put_explicitly_created(chain);
                        tcf_chain_put(chain);
+               }
 
-               /* Finally, put chain 0 and allow block to be freed. */
-               tcf_chain_put(tcf_block_chain_zero(block));
+               block->refcnt--;
+               if (list_empty(&block->chain_list))
+                       kfree(block);
        }
 }
 EXPORT_SYMBOL(tcf_block_put_ext);
@@ -746,18 +832,53 @@ unsigned int tcf_block_cb_decref(struct tcf_block_cb *block_cb)
 }
 EXPORT_SYMBOL(tcf_block_cb_decref);
 
+static int
+tcf_block_playback_offloads(struct tcf_block *block, tc_setup_cb_t *cb,
+                           void *cb_priv, bool add, bool offload_in_use,
+                           struct netlink_ext_ack *extack)
+{
+       struct tcf_chain *chain;
+       struct tcf_proto *tp;
+       int err;
+
+       list_for_each_entry(chain, &block->chain_list, list) {
+               for (tp = rtnl_dereference(chain->filter_chain); tp;
+                    tp = rtnl_dereference(tp->next)) {
+                       if (tp->ops->reoffload) {
+                               err = tp->ops->reoffload(tp, add, cb, cb_priv,
+                                                        extack);
+                               if (err && add)
+                                       goto err_playback_remove;
+                       } else if (add && offload_in_use) {
+                               err = -EOPNOTSUPP;
+                               NL_SET_ERR_MSG(extack, "Filter HW offload failed - classifier without re-offloading support");
+                               goto err_playback_remove;
+                       }
+               }
+       }
+
+       return 0;
+
+err_playback_remove:
+       tcf_block_playback_offloads(block, cb, cb_priv, false, offload_in_use,
+                                   extack);
+       return err;
+}
+
 struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block,
                                             tc_setup_cb_t *cb, void *cb_ident,
-                                            void *cb_priv)
+                                            void *cb_priv,
+                                            struct netlink_ext_ack *extack)
 {
        struct tcf_block_cb *block_cb;
+       int err;
 
-       /* At this point, playback of previous block cb calls is not supported,
-        * so forbid to register to block which already has some offloaded
-        * filters present.
-        */
-       if (tcf_block_offload_in_use(block))
-               return ERR_PTR(-EOPNOTSUPP);
+       /* Replay any already present rules */
+       err = tcf_block_playback_offloads(block, cb, cb_priv, true,
+                                         tcf_block_offload_in_use(block),
+                                         extack);
+       if (err)
+               return ERR_PTR(err);
 
        block_cb = kzalloc(sizeof(*block_cb), GFP_KERNEL);
        if (!block_cb)
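The driver-facing consequence of the playback code above: registering a block callback no longer fails outright when the block already has offloaded filters. Existing rules are replayed through each classifier's ->reoffload() hook, and a failure mid-replay removes whatever was already added. Driver-side sketch (my_setup_cb and my_priv are illustrative):

	err = tcf_block_cb_register(block, my_setup_cb, my_priv, my_priv,
				    extack);
	if (err)
		return err;	/* e.g. a bound classifier lacks ->reoffload() */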
@@ -772,17 +893,22 @@ EXPORT_SYMBOL(__tcf_block_cb_register);
 
 int tcf_block_cb_register(struct tcf_block *block,
                          tc_setup_cb_t *cb, void *cb_ident,
-                         void *cb_priv)
+                         void *cb_priv, struct netlink_ext_ack *extack)
 {
        struct tcf_block_cb *block_cb;
 
-       block_cb = __tcf_block_cb_register(block, cb, cb_ident, cb_priv);
-       return IS_ERR(block_cb) ? PTR_ERR(block_cb) : 0;
+       block_cb = __tcf_block_cb_register(block, cb, cb_ident, cb_priv,
+                                          extack);
+       return PTR_ERR_OR_ZERO(block_cb);
 }
 EXPORT_SYMBOL(tcf_block_cb_register);
 
-void __tcf_block_cb_unregister(struct tcf_block_cb *block_cb)
+void __tcf_block_cb_unregister(struct tcf_block *block,
+                              struct tcf_block_cb *block_cb)
 {
+       tcf_block_playback_offloads(block, block_cb->cb, block_cb->cb_priv,
+                                   false, tcf_block_offload_in_use(block),
+                                   NULL);
        list_del(&block_cb->list);
        kfree(block_cb);
 }
@@ -796,7 +922,7 @@ void tcf_block_cb_unregister(struct tcf_block *block,
        block_cb = tcf_block_cb_lookup(block, cb, cb_ident);
        if (!block_cb)
                return;
-       __tcf_block_cb_unregister(block_cb);
+       __tcf_block_cb_unregister(block, block_cb);
 }
 EXPORT_SYMBOL(tcf_block_cb_unregister);
 
@@ -893,7 +1019,7 @@ static void tcf_chain_tp_insert(struct tcf_chain *chain,
                                struct tcf_proto *tp)
 {
        if (*chain_info->pprev == chain->filter_chain)
-               tcf_chain_head_change(chain, tp);
+               tcf_chain0_head_change(chain, tp);
        RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain_info));
        rcu_assign_pointer(*chain_info->pprev, tp);
        tcf_chain_hold(chain);
@@ -906,7 +1032,7 @@ static void tcf_chain_tp_remove(struct tcf_chain *chain,
        struct tcf_proto *next = rtnl_dereference(chain_info->next);
 
        if (tp == chain->filter_chain)
-               tcf_chain_head_change(chain, next);
+               tcf_chain0_head_change(chain, next);
        RCU_INIT_POINTER(*chain_info->pprev, next);
        tcf_chain_put(chain);
 }
@@ -1182,6 +1308,12 @@ replay:
                goto errout;
        }
 
+       if (chain->tmplt_ops && chain->tmplt_ops != tp->ops) {
+               NL_SET_ERR_MSG(extack, "Chain template is set to a different filter kind");
+               err = -EINVAL;
+               goto errout;
+       }
+
        err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
                              n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE,
                              extack);
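The template check above is the enforcement half of the chain-template feature introduced later in this file: once a chain carries a template, every filter inserted into it must be of the template's kind. A classifier that wants to support templates has to provide all three hooks, per the tc_chain_tmplt_add() check below (sketch, my_* names illustrative):

	static struct tcf_proto_ops my_cls_ops = {
		.kind		= "my_cls",
		.tmplt_create	= my_tmplt_create,	/* parse attrs, return priv */
		.tmplt_destroy	= my_tmplt_destroy,	/* free priv */
		.tmplt_dump	= my_tmplt_dump,	/* fill RTM_GETCHAIN reply */
		/* ... classify/init/change/delete/walk as usual ... */
	};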
@@ -1257,6 +1389,13 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
        }
        chain = tcf_chain_get(block, chain_index, false);
        if (!chain) {
+               /* User requested flush on non-existent chain. Nothing to do,
+                * so just return success.
+                */
+               if (prio == 0) {
+                       err = 0;
+                       goto errout;
+               }
                NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
                err = -EINVAL;
                goto errout;
@@ -1463,7 +1602,9 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
                arg.w.stop = 0;
                arg.w.skip = cb->args[1] - 1;
                arg.w.count = 0;
+               arg.w.cookie = cb->args[2];
                tp->ops->walk(tp, &arg.w);
+               cb->args[2] = arg.w.cookie;
                cb->args[1] = arg.w.count + 1;
                if (arg.w.stop)
                        return false;
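The new cookie lets a classifier resume a partially completed walk across netlink dump callbacks instead of skipping forward by count, which matters for classifiers with very large rule sets. The round trip:

	arg.w.cookie = cb->args[2];	/* where the previous dump stopped */
	tp->ops->walk(tp, &arg.w);
	cb->args[2] = arg.w.cookie;	/* saved for the next callback */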
@@ -1561,14 +1702,334 @@ out:
        return skb->len;
 }
 
+static int tc_chain_fill_node(struct tcf_chain *chain, struct net *net,
+                             struct sk_buff *skb, struct tcf_block *block,
+                             u32 portid, u32 seq, u16 flags, int event)
+{
+       unsigned char *b = skb_tail_pointer(skb);
+       const struct tcf_proto_ops *ops;
+       struct nlmsghdr *nlh;
+       struct tcmsg *tcm;
+       void *priv;
+
+       ops = chain->tmplt_ops;
+       priv = chain->tmplt_priv;
+
+       nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
+       if (!nlh)
+               goto out_nlmsg_trim;
+       tcm = nlmsg_data(nlh);
+       tcm->tcm_family = AF_UNSPEC;
+       tcm->tcm__pad1 = 0;
+       tcm->tcm__pad2 = 0;
+       tcm->tcm_handle = 0;
+       if (block->q) {
+               tcm->tcm_ifindex = qdisc_dev(block->q)->ifindex;
+               tcm->tcm_parent = block->q->handle;
+       } else {
+               tcm->tcm_ifindex = TCM_IFINDEX_MAGIC_BLOCK;
+               tcm->tcm_block_index = block->index;
+       }
+
+       if (nla_put_u32(skb, TCA_CHAIN, chain->index))
+               goto nla_put_failure;
+
+       if (ops) {
+               if (nla_put_string(skb, TCA_KIND, ops->kind))
+                       goto nla_put_failure;
+               if (ops->tmplt_dump(skb, net, priv) < 0)
+                       goto nla_put_failure;
+       }
+
+       nlh->nlmsg_len = skb_tail_pointer(skb) - b;
+       return skb->len;
+
+out_nlmsg_trim:
+nla_put_failure:
+       nlmsg_trim(skb, b);
+       return -EMSGSIZE;
+}
+
+static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
+                          u32 seq, u16 flags, int event, bool unicast)
+{
+       u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+       struct tcf_block *block = chain->block;
+       struct net *net = block->net;
+       struct sk_buff *skb;
+
+       skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+       if (!skb)
+               return -ENOBUFS;
+
+       if (tc_chain_fill_node(chain, net, skb, block, portid,
+                              seq, flags, event) <= 0) {
+               kfree_skb(skb);
+               return -EINVAL;
+       }
+
+       if (unicast)
+               return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
+
+       return rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO);
+}
+
+static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net,
+                             struct nlattr **tca,
+                             struct netlink_ext_ack *extack)
+{
+       const struct tcf_proto_ops *ops;
+       void *tmplt_priv;
+
+       /* If kind is not set, user did not specify template. */
+       if (!tca[TCA_KIND])
+               return 0;
+
+       ops = tcf_proto_lookup_ops(nla_data(tca[TCA_KIND]), extack);
+       if (IS_ERR(ops))
+               return PTR_ERR(ops);
+       if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump) {
+               NL_SET_ERR_MSG(extack, "Chain templates are not supported with specified classifier");
+               return -EOPNOTSUPP;
+       }
+
+       tmplt_priv = ops->tmplt_create(net, chain, tca, extack);
+       if (IS_ERR(tmplt_priv)) {
+               module_put(ops->owner);
+               return PTR_ERR(tmplt_priv);
+       }
+       chain->tmplt_ops = ops;
+       chain->tmplt_priv = tmplt_priv;
+       return 0;
+}
+
+static void tc_chain_tmplt_del(struct tcf_chain *chain)
+{
+       const struct tcf_proto_ops *ops = chain->tmplt_ops;
+
+       /* If template ops are set, no work to do for us. */
+       if (!ops)
+               return;
+
+       ops->tmplt_destroy(chain->tmplt_priv);
+       module_put(ops->owner);
+}
+
+/* Add/delete/get a chain */
+
+static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n,
+                       struct netlink_ext_ack *extack)
+{
+       struct net *net = sock_net(skb->sk);
+       struct nlattr *tca[TCA_MAX + 1];
+       struct tcmsg *t;
+       u32 parent;
+       u32 chain_index;
+       struct Qdisc *q = NULL;
+       struct tcf_chain *chain = NULL;
+       struct tcf_block *block;
+       unsigned long cl;
+       int err;
+
+       if (n->nlmsg_type != RTM_GETCHAIN &&
+           !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
+               return -EPERM;
+
+replay:
+       err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL, extack);
+       if (err < 0)
+               return err;
+
+       t = nlmsg_data(n);
+       parent = t->tcm_parent;
+       cl = 0;
+
+       block = tcf_block_find(net, &q, &parent, &cl,
+                              t->tcm_ifindex, t->tcm_block_index, extack);
+       if (IS_ERR(block))
+               return PTR_ERR(block);
+
+       chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
+       if (chain_index > TC_ACT_EXT_VAL_MASK) {
+               NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
+               return -EINVAL;
+       }
+       chain = tcf_chain_lookup(block, chain_index);
+       if (n->nlmsg_type == RTM_NEWCHAIN) {
+               if (chain) {
+                       if (tcf_chain_held_by_acts_only(chain)) {
+                               /* The chain exists only because there is
+                                * some action referencing it.
+                                */
+                               tcf_chain_hold(chain);
+                       } else {
+                               NL_SET_ERR_MSG(extack, "Filter chain already exists");
+                               return -EEXIST;
+                       }
+               } else {
+                       if (!(n->nlmsg_flags & NLM_F_CREATE)) {
+                               NL_SET_ERR_MSG(extack, "Need both RTM_NEWCHAIN and NLM_F_CREATE to create a new chain");
+                               return -ENOENT;
+                       }
+                       chain = tcf_chain_create(block, chain_index);
+                       if (!chain) {
+                               NL_SET_ERR_MSG(extack, "Failed to create filter chain");
+                               return -ENOMEM;
+                       }
+               }
+       } else {
+               if (!chain || tcf_chain_held_by_acts_only(chain)) {
+                       NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
+                       return -EINVAL;
+               }
+               tcf_chain_hold(chain);
+       }
+
+       switch (n->nlmsg_type) {
+       case RTM_NEWCHAIN:
+               err = tc_chain_tmplt_add(chain, net, tca, extack);
+               if (err)
+                       goto errout;
+               /* In case the chain was successfully added, take a reference
+                * to the chain. This ensures that an empty chain
+                * does not disappear at the end of this function.
+                */
+               tcf_chain_hold(chain);
+               chain->explicitly_created = true;
+               tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL,
+                               RTM_NEWCHAIN, false);
+               break;
+       case RTM_DELCHAIN:
+               /* Flush the chain first as the user requested chain removal. */
+               tcf_chain_flush(chain);
+               /* In case the chain was successfully deleted, put a reference
+                * to the chain previously taken during addition.
+                */
+               tcf_chain_put_explicitly_created(chain);
+               chain->explicitly_created = false;
+               break;
+       case RTM_GETCHAIN:
+               err = tc_chain_notify(chain, skb, n->nlmsg_seq,
+                                     n->nlmsg_seq, n->nlmsg_type, true);
+               if (err < 0)
+                       NL_SET_ERR_MSG(extack, "Failed to send chain notify message");
+               break;
+       default:
+               err = -EOPNOTSUPP;
+               NL_SET_ERR_MSG(extack, "Unsupported message type");
+               goto errout;
+       }
+
+errout:
+       tcf_chain_put(chain);
+       if (err == -EAGAIN)
+               /* Replay the request. */
+               goto replay;
+       return err;
+}
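/* For context, a hedged sketch of the request a user sends to reach the
 * RTM_NEWCHAIN case above: an nlmsghdr + tcmsg pair carrying the chain
 * index in a TCA_CHAIN attribute.  build_newchain() is a hypothetical
 * helper, and the clsact/ingress parent is one common choice, not the
 * only one; field names follow the uapi headers.
 */
#include <string.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/pkt_sched.h>

static size_t build_newchain(void *buf, int ifindex, __u32 chain_index)
{
	struct nlmsghdr *nlh = buf;
	struct tcmsg *t;
	struct rtattr *rta;

	memset(buf, 0, NLMSG_SPACE(sizeof(*t)) + RTA_SPACE(sizeof(__u32)));
	nlh->nlmsg_len = NLMSG_LENGTH(sizeof(*t));
	nlh->nlmsg_type = RTM_NEWCHAIN;
	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;

	t = NLMSG_DATA(nlh);
	t->tcm_family = AF_UNSPEC;
	t->tcm_ifindex = ifindex;
	t->tcm_parent = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS);

	/* append the TCA_CHAIN u32 attribute */
	rta = (struct rtattr *)((char *)buf + NLMSG_ALIGN(nlh->nlmsg_len));
	rta->rta_type = TCA_CHAIN;
	rta->rta_len = RTA_LENGTH(sizeof(chain_index));
	memcpy(RTA_DATA(rta), &chain_index, sizeof(chain_index));
	nlh->nlmsg_len = NLMSG_ALIGN(nlh->nlmsg_len) + RTA_ALIGN(rta->rta_len);

	return nlh->nlmsg_len;
}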
+
+/* called with RTNL */
+static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb)
+{
+       struct net *net = sock_net(skb->sk);
+       struct nlattr *tca[TCA_MAX + 1];
+       struct Qdisc *q = NULL;
+       struct tcf_block *block;
+       struct tcf_chain *chain;
+       struct tcmsg *tcm = nlmsg_data(cb->nlh);
+       long index_start;
+       long index;
+       u32 parent;
+       int err;
+
+       if (nlmsg_len(cb->nlh) < sizeof(*tcm))
+               return skb->len;
+
+       err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, NULL, NULL);
+       if (err)
+               return err;
+
+       if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
+               block = tcf_block_lookup(net, tcm->tcm_block_index);
+               if (!block)
+                       goto out;
+               /* If we work with block index, q is NULL and parent value
+                * will never be used in the following code. The check
+                * in tcf_fill_node prevents it. However, the compiler does not
+                * see that far, so set parent to zero to silence the warning
+                * about parent being uninitialized.
+                */
+               parent = 0;
+       } else {
+               const struct Qdisc_class_ops *cops;
+               struct net_device *dev;
+               unsigned long cl = 0;
+
+               dev = __dev_get_by_index(net, tcm->tcm_ifindex);
+               if (!dev)
+                       return skb->len;
+
+               parent = tcm->tcm_parent;
+               if (!parent) {
+                       q = dev->qdisc;
+                       parent = q->handle;
+               } else {
+                       q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
+               }
+               if (!q)
+                       goto out;
+               cops = q->ops->cl_ops;
+               if (!cops)
+                       goto out;
+               if (!cops->tcf_block)
+                       goto out;
+               if (TC_H_MIN(tcm->tcm_parent)) {
+                       cl = cops->find(q, tcm->tcm_parent);
+                       if (cl == 0)
+                               goto out;
+               }
+               block = cops->tcf_block(q, cl, NULL);
+               if (!block)
+                       goto out;
+               if (tcf_block_shared(block))
+                       q = NULL;
+       }
+
+       index_start = cb->args[0];
+       index = 0;
+
+       list_for_each_entry(chain, &block->chain_list, list) {
+               if ((tca[TCA_CHAIN] &&
+                    nla_get_u32(tca[TCA_CHAIN]) != chain->index))
+                       continue;
+               if (index < index_start) {
+                       index++;
+                       continue;
+               }
+               if (tcf_chain_held_by_acts_only(chain))
+                       continue;
+               err = tc_chain_fill_node(chain, net, skb, block,
+                                        NETLINK_CB(cb->skb).portid,
+                                        cb->nlh->nlmsg_seq, NLM_F_MULTI,
+                                        RTM_NEWCHAIN);
+               if (err <= 0)
+                       break;
+               index++;
+       }
+
+       cb->args[0] = index;
+
+out:
+       /* If we made no progress, the error (EMSGSIZE) is real */
+       if (skb->len == 0 && err)
+               return err;
+       return skb->len;
+}
+
 void tcf_exts_destroy(struct tcf_exts *exts)
 {
 #ifdef CONFIG_NET_CLS_ACT
-       LIST_HEAD(actions);
-
-       ASSERT_RTNL();
-       tcf_exts_to_list(exts, &actions);
-       tcf_action_destroy(&actions, TCA_ACT_UNBIND);
+       tcf_action_destroy(exts->actions, TCA_ACT_UNBIND);
        kfree(exts->actions);
        exts->nr_actions = 0;
 #endif
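/* With this change exts->actions is a flat array of nr_actions action
 * pointers instead of a list_head chain, so consumers index it
 * directly; a sketch of the new access pattern (use() is hypothetical):
 *
 *	for (i = 0; i < exts->nr_actions; i++)
 *		use(exts->actions[i]);
 *
 * No temporary LIST_HEAD needs to be built first.
 */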
@@ -1587,7 +2048,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
                if (exts->police && tb[exts->police]) {
                        act = tcf_action_init_1(net, tp, tb[exts->police],
                                                rate_tlv, "police", ovr,
-                                               TCA_ACT_BIND, extack);
+                                               TCA_ACT_BIND, true, extack);
                        if (IS_ERR(act))
                                return PTR_ERR(act);
 
@@ -1595,17 +2056,15 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
                        exts->actions[0] = act;
                        exts->nr_actions = 1;
                } else if (exts->action && tb[exts->action]) {
-                       LIST_HEAD(actions);
-                       int err, i = 0;
+                       int err;
 
                        err = tcf_action_init(net, tp, tb[exts->action],
                                              rate_tlv, NULL, ovr, TCA_ACT_BIND,
-                                             &actions, &attr_size, extack);
-                       if (err)
+                                             exts->actions, &attr_size, true,
+                                             extack);
+                       if (err < 0)
                                return err;
-                       list_for_each_entry(act, &actions, list)
-                               exts->actions[i++] = act;
-                       exts->nr_actions = i;
+                       exts->nr_actions = err;
                }
                exts->net = net;
        }
@@ -1654,14 +2113,11 @@ int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts)
                 * tc data even if iproute2 was newer - jhs
                 */
                if (exts->type != TCA_OLD_COMPAT) {
-                       LIST_HEAD(actions);
-
                        nest = nla_nest_start(skb, exts->action);
                        if (nest == NULL)
                                goto nla_put_failure;
 
-                       tcf_exts_to_list(exts, &actions);
-                       if (tcf_action_dump(skb, &actions, 0, 0) < 0)
+                       if (tcf_action_dump(skb, exts->actions, 0, 0) < 0)
                                goto nla_put_failure;
                        nla_nest_end(skb, nest);
                } else if (exts->police) {
@@ -1786,6 +2242,10 @@ static int __init tc_filter_init(void)
        rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL, 0);
        rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_get_tfilter,
                      tc_dump_tfilter, 0);
+       rtnl_register(PF_UNSPEC, RTM_NEWCHAIN, tc_ctl_chain, NULL, 0);
+       rtnl_register(PF_UNSPEC, RTM_DELCHAIN, tc_ctl_chain, NULL, 0);
+       rtnl_register(PF_UNSPEC, RTM_GETCHAIN, tc_ctl_chain,
+                     tc_dump_chain, 0);
 
        return 0;
 
index 95367f37098de53ee4b133ff6cd356ccb1e43e55..6a5dce8baf190109f2e9e6902f872236211fc3ff 100644 (file)
@@ -324,4 +324,3 @@ static void __exit exit_basic(void)
 module_init(init_basic)
 module_exit(exit_basic)
 MODULE_LICENSE("GPL");
-
index 1aa7f6511065a1d1da3eda4a409c66c5ce0bc773..fa6fe2fe0f32b521ccb7a77f4e19693e08baf859 100644 (file)
@@ -43,6 +43,7 @@ struct cls_bpf_prog {
        struct tcf_result res;
        bool exts_integrated;
        u32 gen_flags;
+       unsigned int in_hw_count;
        struct tcf_exts exts;
        u32 handle;
        u16 bpf_num_ops;
@@ -174,6 +175,7 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
                        cls_bpf_offload_cmd(tp, oldprog, prog, extack);
                        return err;
                } else if (err > 0) {
+                       prog->in_hw_count = err;
                        tcf_block_offload_inc(block, &prog->gen_flags);
                }
        }
@@ -347,12 +349,10 @@ static int cls_bpf_prog_from_ops(struct nlattr **tb, struct cls_bpf_prog *prog)
        if (bpf_size != nla_len(tb[TCA_BPF_OPS]))
                return -EINVAL;
 
-       bpf_ops = kzalloc(bpf_size, GFP_KERNEL);
+       bpf_ops = kmemdup(nla_data(tb[TCA_BPF_OPS]), bpf_size, GFP_KERNEL);
        if (bpf_ops == NULL)
                return -ENOMEM;
 
-       memcpy(bpf_ops, nla_data(tb[TCA_BPF_OPS]), bpf_size);
-
        fprog_tmp.len = bpf_num_ops;
        fprog_tmp.filter = bpf_ops;
 
@@ -652,6 +652,42 @@ skip:
        }
 }
 
+static int cls_bpf_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
+                            void *cb_priv, struct netlink_ext_ack *extack)
+{
+       struct cls_bpf_head *head = rtnl_dereference(tp->root);
+       struct tcf_block *block = tp->chain->block;
+       struct tc_cls_bpf_offload cls_bpf = {};
+       struct cls_bpf_prog *prog;
+       int err;
+
+       list_for_each_entry(prog, &head->plist, link) {
+               if (tc_skip_hw(prog->gen_flags))
+                       continue;
+
+               tc_cls_common_offload_init(&cls_bpf.common, tp, prog->gen_flags,
+                                          extack);
+               cls_bpf.command = TC_CLSBPF_OFFLOAD;
+               cls_bpf.exts = &prog->exts;
+               cls_bpf.prog = add ? prog->filter : NULL;
+               cls_bpf.oldprog = add ? NULL : prog->filter;
+               cls_bpf.name = prog->bpf_name;
+               cls_bpf.exts_integrated = prog->exts_integrated;
+
+               err = cb(TC_SETUP_CLSBPF, &cls_bpf, cb_priv);
+               if (err) {
+                       if (add && tc_skip_sw(prog->gen_flags))
+                               return err;
+                       continue;
+               }
+
+               tc_cls_offload_cnt_update(block, &prog->in_hw_count,
+                                         &prog->gen_flags, add);
+       }
+
+       return 0;
+}
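/* The cb passed in here is a driver's block callback.  A hedged sketch
 * of the receiving side (the mydrv_* name is hypothetical): on
 * reoffload the driver sees the same TC_SETUP_CLSBPF commands it would
 * get on a live rule insertion, replayed once per existing program.
 */
static int mydrv_setup_tc_block_cb(enum tc_setup_type type,
				   void *type_data, void *cb_priv)
{
	struct tc_cls_bpf_offload *cls_bpf;

	switch (type) {
	case TC_SETUP_CLSBPF:
		cls_bpf = type_data;
		if (cls_bpf->prog)
			return 0;	/* add/replace: pretend we installed it */
		return 0;		/* remove: nothing to undo here */
	default:
		return -EOPNOTSUPP;
	}
}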
+
 static struct tcf_proto_ops cls_bpf_ops __read_mostly = {
        .kind           =       "bpf",
        .owner          =       THIS_MODULE,
@@ -662,6 +698,7 @@ static struct tcf_proto_ops cls_bpf_ops __read_mostly = {
        .change         =       cls_bpf_change,
        .delete         =       cls_bpf_delete,
        .walk           =       cls_bpf_walk,
+       .reoffload      =       cls_bpf_reoffload,
        .dump           =       cls_bpf_dump,
        .bind_class     =       cls_bpf_bind_class,
 };
index 9e8b26a80fb3ea9e57b6b22d259eaefe171eca09..e8bd08ba998a9bf6c12cb13e32bd297fc7cf10cd 100644 (file)
@@ -35,6 +35,7 @@ struct fl_flow_key {
        struct flow_dissector_key_basic basic;
        struct flow_dissector_key_eth_addrs eth;
        struct flow_dissector_key_vlan vlan;
+       struct flow_dissector_key_vlan cvlan;
        union {
                struct flow_dissector_key_ipv4_addrs ipv4;
                struct flow_dissector_key_ipv6_addrs ipv6;
@@ -51,6 +52,7 @@ struct fl_flow_key {
        struct flow_dissector_key_mpls mpls;
        struct flow_dissector_key_tcp tcp;
        struct flow_dissector_key_ip ip;
+       struct flow_dissector_key_ip enc_ip;
 } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
 
 struct fl_flow_mask_range {
@@ -70,6 +72,13 @@ struct fl_flow_mask {
        struct list_head list;
 };
 
+struct fl_flow_tmplt {
+       struct fl_flow_key dummy_key;
+       struct fl_flow_key mask;
+       struct flow_dissector dissector;
+       struct tcf_chain *chain;
+};
+
 struct cls_fl_head {
        struct rhashtable ht;
        struct list_head masks;
@@ -87,6 +96,7 @@ struct cls_fl_filter {
        struct list_head list;
        u32 handle;
        u32 flags;
+       unsigned int in_hw_count;
        struct rcu_work rwork;
        struct net_device *hw_dev;
 };
@@ -144,6 +154,23 @@ static void fl_set_masked_key(struct fl_flow_key *mkey, struct fl_flow_key *key,
                *lmkey++ = *lkey++ & *lmask++;
 }
 
+static bool fl_mask_fits_tmplt(struct fl_flow_tmplt *tmplt,
+                              struct fl_flow_mask *mask)
+{
+       const long *lmask = fl_key_get_start(&mask->key, mask);
+       const long *ltmplt;
+       int i;
+
+       if (!tmplt)
+               return true;
+       ltmplt = fl_key_get_start(&tmplt->mask, mask);
+       for (i = 0; i < fl_mask_range(mask); i += sizeof(long)) {
+               if (~*ltmplt++ & *lmask++)
+                       return false;
+       }
+       return true;
+}
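/* The loop above is a word-wise subset test: a mask "fits" a template
 * iff it enables no bit the template mask leaves disabled, that is,
 * (~tmplt & mask) == 0 for every long.  A one-word illustration:
 *
 *	tmplt = 0x00ff, mask = 0x000f:  ~tmplt & mask = 0x0000 -> fits
 *	tmplt = 0x00ff, mask = 0x0f0f:  ~tmplt & mask = 0x0f00 -> rejected
 */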
+
 static void fl_clear_masked_range(struct fl_flow_key *key,
                                  struct fl_flow_mask *mask)
 {
@@ -289,6 +316,7 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
                fl_hw_destroy_filter(tp, f, NULL);
                return err;
        } else if (err > 0) {
+               f->in_hw_count = err;
                tcf_block_offload_inc(block, &f->flags);
        }
 
@@ -447,6 +475,13 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
        [TCA_FLOWER_KEY_IP_TOS_MASK]    = { .type = NLA_U8 },
        [TCA_FLOWER_KEY_IP_TTL]         = { .type = NLA_U8 },
        [TCA_FLOWER_KEY_IP_TTL_MASK]    = { .type = NLA_U8 },
+       [TCA_FLOWER_KEY_CVLAN_ID]       = { .type = NLA_U16 },
+       [TCA_FLOWER_KEY_CVLAN_PRIO]     = { .type = NLA_U8 },
+       [TCA_FLOWER_KEY_CVLAN_ETH_TYPE] = { .type = NLA_U16 },
+       [TCA_FLOWER_KEY_ENC_IP_TOS]     = { .type = NLA_U8 },
+       [TCA_FLOWER_KEY_ENC_IP_TOS_MASK] = { .type = NLA_U8 },
+       [TCA_FLOWER_KEY_ENC_IP_TTL]      = { .type = NLA_U8 },
+       [TCA_FLOWER_KEY_ENC_IP_TTL_MASK] = { .type = NLA_U8 },
 };
 
 static void fl_set_key_val(struct nlattr **tb,
@@ -498,22 +533,26 @@ static int fl_set_key_mpls(struct nlattr **tb,
 }
 
 static void fl_set_key_vlan(struct nlattr **tb,
+                           __be16 ethertype,
+                           int vlan_id_key, int vlan_prio_key,
                            struct flow_dissector_key_vlan *key_val,
                            struct flow_dissector_key_vlan *key_mask)
 {
 #define VLAN_PRIORITY_MASK     0x7
 
-       if (tb[TCA_FLOWER_KEY_VLAN_ID]) {
+       if (tb[vlan_id_key]) {
                key_val->vlan_id =
-                       nla_get_u16(tb[TCA_FLOWER_KEY_VLAN_ID]) & VLAN_VID_MASK;
+                       nla_get_u16(tb[vlan_id_key]) & VLAN_VID_MASK;
                key_mask->vlan_id = VLAN_VID_MASK;
        }
-       if (tb[TCA_FLOWER_KEY_VLAN_PRIO]) {
+       if (tb[vlan_prio_key]) {
                key_val->vlan_priority =
-                       nla_get_u8(tb[TCA_FLOWER_KEY_VLAN_PRIO]) &
+                       nla_get_u8(tb[vlan_prio_key]) &
                        VLAN_PRIORITY_MASK;
                key_mask->vlan_priority = VLAN_PRIORITY_MASK;
        }
+       key_val->vlan_tpid = ethertype;
+       key_mask->vlan_tpid = cpu_to_be16(~0);
 }
 
 static void fl_set_key_flag(u32 flower_key, u32 flower_mask,
@@ -551,17 +590,17 @@ static int fl_set_key_flags(struct nlattr **tb,
        return 0;
 }
 
-static void fl_set_key_ip(struct nlattr **tb,
+static void fl_set_key_ip(struct nlattr **tb, bool encap,
                          struct flow_dissector_key_ip *key,
                          struct flow_dissector_key_ip *mask)
 {
-               fl_set_key_val(tb, &key->tos, TCA_FLOWER_KEY_IP_TOS,
-                              &mask->tos, TCA_FLOWER_KEY_IP_TOS_MASK,
-                              sizeof(key->tos));
+       int tos_key = encap ? TCA_FLOWER_KEY_ENC_IP_TOS : TCA_FLOWER_KEY_IP_TOS;
+       int ttl_key = encap ? TCA_FLOWER_KEY_ENC_IP_TTL : TCA_FLOWER_KEY_IP_TTL;
+       int tos_mask = encap ? TCA_FLOWER_KEY_ENC_IP_TOS_MASK : TCA_FLOWER_KEY_IP_TOS_MASK;
+       int ttl_mask = encap ? TCA_FLOWER_KEY_ENC_IP_TTL_MASK : TCA_FLOWER_KEY_IP_TTL_MASK;
 
-               fl_set_key_val(tb, &key->ttl, TCA_FLOWER_KEY_IP_TTL,
-                              &mask->ttl, TCA_FLOWER_KEY_IP_TTL_MASK,
-                              sizeof(key->ttl));
+       fl_set_key_val(tb, &key->tos, tos_key, &mask->tos, tos_mask, sizeof(key->tos));
+       fl_set_key_val(tb, &key->ttl, ttl_key, &mask->ttl, ttl_mask, sizeof(key->ttl));
 }
 
 static int fl_set_key(struct net *net, struct nlattr **tb,
@@ -590,12 +629,28 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
        if (tb[TCA_FLOWER_KEY_ETH_TYPE]) {
                ethertype = nla_get_be16(tb[TCA_FLOWER_KEY_ETH_TYPE]);
 
-               if (ethertype == htons(ETH_P_8021Q)) {
-                       fl_set_key_vlan(tb, &key->vlan, &mask->vlan);
-                       fl_set_key_val(tb, &key->basic.n_proto,
-                                      TCA_FLOWER_KEY_VLAN_ETH_TYPE,
-                                      &mask->basic.n_proto, TCA_FLOWER_UNSPEC,
-                                      sizeof(key->basic.n_proto));
+               if (eth_type_vlan(ethertype)) {
+                       fl_set_key_vlan(tb, ethertype, TCA_FLOWER_KEY_VLAN_ID,
+                                       TCA_FLOWER_KEY_VLAN_PRIO, &key->vlan,
+                                       &mask->vlan);
+
+                       if (tb[TCA_FLOWER_KEY_VLAN_ETH_TYPE]) {
+                               ethertype = nla_get_be16(tb[TCA_FLOWER_KEY_VLAN_ETH_TYPE]);
+                               if (eth_type_vlan(ethertype)) {
+                                       fl_set_key_vlan(tb, ethertype,
+                                                       TCA_FLOWER_KEY_CVLAN_ID,
+                                                       TCA_FLOWER_KEY_CVLAN_PRIO,
+                                                       &key->cvlan, &mask->cvlan);
+                                       fl_set_key_val(tb, &key->basic.n_proto,
+                                                      TCA_FLOWER_KEY_CVLAN_ETH_TYPE,
+                                                      &mask->basic.n_proto,
+                                                      TCA_FLOWER_UNSPEC,
+                                                      sizeof(key->basic.n_proto));
+                               } else {
+                                       key->basic.n_proto = ethertype;
+                                       mask->basic.n_proto = cpu_to_be16(~0);
+                               }
+                       }
                } else {
                        key->basic.n_proto = ethertype;
                        mask->basic.n_proto = cpu_to_be16(~0);
@@ -607,7 +662,7 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
                fl_set_key_val(tb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,
                               &mask->basic.ip_proto, TCA_FLOWER_UNSPEC,
                               sizeof(key->basic.ip_proto));
-               fl_set_key_ip(tb, &key->ip, &mask->ip);
+               fl_set_key_ip(tb, false, &key->ip, &mask->ip);
        }
 
        if (tb[TCA_FLOWER_KEY_IPV4_SRC] || tb[TCA_FLOWER_KEY_IPV4_DST]) {
@@ -742,6 +797,8 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
                       &mask->enc_tp.dst, TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK,
                       sizeof(key->enc_tp.dst));
 
+       fl_set_key_ip(tb, true, &key->enc_ip, &mask->enc_ip);
+
        if (tb[TCA_FLOWER_KEY_FLAGS])
                ret = fl_set_key_flags(tb, &key->control.flags, &mask->control.flags);
 
@@ -793,47 +850,52 @@ static int fl_init_mask_hashtable(struct fl_flow_mask *mask)
                        FL_KEY_SET(keys, cnt, id, member);                      \
        } while(0);
 
-static void fl_init_dissector(struct fl_flow_mask *mask)
+static void fl_init_dissector(struct flow_dissector *dissector,
+                             struct fl_flow_key *mask)
 {
        struct flow_dissector_key keys[FLOW_DISSECTOR_KEY_MAX];
        size_t cnt = 0;
 
        FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_CONTROL, control);
        FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_BASIC, basic);
-       FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+       FL_KEY_SET_IF_MASKED(mask, keys, cnt,
                             FLOW_DISSECTOR_KEY_ETH_ADDRS, eth);
-       FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+       FL_KEY_SET_IF_MASKED(mask, keys, cnt,
                             FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
-       FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+       FL_KEY_SET_IF_MASKED(mask, keys, cnt,
                             FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
-       FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+       FL_KEY_SET_IF_MASKED(mask, keys, cnt,
                             FLOW_DISSECTOR_KEY_PORTS, tp);
-       FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+       FL_KEY_SET_IF_MASKED(mask, keys, cnt,
                             FLOW_DISSECTOR_KEY_IP, ip);
-       FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+       FL_KEY_SET_IF_MASKED(mask, keys, cnt,
                             FLOW_DISSECTOR_KEY_TCP, tcp);
-       FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+       FL_KEY_SET_IF_MASKED(mask, keys, cnt,
                             FLOW_DISSECTOR_KEY_ICMP, icmp);
-       FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+       FL_KEY_SET_IF_MASKED(mask, keys, cnt,
                             FLOW_DISSECTOR_KEY_ARP, arp);
-       FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+       FL_KEY_SET_IF_MASKED(mask, keys, cnt,
                             FLOW_DISSECTOR_KEY_MPLS, mpls);
-       FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+       FL_KEY_SET_IF_MASKED(mask, keys, cnt,
                             FLOW_DISSECTOR_KEY_VLAN, vlan);
-       FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+       FL_KEY_SET_IF_MASKED(mask, keys, cnt,
+                            FLOW_DISSECTOR_KEY_CVLAN, cvlan);
+       FL_KEY_SET_IF_MASKED(mask, keys, cnt,
                             FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id);
-       FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+       FL_KEY_SET_IF_MASKED(mask, keys, cnt,
                             FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, enc_ipv4);
-       FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+       FL_KEY_SET_IF_MASKED(mask, keys, cnt,
                             FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, enc_ipv6);
-       if (FL_KEY_IS_MASKED(&mask->key, enc_ipv4) ||
-           FL_KEY_IS_MASKED(&mask->key, enc_ipv6))
+       if (FL_KEY_IS_MASKED(mask, enc_ipv4) ||
+           FL_KEY_IS_MASKED(mask, enc_ipv6))
                FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_ENC_CONTROL,
                           enc_control);
-       FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+       FL_KEY_SET_IF_MASKED(mask, keys, cnt,
                             FLOW_DISSECTOR_KEY_ENC_PORTS, enc_tp);
+       FL_KEY_SET_IF_MASKED(mask, keys, cnt,
+                            FLOW_DISSECTOR_KEY_ENC_IP, enc_ip);
 
-       skb_flow_dissector_init(&mask->dissector, keys, cnt);
+       skb_flow_dissector_init(dissector, keys, cnt);
 }
 
 static struct fl_flow_mask *fl_create_new_mask(struct cls_fl_head *head,
@@ -852,7 +914,7 @@ static struct fl_flow_mask *fl_create_new_mask(struct cls_fl_head *head,
        if (err)
                goto errout_free;
 
-       fl_init_dissector(newmask);
+       fl_init_dissector(&newmask->dissector, &newmask->key);
 
        INIT_LIST_HEAD_RCU(&newmask->filters);
 
@@ -901,6 +963,7 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp,
                        struct cls_fl_filter *f, struct fl_flow_mask *mask,
                        unsigned long base, struct nlattr **tb,
                        struct nlattr *est, bool ovr,
+                       struct fl_flow_tmplt *tmplt,
                        struct netlink_ext_ack *extack)
 {
        int err;
@@ -921,6 +984,11 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp,
        fl_mask_update_range(mask);
        fl_set_masked_key(&f->mkey, &f->key, mask);
 
+       if (!fl_mask_fits_tmplt(tmplt, mask)) {
+               NL_SET_ERR_MSG_MOD(extack, "Mask does not fit the template");
+               return -EINVAL;
+       }
+
        return 0;
 }
 
@@ -986,7 +1054,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
        }
 
        err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr,
-                          extack);
+                          tp->chain->tmplt_priv, extack);
        if (err)
                goto errout_idr;
 
@@ -1071,20 +1139,144 @@ static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 {
        struct cls_fl_head *head = rtnl_dereference(tp->root);
        struct cls_fl_filter *f;
+
+       arg->count = arg->skip;
+
+       while ((f = idr_get_next_ul(&head->handle_idr,
+                                   &arg->cookie)) != NULL) {
+               if (arg->fn(tp, f, arg) < 0) {
+                       arg->stop = 1;
+                       break;
+               }
+               arg->cookie = f->handle + 1;
+               arg->count++;
+       }
+}
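/* The idr-based walk above keeps its position in arg->cookie (the next
 * handle to visit) rather than re-counting entries on every restart, so
 * an interrupted dump resumes in O(1) even if filters were added or
 * removed in the meantime.
 */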
+
+static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
+                       void *cb_priv, struct netlink_ext_ack *extack)
+{
+       struct cls_fl_head *head = rtnl_dereference(tp->root);
+       struct tc_cls_flower_offload cls_flower = {};
+       struct tcf_block *block = tp->chain->block;
        struct fl_flow_mask *mask;
+       struct cls_fl_filter *f;
+       int err;
 
-       list_for_each_entry_rcu(mask, &head->masks, list) {
-               list_for_each_entry_rcu(f, &mask->filters, list) {
-                       if (arg->count < arg->skip)
-                               goto skip;
-                       if (arg->fn(tp, f, arg) < 0) {
-                               arg->stop = 1;
-                               break;
+       list_for_each_entry(mask, &head->masks, list) {
+               list_for_each_entry(f, &mask->filters, list) {
+                       if (tc_skip_hw(f->flags))
+                               continue;
+
+                       tc_cls_common_offload_init(&cls_flower.common, tp,
+                                                  f->flags, extack);
+                       cls_flower.command = add ?
+                               TC_CLSFLOWER_REPLACE : TC_CLSFLOWER_DESTROY;
+                       cls_flower.cookie = (unsigned long)f;
+                       cls_flower.dissector = &mask->dissector;
+                       cls_flower.mask = &f->mkey;
+                       cls_flower.key = &f->key;
+                       cls_flower.exts = &f->exts;
+                       cls_flower.classid = f->res.classid;
+
+                       err = cb(TC_SETUP_CLSFLOWER, &cls_flower, cb_priv);
+                       if (err) {
+                               if (add && tc_skip_sw(f->flags))
+                                       return err;
+                               continue;
                        }
-skip:
-                       arg->count++;
+
+                       tc_cls_offload_cnt_update(block, &f->in_hw_count,
+                                                 &f->flags, add);
                }
        }
+
+       return 0;
+}
+
+static void fl_hw_create_tmplt(struct tcf_chain *chain,
+                              struct fl_flow_tmplt *tmplt)
+{
+       struct tc_cls_flower_offload cls_flower = {};
+       struct tcf_block *block = chain->block;
+       struct tcf_exts dummy_exts = { 0, };
+
+       cls_flower.common.chain_index = chain->index;
+       cls_flower.command = TC_CLSFLOWER_TMPLT_CREATE;
+       cls_flower.cookie = (unsigned long) tmplt;
+       cls_flower.dissector = &tmplt->dissector;
+       cls_flower.mask = &tmplt->mask;
+       cls_flower.key = &tmplt->dummy_key;
+       cls_flower.exts = &dummy_exts;
+
+       /* We don't care if any of the drivers fails to handle this
+        * call. It serves only as a hint for them.
+        */
+       tc_setup_cb_call(block, NULL, TC_SETUP_CLSFLOWER,
+                        &cls_flower, false);
+}
+
+static void fl_hw_destroy_tmplt(struct tcf_chain *chain,
+                               struct fl_flow_tmplt *tmplt)
+{
+       struct tc_cls_flower_offload cls_flower = {};
+       struct tcf_block *block = chain->block;
+
+       cls_flower.common.chain_index = chain->index;
+       cls_flower.command = TC_CLSFLOWER_TMPLT_DESTROY;
+       cls_flower.cookie = (unsigned long) tmplt;
+
+       tc_setup_cb_call(block, NULL, TC_SETUP_CLSFLOWER,
+                        &cls_flower, false);
+}
+
+static void *fl_tmplt_create(struct net *net, struct tcf_chain *chain,
+                            struct nlattr **tca,
+                            struct netlink_ext_ack *extack)
+{
+       struct fl_flow_tmplt *tmplt;
+       struct nlattr **tb;
+       int err;
+
+       if (!tca[TCA_OPTIONS])
+               return ERR_PTR(-EINVAL);
+
+       tb = kcalloc(TCA_FLOWER_MAX + 1, sizeof(struct nlattr *), GFP_KERNEL);
+       if (!tb)
+               return ERR_PTR(-ENOBUFS);
+       err = nla_parse_nested(tb, TCA_FLOWER_MAX, tca[TCA_OPTIONS],
+                              fl_policy, NULL);
+       if (err)
+               goto errout_tb;
+
+       tmplt = kzalloc(sizeof(*tmplt), GFP_KERNEL);
+       if (!tmplt)
+               goto errout_tb;
+       tmplt->chain = chain;
+       err = fl_set_key(net, tb, &tmplt->dummy_key, &tmplt->mask, extack);
+       if (err)
+               goto errout_tmplt;
+       kfree(tb);
+
+       fl_init_dissector(&tmplt->dissector, &tmplt->mask);
+
+       fl_hw_create_tmplt(chain, tmplt);
+
+       return tmplt;
+
+errout_tmplt:
+       kfree(tmplt);
+errout_tb:
+       kfree(tb);
+       return ERR_PTR(err);
+}
+
+static void fl_tmplt_destroy(void *tmplt_priv)
+{
+       struct fl_flow_tmplt *tmplt = tmplt_priv;
+
+       fl_hw_destroy_tmplt(tmplt->chain, tmplt);
+       kfree(tmplt);
 }
 
 static int fl_dump_key_val(struct sk_buff *skb,
@@ -1141,20 +1333,24 @@ static int fl_dump_key_mpls(struct sk_buff *skb,
        return 0;
 }
 
-static int fl_dump_key_ip(struct sk_buff *skb,
+static int fl_dump_key_ip(struct sk_buff *skb, bool encap,
                          struct flow_dissector_key_ip *key,
                          struct flow_dissector_key_ip *mask)
 {
-       if (fl_dump_key_val(skb, &key->tos, TCA_FLOWER_KEY_IP_TOS, &mask->tos,
-                           TCA_FLOWER_KEY_IP_TOS_MASK, sizeof(key->tos)) ||
-           fl_dump_key_val(skb, &key->ttl, TCA_FLOWER_KEY_IP_TTL, &mask->ttl,
-                           TCA_FLOWER_KEY_IP_TTL_MASK, sizeof(key->ttl)))
+       int tos_key = encap ? TCA_FLOWER_KEY_ENC_IP_TOS : TCA_FLOWER_KEY_IP_TOS;
+       int ttl_key = encap ? TCA_FLOWER_KEY_ENC_IP_TTL : TCA_FLOWER_KEY_IP_TTL;
+       int tos_mask = encap ? TCA_FLOWER_KEY_ENC_IP_TOS_MASK : TCA_FLOWER_KEY_IP_TOS_MASK;
+       int ttl_mask = encap ? TCA_FLOWER_KEY_ENC_IP_TTL_MASK : TCA_FLOWER_KEY_IP_TTL_MASK;
+
+       if (fl_dump_key_val(skb, &key->tos, tos_key, &mask->tos, tos_mask, sizeof(key->tos)) ||
+           fl_dump_key_val(skb, &key->ttl, ttl_key, &mask->ttl, ttl_mask, sizeof(key->ttl)))
                return -1;
 
        return 0;
 }
 
 static int fl_dump_key_vlan(struct sk_buff *skb,
+                           int vlan_id_key, int vlan_prio_key,
                            struct flow_dissector_key_vlan *vlan_key,
                            struct flow_dissector_key_vlan *vlan_mask)
 {
@@ -1163,13 +1359,13 @@ static int fl_dump_key_vlan(struct sk_buff *skb,
        if (!memchr_inv(vlan_mask, 0, sizeof(*vlan_mask)))
                return 0;
        if (vlan_mask->vlan_id) {
-               err = nla_put_u16(skb, TCA_FLOWER_KEY_VLAN_ID,
+               err = nla_put_u16(skb, vlan_id_key,
                                  vlan_key->vlan_id);
                if (err)
                        return err;
        }
        if (vlan_mask->vlan_priority) {
-               err = nla_put_u8(skb, TCA_FLOWER_KEY_VLAN_PRIO,
+               err = nla_put_u8(skb, vlan_prio_key,
                                 vlan_key->vlan_priority);
                if (err)
                        return err;
@@ -1216,29 +1412,9 @@ static int fl_dump_key_flags(struct sk_buff *skb, u32 flags_key, u32 flags_mask)
        return nla_put(skb, TCA_FLOWER_KEY_FLAGS_MASK, 4, &_mask);
 }
 
-static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh,
-                  struct sk_buff *skb, struct tcmsg *t)
+static int fl_dump_key(struct sk_buff *skb, struct net *net,
+                      struct fl_flow_key *key, struct fl_flow_key *mask)
 {
-       struct cls_fl_filter *f = fh;
-       struct nlattr *nest;
-       struct fl_flow_key *key, *mask;
-
-       if (!f)
-               return skb->len;
-
-       t->tcm_handle = f->handle;
-
-       nest = nla_nest_start(skb, TCA_OPTIONS);
-       if (!nest)
-               goto nla_put_failure;
-
-       if (f->res.classid &&
-           nla_put_u32(skb, TCA_FLOWER_CLASSID, f->res.classid))
-               goto nla_put_failure;
-
-       key = &f->key;
-       mask = &f->mask->key;
-
        if (mask->indev_ifindex) {
                struct net_device *dev;
 
@@ -1247,9 +1423,6 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh,
                        goto nla_put_failure;
        }
 
-       if (!tc_skip_hw(f->flags))
-               fl_hw_update_stats(tp, f);
-
        if (fl_dump_key_val(skb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST,
                            mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK,
                            sizeof(key->eth.dst)) ||
@@ -1264,15 +1437,36 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh,
        if (fl_dump_key_mpls(skb, &key->mpls, &mask->mpls))
                goto nla_put_failure;
 
-       if (fl_dump_key_vlan(skb, &key->vlan, &mask->vlan))
+       if (fl_dump_key_vlan(skb, TCA_FLOWER_KEY_VLAN_ID,
+                            TCA_FLOWER_KEY_VLAN_PRIO, &key->vlan, &mask->vlan))
                goto nla_put_failure;
 
+       if (fl_dump_key_vlan(skb, TCA_FLOWER_KEY_CVLAN_ID,
+                            TCA_FLOWER_KEY_CVLAN_PRIO,
+                            &key->cvlan, &mask->cvlan) ||
+           (mask->cvlan.vlan_tpid &&
+            nla_put_be16(skb, TCA_FLOWER_KEY_VLAN_ETH_TYPE,
+                         key->cvlan.vlan_tpid)))
+               goto nla_put_failure;
+
+       if (mask->basic.n_proto) {
+               if (mask->cvlan.vlan_tpid) {
+                       if (nla_put_be16(skb, TCA_FLOWER_KEY_CVLAN_ETH_TYPE,
+                                        key->basic.n_proto))
+                               goto nla_put_failure;
+               } else if (mask->vlan.vlan_tpid) {
+                       if (nla_put_be16(skb, TCA_FLOWER_KEY_VLAN_ETH_TYPE,
+                                        key->basic.n_proto))
+                               goto nla_put_failure;
+               }
+       }
+
        if ((key->basic.n_proto == htons(ETH_P_IP) ||
             key->basic.n_proto == htons(ETH_P_IPV6)) &&
            (fl_dump_key_val(skb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,
                            &mask->basic.ip_proto, TCA_FLOWER_UNSPEC,
                            sizeof(key->basic.ip_proto)) ||
-           fl_dump_key_ip(skb, &key->ip, &mask->ip)))
+           fl_dump_key_ip(skb, false, &key->ip, &mask->ip)))
                goto nla_put_failure;
 
        if (key->control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS &&
@@ -1397,12 +1591,48 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh,
                            TCA_FLOWER_KEY_ENC_UDP_DST_PORT,
                            &mask->enc_tp.dst,
                            TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK,
-                           sizeof(key->enc_tp.dst)))
+                           sizeof(key->enc_tp.dst)) ||
+           fl_dump_key_ip(skb, true, &key->enc_ip, &mask->enc_ip))
                goto nla_put_failure;
 
        if (fl_dump_key_flags(skb, key->control.flags, mask->control.flags))
                goto nla_put_failure;
 
+       return 0;
+
+nla_put_failure:
+       return -EMSGSIZE;
+}
+
+static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh,
+                  struct sk_buff *skb, struct tcmsg *t)
+{
+       struct cls_fl_filter *f = fh;
+       struct nlattr *nest;
+       struct fl_flow_key *key, *mask;
+
+       if (!f)
+               return skb->len;
+
+       t->tcm_handle = f->handle;
+
+       nest = nla_nest_start(skb, TCA_OPTIONS);
+       if (!nest)
+               goto nla_put_failure;
+
+       if (f->res.classid &&
+           nla_put_u32(skb, TCA_FLOWER_CLASSID, f->res.classid))
+               goto nla_put_failure;
+
+       key = &f->key;
+       mask = &f->mask->key;
+
+       if (fl_dump_key(skb, net, key, mask))
+               goto nla_put_failure;
+
+       if (!tc_skip_hw(f->flags))
+               fl_hw_update_stats(tp, f);
+
        if (f->flags && nla_put_u32(skb, TCA_FLOWER_FLAGS, f->flags))
                goto nla_put_failure;
 
@@ -1421,6 +1651,31 @@ nla_put_failure:
        return -1;
 }
 
+static int fl_tmplt_dump(struct sk_buff *skb, struct net *net, void *tmplt_priv)
+{
+       struct fl_flow_tmplt *tmplt = tmplt_priv;
+       struct fl_flow_key *key, *mask;
+       struct nlattr *nest;
+
+       nest = nla_nest_start(skb, TCA_OPTIONS);
+       if (!nest)
+               goto nla_put_failure;
+
+       key = &tmplt->dummy_key;
+       mask = &tmplt->mask;
+
+       if (fl_dump_key(skb, net, key, mask))
+               goto nla_put_failure;
+
+       nla_nest_end(skb, nest);
+
+       return skb->len;
+
+nla_put_failure:
+       nla_nest_cancel(skb, nest);
+       return -EMSGSIZE;
+}
+
 static void fl_bind_class(void *fh, u32 classid, unsigned long cl)
 {
        struct cls_fl_filter *f = fh;
@@ -1438,8 +1693,12 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = {
        .change         = fl_change,
        .delete         = fl_delete,
        .walk           = fl_walk,
+       .reoffload      = fl_reoffload,
        .dump           = fl_dump,
        .bind_class     = fl_bind_class,
+       .tmplt_create   = fl_tmplt_create,
+       .tmplt_destroy  = fl_tmplt_destroy,
+       .tmplt_dump     = fl_tmplt_dump,
        .owner          = THIS_MODULE,
 };
 
index 47b207ef77620f8baa530a8bc8c44c35ecf2df78..af16f36ed578910824cac069dad85325e3271e0e 100644 (file)
@@ -21,6 +21,7 @@ struct cls_mall_head {
        struct tcf_result res;
        u32 handle;
        u32 flags;
+       unsigned int in_hw_count;
        struct rcu_work rwork;
 };
 
@@ -95,6 +96,7 @@ static int mall_replace_hw_filter(struct tcf_proto *tp,
                mall_destroy_hw_filter(tp, head, cookie, NULL);
                return err;
        } else if (err > 0) {
+               head->in_hw_count = err;
                tcf_block_offload_inc(block, &head->flags);
        }
 
@@ -235,6 +237,35 @@ skip:
        arg->count++;
 }
 
+static int mall_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
+                         void *cb_priv, struct netlink_ext_ack *extack)
+{
+       struct cls_mall_head *head = rtnl_dereference(tp->root);
+       struct tc_cls_matchall_offload cls_mall = {};
+       struct tcf_block *block = tp->chain->block;
+       int err;
+
+       if (tc_skip_hw(head->flags))
+               return 0;
+
+       tc_cls_common_offload_init(&cls_mall.common, tp, head->flags, extack);
+       cls_mall.command = add ?
+               TC_CLSMATCHALL_REPLACE : TC_CLSMATCHALL_DESTROY;
+       cls_mall.exts = &head->exts;
+       cls_mall.cookie = (unsigned long)head;
+
+       err = cb(TC_SETUP_CLSMATCHALL, &cls_mall, cb_priv);
+       if (err) {
+               if (add && tc_skip_sw(head->flags))
+                       return err;
+               return 0;
+       }
+
+       tc_cls_offload_cnt_update(block, &head->in_hw_count, &head->flags, add);
+
+       return 0;
+}
+
 static int mall_dump(struct net *net, struct tcf_proto *tp, void *fh,
                     struct sk_buff *skb, struct tcmsg *t)
 {
@@ -289,6 +320,7 @@ static struct tcf_proto_ops cls_mall_ops __read_mostly = {
        .change         = mall_change,
        .delete         = mall_delete,
        .walk           = mall_walk,
+       .reoffload      = mall_reoffload,
        .dump           = mall_dump,
        .bind_class     = mall_bind_class,
        .owner          = THIS_MODULE,
index fb861f90fde6610d7fa4f7b6908742b307a4b9d0..d5d2a6dc39216b0ca28bd11094f0b64fda5c5964 100644 (file)
@@ -62,6 +62,7 @@ struct tc_u_knode {
        struct tc_u32_pcnt __percpu *pf;
 #endif
        u32                     flags;
+       unsigned int            in_hw_count;
 #ifdef CONFIG_CLS_U32_MARK
        u32                     val;
        u32                     mask;
@@ -571,6 +572,7 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
                u32_remove_hw_knode(tp, n, NULL);
                return err;
        } else if (err > 0) {
+               n->in_hw_count = err;
                tcf_block_offload_inc(block, &n->flags);
        }
 
@@ -1199,6 +1201,114 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
        }
 }
 
+static int u32_reoffload_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht,
+                              bool add, tc_setup_cb_t *cb, void *cb_priv,
+                              struct netlink_ext_ack *extack)
+{
+       struct tc_cls_u32_offload cls_u32 = {};
+       int err;
+
+       tc_cls_common_offload_init(&cls_u32.common, tp, ht->flags, extack);
+       cls_u32.command = add ? TC_CLSU32_NEW_HNODE : TC_CLSU32_DELETE_HNODE;
+       cls_u32.hnode.divisor = ht->divisor;
+       cls_u32.hnode.handle = ht->handle;
+       cls_u32.hnode.prio = ht->prio;
+
+       err = cb(TC_SETUP_CLSU32, &cls_u32, cb_priv);
+       if (err && add && tc_skip_sw(ht->flags))
+               return err;
+
+       return 0;
+}
+
+static int u32_reoffload_knode(struct tcf_proto *tp, struct tc_u_knode *n,
+                              bool add, tc_setup_cb_t *cb, void *cb_priv,
+                              struct netlink_ext_ack *extack)
+{
+       struct tc_u_hnode *ht = rtnl_dereference(n->ht_down);
+       struct tcf_block *block = tp->chain->block;
+       struct tc_cls_u32_offload cls_u32 = {};
+       int err;
+
+       tc_cls_common_offload_init(&cls_u32.common, tp, n->flags, extack);
+       cls_u32.command = add ?
+               TC_CLSU32_REPLACE_KNODE : TC_CLSU32_DELETE_KNODE;
+       cls_u32.knode.handle = n->handle;
+
+       if (add) {
+               cls_u32.knode.fshift = n->fshift;
+#ifdef CONFIG_CLS_U32_MARK
+               cls_u32.knode.val = n->val;
+               cls_u32.knode.mask = n->mask;
+#else
+               cls_u32.knode.val = 0;
+               cls_u32.knode.mask = 0;
+#endif
+               cls_u32.knode.sel = &n->sel;
+               cls_u32.knode.exts = &n->exts;
+               if (n->ht_down)
+                       cls_u32.knode.link_handle = ht->handle;
+       }
+
+       err = cb(TC_SETUP_CLSU32, &cls_u32, cb_priv);
+       if (err) {
+               if (add && tc_skip_sw(n->flags))
+                       return err;
+               return 0;
+       }
+
+       tc_cls_offload_cnt_update(block, &n->in_hw_count, &n->flags, add);
+
+       return 0;
+}
+
+static int u32_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
+                        void *cb_priv, struct netlink_ext_ack *extack)
+{
+       struct tc_u_common *tp_c = tp->data;
+       struct tc_u_hnode *ht;
+       struct tc_u_knode *n;
+       unsigned int h;
+       int err;
+
+       for (ht = rtnl_dereference(tp_c->hlist);
+            ht;
+            ht = rtnl_dereference(ht->next)) {
+               if (ht->prio != tp->prio)
+                       continue;
+
+               /* When adding filters to a new dev, try to offload the
+                * hashtable first. When removing, delete the filters before the
+                * hashtable.
+                */
+               if (add && !tc_skip_hw(ht->flags)) {
+                       err = u32_reoffload_hnode(tp, ht, add, cb, cb_priv,
+                                                 extack);
+                       if (err)
+                               return err;
+               }
+
+               for (h = 0; h <= ht->divisor; h++) {
+                       for (n = rtnl_dereference(ht->ht[h]);
+                            n;
+                            n = rtnl_dereference(n->next)) {
+                               if (tc_skip_hw(n->flags))
+                                       continue;
+
+                               err = u32_reoffload_knode(tp, n, add, cb,
+                                                         cb_priv, extack);
+                               if (err)
+                                       return err;
+                       }
+               }
+
+               if (!add && !tc_skip_hw(ht->flags))
+                       u32_reoffload_hnode(tp, ht, add, cb, cb_priv, extack);
+       }
+
+       return 0;
+}
+
 static void u32_bind_class(void *fh, u32 classid, unsigned long cl)
 {
        struct tc_u_knode *n = fh;
@@ -1336,6 +1446,7 @@ static struct tcf_proto_ops cls_u32_ops __read_mostly = {
        .change         =       u32_change,
        .delete         =       u32_delete,
        .walk           =       u32_walk,
+       .reoffload      =       u32_reoffload,
        .dump           =       u32_dump,
        .bind_class     =       u32_bind_class,
        .owner          =       THIS_MODULE,
index 54eca685420f317b582a88508797bbc6327b75d0..98541c6399db53f5d8ae46aee0f17cad7e0a127e 100644 (file)
@@ -596,12 +596,19 @@ static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
        return HRTIMER_NORESTART;
 }
 
-void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
+void qdisc_watchdog_init_clockid(struct qdisc_watchdog *wd, struct Qdisc *qdisc,
+                                clockid_t clockid)
 {
-       hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
+       hrtimer_init(&wd->timer, clockid, HRTIMER_MODE_ABS_PINNED);
        wd->timer.function = qdisc_watchdog;
        wd->qdisc = qdisc;
 }
+EXPORT_SYMBOL(qdisc_watchdog_init_clockid);
+
+void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
+{
+       qdisc_watchdog_init_clockid(wd, qdisc, CLOCK_MONOTONIC);
+}
 EXPORT_SYMBOL(qdisc_watchdog_init);
 
 void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires)
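/* A hedged usage sketch of the new initializer: a qdisc whose deadlines
 * live on a different clock can now select it at init time.  CLOCK_TAI
 * is illustrative and the foo_* names are hypothetical.
 */
struct foo_sched_data {
	struct qdisc_watchdog watchdog;
};

static int foo_init(struct Qdisc *sch, struct nlattr *opt,
		    struct netlink_ext_ack *extack)
{
	struct foo_sched_data *q = qdisc_priv(sch);

	qdisc_watchdog_init_clockid(&q->watchdog, sch, CLOCK_TAI);
	return 0;
}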
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
new file mode 100644 (file)
index 0000000..35fc725
--- /dev/null
@@ -0,0 +1,3020 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/* COMMON Applications Kept Enhanced (CAKE) discipline
+ *
+ * Copyright (C) 2014-2018 Jonathan Morton <chromatix99@gmail.com>
+ * Copyright (C) 2015-2018 Toke Høiland-Jørgensen <toke@toke.dk>
+ * Copyright (C) 2014-2018 Dave Täht <dave.taht@gmail.com>
+ * Copyright (C) 2015-2018 Sebastian Moeller <moeller0@gmx.de>
+ * (C) 2015-2018 Kevin Darbyshire-Bryant <kevin@darbyshire-bryant.me.uk>
+ * Copyright (C) 2017-2018 Ryan Mounce <ryan@mounce.com.au>
+ *
+ * The CAKE Principles:
+ *                (or, how to have your cake and eat it too)
+ *
+ * This is a combination of several shaping, AQM and FQ techniques into one
+ * easy-to-use package:
+ *
+ * - An overall bandwidth shaper, to move the bottleneck away from dumb CPE
+ *   equipment and bloated MACs.  This operates in deficit mode (as in sch_fq),
+ *   eliminating the need for any sort of burst parameter (e.g. token bucket
+ *   depth).  Burst support is limited to that necessary to overcome scheduling
+ *   latency.
+ *
+ * - A Diffserv-aware priority queue, giving more priority to certain classes,
+ *   up to a specified fraction of bandwidth.  Above that bandwidth threshold,
+ *   the priority is reduced to avoid starving other tins.
+ *
+ * - Each priority tin has a separate Flow Queue system, to isolate traffic
+ *   flows from each other.  This prevents a burst on one flow from increasing
+ *   the delay to another.  Flows are distributed to queues using a
+ *   set-associative hash function.
+ *
+ * - Each queue is actively managed by Cobalt, which is a combination of the
+ *   Codel and Blue AQM algorithms.  This serves flows fairly, and signals
+ *   congestion early via ECN (if available) and/or packet drops, to keep
+ *   latency low.  The codel parameters are auto-tuned based on the bandwidth
+ *   setting, as is necessary at low bandwidths.
+ *
+ * The configuration parameters are kept deliberately simple for ease of use.
+ * Everything has sane defaults.  Complete generality of configuration is *not*
+ * a goal.
+ *
+ * The priority queue operates according to a weighted DRR scheme, combined with
+ * a bandwidth tracker which reuses the shaper logic to detect which side of the
+ * bandwidth sharing threshold the tin is operating on.  This determines whether a
+ * priority-based weight (high) or a bandwidth-based weight (low) is used for
+ * that tin in the current pass.
+ *
+ * This qdisc was inspired by Eric Dumazet's fq_codel code, which he kindly
+ * granted us permission to leverage.
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/jiffies.h>
+#include <linux/string.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/skbuff.h>
+#include <linux/jhash.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/reciprocal_div.h>
+#include <net/netlink.h>
+#include <linux/version.h>
+#include <linux/if_vlan.h>
+#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
+#include <net/tcp.h>
+#include <net/flow_dissector.h>
+
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+#include <net/netfilter/nf_conntrack_core.h>
+#endif
+
+#define CAKE_SET_WAYS (8)
+#define CAKE_MAX_TINS (8)
+#define CAKE_QUEUES (1024)
+#define CAKE_FLOW_MASK 63
+#define CAKE_FLOW_NAT_FLAG 64
+
+/* struct cobalt_params - contains codel and blue parameters
+ * @interval:  codel initial drop rate
+ * @target:     maximum persistent sojourn time & blue update rate
+ * @mtu_time:   serialisation delay of maximum-size packet
+ * @p_inc:      increment of blue drop probability (0.32 fxp)
+ * @p_dec:      decrement of blue drop probability (0.32 fxp)
+ */
+struct cobalt_params {
+       u64     interval;
+       u64     target;
+       u64     mtu_time;
+       u32     p_inc;
+       u32     p_dec;
+};
+
+/* struct cobalt_vars - contains codel and blue variables
+ * @count:             codel dropping frequency
+ * @rec_inv_sqrt:      reciprocal value of sqrt(count) >> 1
+ * @drop_next:         time to drop next packet, or when we dropped last
+ * @blue_timer:                Blue time to next drop
+ * @p_drop:            BLUE drop probability (0.32 fxp)
+ * @dropping:          set if in dropping state
+ * @ecn_marked:                set if marked
+ */
+struct cobalt_vars {
+       u32     count;
+       u32     rec_inv_sqrt;
+       ktime_t drop_next;
+       ktime_t blue_timer;
+       u32     p_drop;
+       bool    dropping;
+       bool    ecn_marked;
+};
+
+enum {
+       CAKE_SET_NONE = 0,
+       CAKE_SET_SPARSE,
+       CAKE_SET_SPARSE_WAIT, /* counted in SPARSE, actually in BULK */
+       CAKE_SET_BULK,
+       CAKE_SET_DECAYING
+};
+
+struct cake_flow {
+       /* this stuff is all needed per-flow at dequeue time */
+       struct sk_buff    *head;
+       struct sk_buff    *tail;
+       struct list_head  flowchain;
+       s32               deficit;
+       u32               dropped;
+       struct cobalt_vars cvars;
+       u16               srchost; /* index into cake_host table */
+       u16               dsthost;
+       u8                set;
+}; /* please try to keep this structure <= 64 bytes */
+
+struct cake_host {
+       u32 srchost_tag;
+       u32 dsthost_tag;
+       u16 srchost_refcnt;
+       u16 dsthost_refcnt;
+};
+
+struct cake_heap_entry {
+       u16 t:3, b:10;
+};
+
+struct cake_tin_data {
+       struct cake_flow flows[CAKE_QUEUES];
+       u32     backlogs[CAKE_QUEUES];
+       u32     tags[CAKE_QUEUES]; /* for set association */
+       u16     overflow_idx[CAKE_QUEUES];
+       struct cake_host hosts[CAKE_QUEUES]; /* for triple isolation */
+       u16     flow_quantum;
+
+       struct cobalt_params cparams;
+       u32     drop_overlimit;
+       u16     bulk_flow_count;
+       u16     sparse_flow_count;
+       u16     decaying_flow_count;
+       u16     unresponsive_flow_count;
+
+       u32     max_skblen;
+
+       struct list_head new_flows;
+       struct list_head old_flows;
+       struct list_head decaying_flows;
+
+       /* time_next = time_this + ((len * rate_ns) >> rate_shft) */
+       ktime_t time_next_packet;
+       u64     tin_rate_ns;
+       u64     tin_rate_bps;
+       u16     tin_rate_shft;
+
+       u16     tin_quantum_prio;
+       u16     tin_quantum_band;
+       s32     tin_deficit;
+       u32     tin_backlog;
+       u32     tin_dropped;
+       u32     tin_ecn_mark;
+
+       u32     packets;
+       u64     bytes;
+
+       u32     ack_drops;
+
+       /* moving averages */
+       u64 avge_delay;
+       u64 peak_delay;
+       u64 base_delay;
+
+       /* hash function stats */
+       u32     way_directs;
+       u32     way_hits;
+       u32     way_misses;
+       u32     way_collisions;
+}; /* number of tins is small, so size of this struct doesn't matter much */
+
+struct cake_sched_data {
+       struct tcf_proto __rcu *filter_list; /* optional external classifier */
+       struct tcf_block *block;
+       struct cake_tin_data *tins;
+
+       struct cake_heap_entry overflow_heap[CAKE_QUEUES * CAKE_MAX_TINS];
+       u16             overflow_timeout;
+
+       u16             tin_cnt;
+       u8              tin_mode;
+       u8              flow_mode;
+       u8              ack_filter;
+       u8              atm_mode;
+
+       /* time_next = time_this + ((len * rate_ns) >> rate_shft) */
+       u16             rate_shft;
+       ktime_t         time_next_packet;
+       ktime_t         failsafe_next_packet;
+       u64             rate_ns;
+       u64             rate_bps;
+       u16             rate_flags;
+       s16             rate_overhead;
+       u16             rate_mpu;
+       u64             interval;
+       u64             target;
+
+       /* resource tracking */
+       u32             buffer_used;
+       u32             buffer_max_used;
+       u32             buffer_limit;
+       u32             buffer_config_limit;
+
+       /* indices for dequeue */
+       u16             cur_tin;
+       u16             cur_flow;
+
+       struct qdisc_watchdog watchdog;
+       const u8        *tin_index;
+       const u8        *tin_order;
+
+       /* bandwidth capacity estimate */
+       ktime_t         last_packet_time;
+       ktime_t         avg_window_begin;
+       u64             avg_packet_interval;
+       u64             avg_window_bytes;
+       u64             avg_peak_bandwidth;
+       ktime_t         last_reconfig_time;
+
+       /* packet length stats */
+       u32             avg_netoff;
+       u16             max_netlen;
+       u16             max_adjlen;
+       u16             min_netlen;
+       u16             min_adjlen;
+};
+
+enum {
+       CAKE_FLAG_OVERHEAD         = BIT(0),
+       CAKE_FLAG_AUTORATE_INGRESS = BIT(1),
+       CAKE_FLAG_INGRESS          = BIT(2),
+       CAKE_FLAG_WASH             = BIT(3),
+       CAKE_FLAG_SPLIT_GSO        = BIT(4)
+};
+
+/* COBALT operates the Codel and BLUE algorithms in parallel, in order to
+ * obtain the best features of each.  Codel is excellent on flows which
+ * respond to congestion signals in a TCP-like way.  BLUE is more effective on
+ * unresponsive flows.
+ */
+
+struct cobalt_skb_cb {
+       ktime_t enqueue_time;
+       u32     adjusted_len;
+};
+
+static u64 us_to_ns(u64 us)
+{
+       return us * NSEC_PER_USEC;
+}
+
+static struct cobalt_skb_cb *get_cobalt_cb(const struct sk_buff *skb)
+{
+       qdisc_cb_private_validate(skb, sizeof(struct cobalt_skb_cb));
+       return (struct cobalt_skb_cb *)qdisc_skb_cb(skb)->data;
+}
+
+static ktime_t cobalt_get_enqueue_time(const struct sk_buff *skb)
+{
+       return get_cobalt_cb(skb)->enqueue_time;
+}
+
+static void cobalt_set_enqueue_time(struct sk_buff *skb,
+                                   ktime_t now)
+{
+       get_cobalt_cb(skb)->enqueue_time = now;
+}
+
+static u16 quantum_div[CAKE_QUEUES + 1] = {0};
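+
+/* quantum_div[] is filled in at initialisation time (not in this hunk),
+ * with quantum_div[i] approximately 65535 / i.  This lets the per-host
+ * fair-share division in cake_enqueue() be done as a multiply and shift:
+ *   (flow_quantum * quantum_div[host_load]) >> 16 ~= flow_quantum / host_load
+ * e.g. for host_load = 4, quantum_div[4] = 16383, and with
+ * flow_quantum = 1514:  (1514 * 16383) >> 16 = 378 ~= 1514 / 4.
+ */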
+
+/* Diffserv lookup tables */
+
+static const u8 precedence[] = {
+       0, 0, 0, 0, 0, 0, 0, 0,
+       1, 1, 1, 1, 1, 1, 1, 1,
+       2, 2, 2, 2, 2, 2, 2, 2,
+       3, 3, 3, 3, 3, 3, 3, 3,
+       4, 4, 4, 4, 4, 4, 4, 4,
+       5, 5, 5, 5, 5, 5, 5, 5,
+       6, 6, 6, 6, 6, 6, 6, 6,
+       7, 7, 7, 7, 7, 7, 7, 7,
+};
+
+static const u8 diffserv8[] = {
+       2, 5, 1, 2, 4, 2, 2, 2,
+       0, 2, 1, 2, 1, 2, 1, 2,
+       5, 2, 4, 2, 4, 2, 4, 2,
+       3, 2, 3, 2, 3, 2, 3, 2,
+       6, 2, 3, 2, 3, 2, 3, 2,
+       6, 2, 2, 2, 6, 2, 6, 2,
+       7, 2, 2, 2, 2, 2, 2, 2,
+       7, 2, 2, 2, 2, 2, 2, 2,
+};
+
+static const u8 diffserv4[] = {
+       0, 2, 0, 0, 2, 0, 0, 0,
+       1, 0, 0, 0, 0, 0, 0, 0,
+       2, 0, 2, 0, 2, 0, 2, 0,
+       2, 0, 2, 0, 2, 0, 2, 0,
+       3, 0, 2, 0, 2, 0, 2, 0,
+       3, 0, 0, 0, 3, 0, 3, 0,
+       3, 0, 0, 0, 0, 0, 0, 0,
+       3, 0, 0, 0, 0, 0, 0, 0,
+};
+
+static const u8 diffserv3[] = {
+       0, 0, 0, 0, 2, 0, 0, 0,
+       1, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 2, 0, 2, 0,
+       2, 0, 0, 0, 0, 0, 0, 0,
+       2, 0, 0, 0, 0, 0, 0, 0,
+};
+
+static const u8 besteffort[] = {
+       0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0,
+};
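+
+/* An illustrative reading of the tables above: each is indexed by the
+ * 6-bit DSCP value (0..63) and returns a tin number.  Under diffserv3,
+ * CS1 (DSCP 8) lands in tin 1 (Bulk); DSCP 4, VA (44), EF (46), CS6 (48)
+ * and CS7 (56) land in tin 2 (Voice); everything else stays in tin 0
+ * (Best Effort).  besteffort[] collapses all DSCPs to tin 0.
+ */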
+
+/* tin priority order for stats dumping */
+
+static const u8 normal_order[] = {0, 1, 2, 3, 4, 5, 6, 7};
+static const u8 bulk_order[] = {1, 0, 2, 3};
+
+#define REC_INV_SQRT_CACHE (16)
+static u32 cobalt_rec_inv_sqrt_cache[REC_INV_SQRT_CACHE] = {0};
+
+/* http://en.wikipedia.org/wiki/Methods_of_computing_square_roots
+ * new_invsqrt = (invsqrt / 2) * (3 - count * invsqrt^2)
+ *
+ * Here, invsqrt is a fixed point number (< 1.0), 32bit mantissa, aka Q0.32
+ */
+
+static void cobalt_newton_step(struct cobalt_vars *vars)
+{
+       u32 invsqrt, invsqrt2;
+       u64 val;
+
+       invsqrt = vars->rec_inv_sqrt;
+       invsqrt2 = ((u64)invsqrt * invsqrt) >> 32;
+       val = (3LL << 32) - ((u64)vars->count * invsqrt2);
+
+       val >>= 2; /* avoid overflow in following multiply */
+       val = (val * invsqrt) >> (32 - 2 + 1);
+
+       vars->rec_inv_sqrt = val;
+}
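+
+/* Worked example of one Newton step, in Q0.32: for count = 4 the true
+ * value is 1/sqrt(4) = 0.5, i.e. rec_inv_sqrt = 0x80000000.  Then
+ *   invsqrt2 = (0x80000000 * 0x80000000) >> 32 = 0x40000000   (0.25)
+ *   val      = (3 << 32) - 4 * 0x40000000     = 0x200000000   (2.0)
+ *   val >>= 2;  val = (val * 0x80000000) >> 31 = 0x80000000   (0.5)
+ * so the correct value is a fixed point of the iteration, as expected.
+ */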
+
+static void cobalt_invsqrt(struct cobalt_vars *vars)
+{
+       if (vars->count < REC_INV_SQRT_CACHE)
+               vars->rec_inv_sqrt = cobalt_rec_inv_sqrt_cache[vars->count];
+       else
+               cobalt_newton_step(vars);
+}
+
+/* There is a big difference in timing between the accurate values placed in
+ * the cache and the approximations given by a single Newton step for small
+ * count values, particularly when stepping from count 1 to 2 or vice versa.
+ * Above 16, a single Newton step gives sufficient accuracy in either
+ * direction, given the precision stored.
+ *
+ * The magnitude of the error when stepping up to count 2 is such as to give
+ * the value that *should* have been produced at count 4.
+ */
+
+static void cobalt_cache_init(void)
+{
+       struct cobalt_vars v;
+
+       memset(&v, 0, sizeof(v));
+       v.rec_inv_sqrt = ~0U;
+       cobalt_rec_inv_sqrt_cache[0] = v.rec_inv_sqrt;
+
+       for (v.count = 1; v.count < REC_INV_SQRT_CACHE; v.count++) {
+               cobalt_newton_step(&v);
+               cobalt_newton_step(&v);
+               cobalt_newton_step(&v);
+               cobalt_newton_step(&v);
+
+               cobalt_rec_inv_sqrt_cache[v.count] = v.rec_inv_sqrt;
+       }
+}
+
+static void cobalt_vars_init(struct cobalt_vars *vars)
+{
+       memset(vars, 0, sizeof(*vars));
+
+       if (!cobalt_rec_inv_sqrt_cache[0]) {
+               cobalt_cache_init();
+               cobalt_rec_inv_sqrt_cache[0] = ~0;
+       }
+}
+
+/* CoDel control_law is t + interval/sqrt(count)
+ * We maintain in rec_inv_sqrt the reciprocal value of sqrt(count) to avoid
+ * both sqrt() and divide operation.
+ */
+static ktime_t cobalt_control(ktime_t t,
+                             u64 interval,
+                             u32 rec_inv_sqrt)
+{
+       return ktime_add_ns(t, reciprocal_scale(interval,
+                                               rec_inv_sqrt));
+}
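+
+/* Worked example: with interval = 100 ms (100,000,000 ns) and count = 4,
+ * rec_inv_sqrt ~= 0x80000000 (0.5 in Q0.32), so reciprocal_scale() yields
+ * (100000000 * 0x80000000) >> 32 = 50,000,000 ns, i.e. the next signalling
+ * event is scheduled interval/sqrt(count) = 50 ms after 't'.
+ */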
+
+/* Call this when a packet had to be dropped due to queue overflow.  Returns
+ * true if the BLUE state was quiescent before but active after this call.
+ */
+static bool cobalt_queue_full(struct cobalt_vars *vars,
+                             struct cobalt_params *p,
+                             ktime_t now)
+{
+       bool up = false;
+
+       if (ktime_to_ns(ktime_sub(now, vars->blue_timer)) > p->target) {
+               up = !vars->p_drop;
+               vars->p_drop += p->p_inc;
+               if (vars->p_drop < p->p_inc)
+                       vars->p_drop = ~0;
+               vars->blue_timer = now;
+       }
+       vars->dropping = true;
+       vars->drop_next = now;
+       if (!vars->count)
+               vars->count = 1;
+
+       return up;
+}
+
+/* Call this when the queue was serviced but turned out to be empty.  Returns
+ * true if the BLUE state was active before but quiescent after this call.
+ */
+static bool cobalt_queue_empty(struct cobalt_vars *vars,
+                              struct cobalt_params *p,
+                              ktime_t now)
+{
+       bool down = false;
+
+       if (vars->p_drop &&
+           ktime_to_ns(ktime_sub(now, vars->blue_timer)) > p->target) {
+               if (vars->p_drop < p->p_dec)
+                       vars->p_drop = 0;
+               else
+                       vars->p_drop -= p->p_dec;
+               vars->blue_timer = now;
+               down = !vars->p_drop;
+       }
+       vars->dropping = false;
+
+       if (vars->count && ktime_to_ns(ktime_sub(now, vars->drop_next)) >= 0) {
+               vars->count--;
+               cobalt_invsqrt(vars);
+               vars->drop_next = cobalt_control(vars->drop_next,
+                                                p->interval,
+                                                vars->rec_inv_sqrt);
+       }
+
+       return down;
+}
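+
+/* Assuming the default parameters set elsewhere in this file
+ * (p_inc = 1 << 24 ~= 1/256, p_dec = 1 << 20 ~= 1/4096 in 0.32 fxp),
+ * each overflow drop raises the BLUE drop probability by ~0.4% and each
+ * empty service lowers it by ~0.02%, saturating at 1.0 thanks to the
+ * wrap check in cobalt_queue_full().
+ */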
+
+/* Call this with a freshly dequeued packet for possible congestion marking.
+ * Returns true as an instruction to drop the packet, false for delivery.
+ */
+static bool cobalt_should_drop(struct cobalt_vars *vars,
+                              struct cobalt_params *p,
+                              ktime_t now,
+                              struct sk_buff *skb,
+                              u32 bulk_flows)
+{
+       bool next_due, over_target, drop = false;
+       ktime_t schedule;
+       u64 sojourn;
+
+/* The 'schedule' variable records, in its sign, whether 'now' is before or
+ * after 'drop_next'.  This allows 'drop_next' to be updated before the next
+ * scheduling decision is actually acted upon, without destroying that
+ * information.  Similarly, the first 'schedule' value calculated is preserved
+ * in the boolean 'next_due'.
+ *
+ * As for 'drop_next', we take advantage of the fact that 'interval' is both
+ * the delay between first exceeding 'target' and the first signalling event,
+ * *and* the scaling factor for the signalling frequency.  It's therefore very
+ * natural to use a single mechanism for both purposes, and eliminates a
+ * significant amount of reference Codel's spaghetti code.  To help with this,
+ * both the '0' and '1' entries in the invsqrt cache are 0xFFFFFFFF, as close
+ * as possible to 1.0 in fixed-point.
+ */
+
+       sojourn = ktime_to_ns(ktime_sub(now, cobalt_get_enqueue_time(skb)));
+       schedule = ktime_sub(now, vars->drop_next);
+       over_target = sojourn > p->target &&
+                     sojourn > p->mtu_time * bulk_flows * 2 &&
+                     sojourn > p->mtu_time * 4;
+       next_due = vars->count && ktime_to_ns(schedule) >= 0;
+
+       vars->ecn_marked = false;
+
+       if (over_target) {
+               if (!vars->dropping) {
+                       vars->dropping = true;
+                       vars->drop_next = cobalt_control(now,
+                                                        p->interval,
+                                                        vars->rec_inv_sqrt);
+               }
+               if (!vars->count)
+                       vars->count = 1;
+       } else if (vars->dropping) {
+               vars->dropping = false;
+       }
+
+       if (next_due && vars->dropping) {
+               /* Use ECN mark if possible, otherwise drop */
+               drop = !(vars->ecn_marked = INET_ECN_set_ce(skb));
+
+               vars->count++;
+               if (!vars->count)
+                       vars->count--;
+               cobalt_invsqrt(vars);
+               vars->drop_next = cobalt_control(vars->drop_next,
+                                                p->interval,
+                                                vars->rec_inv_sqrt);
+               schedule = ktime_sub(now, vars->drop_next);
+       } else {
+               while (next_due) {
+                       vars->count--;
+                       cobalt_invsqrt(vars);
+                       vars->drop_next = cobalt_control(vars->drop_next,
+                                                        p->interval,
+                                                        vars->rec_inv_sqrt);
+                       schedule = ktime_sub(now, vars->drop_next);
+                       next_due = vars->count && ktime_to_ns(schedule) >= 0;
+               }
+       }
+
+       /* Simple BLUE implementation.  Lack of ECN is deliberate. */
+       if (vars->p_drop)
+               drop |= (prandom_u32() < vars->p_drop);
+
+       /* Overload the drop_next field as an activity timeout */
+       if (!vars->count)
+               vars->drop_next = ktime_add_ns(now, p->interval);
+       else if (ktime_to_ns(schedule) > 0 && !drop)
+               vars->drop_next = now;
+
+       return drop;
+}
+
+static void cake_update_flowkeys(struct flow_keys *keys,
+                                const struct sk_buff *skb)
+{
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+       struct nf_conntrack_tuple tuple = {};
+       bool rev = !skb->_nfct;
+
+       if (tc_skb_protocol(skb) != htons(ETH_P_IP))
+               return;
+
+       if (!nf_ct_get_tuple_skb(&tuple, skb))
+               return;
+
+       keys->addrs.v4addrs.src = rev ? tuple.dst.u3.ip : tuple.src.u3.ip;
+       keys->addrs.v4addrs.dst = rev ? tuple.src.u3.ip : tuple.dst.u3.ip;
+
+       if (keys->ports.ports) {
+               keys->ports.src = rev ? tuple.dst.u.all : tuple.src.u.all;
+               keys->ports.dst = rev ? tuple.src.u.all : tuple.dst.u.all;
+       }
+#endif
+}
+
+/* CAKE's flow modes overlap in their bit settings: triple-isolate mode
+ * sets both of the DUAL bits, so the exact-match tests below match it
+ * as well as the individual dual modes.
+ */
+
+static bool cake_dsrc(int flow_mode)
+{
+       return (flow_mode & CAKE_FLOW_DUAL_SRC) == CAKE_FLOW_DUAL_SRC;
+}
+
+static bool cake_ddst(int flow_mode)
+{
+       return (flow_mode & CAKE_FLOW_DUAL_DST) == CAKE_FLOW_DUAL_DST;
+}
+
+static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb,
+                    int flow_mode)
+{
+       u32 flow_hash = 0, srchost_hash, dsthost_hash;
+       u16 reduced_hash, srchost_idx, dsthost_idx;
+       struct flow_keys keys, host_keys;
+
+       if (unlikely(flow_mode == CAKE_FLOW_NONE))
+               return 0;
+
+       skb_flow_dissect_flow_keys(skb, &keys,
+                                  FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
+
+       if (flow_mode & CAKE_FLOW_NAT_FLAG)
+               cake_update_flowkeys(&keys, skb);
+
+       /* flow_hash_from_keys() sorts the addresses by value, so we have
+        * to preserve their order in a separate data structure to treat
+        * src and dst host addresses as independently selectable.
+        */
+       host_keys = keys;
+       host_keys.ports.ports     = 0;
+       host_keys.basic.ip_proto  = 0;
+       host_keys.keyid.keyid     = 0;
+       host_keys.tags.flow_label = 0;
+
+       switch (host_keys.control.addr_type) {
+       case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
+               host_keys.addrs.v4addrs.src = 0;
+               dsthost_hash = flow_hash_from_keys(&host_keys);
+               host_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
+               host_keys.addrs.v4addrs.dst = 0;
+               srchost_hash = flow_hash_from_keys(&host_keys);
+               break;
+
+       case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
+               memset(&host_keys.addrs.v6addrs.src, 0,
+                      sizeof(host_keys.addrs.v6addrs.src));
+               dsthost_hash = flow_hash_from_keys(&host_keys);
+               host_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src;
+               memset(&host_keys.addrs.v6addrs.dst, 0,
+                      sizeof(host_keys.addrs.v6addrs.dst));
+               srchost_hash = flow_hash_from_keys(&host_keys);
+               break;
+
+       default:
+               dsthost_hash = 0;
+               srchost_hash = 0;
+       }
+
+       /* This *must* be after the above switch, since as a
+        * side-effect it sorts the src and dst addresses.
+        */
+       if (flow_mode & CAKE_FLOW_FLOWS)
+               flow_hash = flow_hash_from_keys(&keys);
+
+       if (!(flow_mode & CAKE_FLOW_FLOWS)) {
+               if (flow_mode & CAKE_FLOW_SRC_IP)
+                       flow_hash ^= srchost_hash;
+
+               if (flow_mode & CAKE_FLOW_DST_IP)
+                       flow_hash ^= dsthost_hash;
+       }
+
+       reduced_hash = flow_hash % CAKE_QUEUES;
+
+       /* set-associative hashing */
+       /* fast path if no hash collision (direct lookup succeeds) */
+       if (likely(q->tags[reduced_hash] == flow_hash &&
+                  q->flows[reduced_hash].set)) {
+               q->way_directs++;
+       } else {
+               u32 inner_hash = reduced_hash % CAKE_SET_WAYS;
+               u32 outer_hash = reduced_hash - inner_hash;
+               bool allocate_src = false;
+               bool allocate_dst = false;
+               u32 i, k;
+
+               /* check if any active queue in the set is reserved for
+                * this flow.
+                */
+               for (i = 0, k = inner_hash; i < CAKE_SET_WAYS;
+                    i++, k = (k + 1) % CAKE_SET_WAYS) {
+                       if (q->tags[outer_hash + k] == flow_hash) {
+                               if (i)
+                                       q->way_hits++;
+
+                               if (!q->flows[outer_hash + k].set) {
+                                       /* need to increment host refcnts */
+                                       allocate_src = cake_dsrc(flow_mode);
+                                       allocate_dst = cake_ddst(flow_mode);
+                               }
+
+                               goto found;
+                       }
+               }
+
+               /* no queue is reserved for this flow, look for an
+                * empty one.
+                */
+               for (i = 0; i < CAKE_SET_WAYS;
+                        i++, k = (k + 1) % CAKE_SET_WAYS) {
+                       if (!q->flows[outer_hash + k].set) {
+                               q->way_misses++;
+                               allocate_src = cake_dsrc(flow_mode);
+                               allocate_dst = cake_ddst(flow_mode);
+                               goto found;
+                       }
+               }
+
+               /* With no empty queues, default to the original
+                * queue, accept the collision, update the host tags.
+                */
+               q->way_collisions++;
+               q->hosts[q->flows[reduced_hash].srchost].srchost_refcnt--;
+               q->hosts[q->flows[reduced_hash].dsthost].dsthost_refcnt--;
+               allocate_src = cake_dsrc(flow_mode);
+               allocate_dst = cake_ddst(flow_mode);
+found:
+               /* reserve queue for future packets in same flow */
+               reduced_hash = outer_hash + k;
+               q->tags[reduced_hash] = flow_hash;
+
+               if (allocate_src) {
+                       srchost_idx = srchost_hash % CAKE_QUEUES;
+                       inner_hash = srchost_idx % CAKE_SET_WAYS;
+                       outer_hash = srchost_idx - inner_hash;
+                       for (i = 0, k = inner_hash; i < CAKE_SET_WAYS;
+                               i++, k = (k + 1) % CAKE_SET_WAYS) {
+                               if (q->hosts[outer_hash + k].srchost_tag ==
+                                   srchost_hash)
+                                       goto found_src;
+                       }
+                       for (i = 0; i < CAKE_SET_WAYS;
+                               i++, k = (k + 1) % CAKE_SET_WAYS) {
+                               if (!q->hosts[outer_hash + k].srchost_refcnt)
+                                       break;
+                       }
+                       q->hosts[outer_hash + k].srchost_tag = srchost_hash;
+found_src:
+                       srchost_idx = outer_hash + k;
+                       q->hosts[srchost_idx].srchost_refcnt++;
+                       q->flows[reduced_hash].srchost = srchost_idx;
+               }
+
+               if (allocate_dst) {
+                       dsthost_idx = dsthost_hash % CAKE_QUEUES;
+                       inner_hash = dsthost_idx % CAKE_SET_WAYS;
+                       outer_hash = dsthost_idx - inner_hash;
+                       for (i = 0, k = inner_hash; i < CAKE_SET_WAYS;
+                            i++, k = (k + 1) % CAKE_SET_WAYS) {
+                               if (q->hosts[outer_hash + k].dsthost_tag ==
+                                   dsthost_hash)
+                                       goto found_dst;
+                       }
+                       for (i = 0; i < CAKE_SET_WAYS;
+                            i++, k = (k + 1) % CAKE_SET_WAYS) {
+                               if (!q->hosts[outer_hash + k].dsthost_refcnt)
+                                       break;
+                       }
+                       q->hosts[outer_hash + k].dsthost_tag = dsthost_hash;
+found_dst:
+                       dsthost_idx = outer_hash + k;
+                       q->hosts[dsthost_idx].dsthost_refcnt++;
+                       q->flows[reduced_hash].dsthost = dsthost_idx;
+               }
+       }
+
+       return reduced_hash;
+}
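+
+/* Illustration of the set-associative lookup above, assuming the constants
+ * defined earlier in the file (not in this hunk) are CAKE_QUEUES = 1024 and
+ * CAKE_SET_WAYS = 8: a flow_hash of 0x12345678 gives
+ * reduced_hash = 0x12345678 % 1024 = 632, inner_hash = 632 % 8 = 0 and
+ * outer_hash = 632, so the flow may occupy any of the eight queues
+ * 632..639.  A collision is only accepted once all eight ways hold other
+ * active flows.
+ */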
+
+/* helper functions: might be changed when/if skb uses a standard list_head */
+/* remove one skb from head of slot queue */
+
+static struct sk_buff *dequeue_head(struct cake_flow *flow)
+{
+       struct sk_buff *skb = flow->head;
+
+       if (skb) {
+               flow->head = skb->next;
+               skb->next = NULL;
+       }
+
+       return skb;
+}
+
+/* add skb to flow queue (tail add) */
+
+static void flow_queue_add(struct cake_flow *flow, struct sk_buff *skb)
+{
+       if (!flow->head)
+               flow->head = skb;
+       else
+               flow->tail->next = skb;
+       flow->tail = skb;
+       skb->next = NULL;
+}
+
+static struct iphdr *cake_get_iphdr(const struct sk_buff *skb,
+                                   struct ipv6hdr *buf)
+{
+       unsigned int offset = skb_network_offset(skb);
+       struct iphdr *iph;
+
+       iph = skb_header_pointer(skb, offset, sizeof(struct iphdr), buf);
+
+       if (!iph)
+               return NULL;
+
+       if (iph->version == 4 && iph->protocol == IPPROTO_IPV6)
+               return skb_header_pointer(skb, offset + iph->ihl * 4,
+                                         sizeof(struct ipv6hdr), buf);
+
+       else if (iph->version == 4)
+               return iph;
+
+       else if (iph->version == 6)
+               return skb_header_pointer(skb, offset, sizeof(struct ipv6hdr),
+                                         buf);
+
+       return NULL;
+}
+
+static struct tcphdr *cake_get_tcphdr(const struct sk_buff *skb,
+                                     void *buf, unsigned int bufsize)
+{
+       unsigned int offset = skb_network_offset(skb);
+       const struct ipv6hdr *ipv6h;
+       const struct tcphdr *tcph;
+       const struct iphdr *iph;
+       struct ipv6hdr _ipv6h;
+       struct tcphdr _tcph;
+
+       ipv6h = skb_header_pointer(skb, offset, sizeof(_ipv6h), &_ipv6h);
+
+       if (!ipv6h)
+               return NULL;
+
+       if (ipv6h->version == 4) {
+               iph = (struct iphdr *)ipv6h;
+               offset += iph->ihl * 4;
+
+               /* special-case 6in4 tunnelling, as that is a common way to get
+                * v6 connectivity in the home
+                */
+               if (iph->protocol == IPPROTO_IPV6) {
+                       ipv6h = skb_header_pointer(skb, offset,
+                                                  sizeof(_ipv6h), &_ipv6h);
+
+                       if (!ipv6h || ipv6h->nexthdr != IPPROTO_TCP)
+                               return NULL;
+
+                       offset += sizeof(struct ipv6hdr);
+
+               } else if (iph->protocol != IPPROTO_TCP) {
+                       return NULL;
+               }
+
+       } else if (ipv6h->version == 6) {
+               if (ipv6h->nexthdr != IPPROTO_TCP)
+                       return NULL;
+
+               offset += sizeof(struct ipv6hdr);
+       } else {
+               return NULL;
+       }
+
+       tcph = skb_header_pointer(skb, offset, sizeof(_tcph), &_tcph);
+       if (!tcph)
+               return NULL;
+
+       return skb_header_pointer(skb, offset,
+                                 min(__tcp_hdrlen(tcph), bufsize), buf);
+}
+
+static const void *cake_get_tcpopt(const struct tcphdr *tcph,
+                                  int code, int *oplen)
+{
+       /* inspired by tcp_parse_options in tcp_input.c */
+       int length = __tcp_hdrlen(tcph) - sizeof(struct tcphdr);
+       const u8 *ptr = (const u8 *)(tcph + 1);
+
+       while (length > 0) {
+               int opcode = *ptr++;
+               int opsize;
+
+               if (opcode == TCPOPT_EOL)
+                       break;
+               if (opcode == TCPOPT_NOP) {
+                       length--;
+                       continue;
+               }
+               opsize = *ptr++;
+               if (opsize < 2 || opsize > length)
+                       break;
+
+               if (opcode == code) {
+                       *oplen = opsize;
+                       return ptr;
+               }
+
+               ptr += opsize - 2;
+               length -= opsize;
+       }
+
+       return NULL;
+}
+
+/* Compare two SACK sequences. A sequence is considered greater if it SACKs
+ * more bytes than the other. In the case where each sequence ACKs bytes that
+ * the other doesn't, A is considered greater. DSACKs in A also make A
+ * considered greater.
+ *
+ * @return -1, 0 or 1, as for normal compare functions
+ */
+static int cake_tcph_sack_compare(const struct tcphdr *tcph_a,
+                                 const struct tcphdr *tcph_b)
+{
+       const struct tcp_sack_block_wire *sack_a, *sack_b;
+       u32 ack_seq_a = ntohl(tcph_a->ack_seq);
+       u32 bytes_a = 0, bytes_b = 0;
+       int oplen_a, oplen_b;
+       bool first = true;
+
+       sack_a = cake_get_tcpopt(tcph_a, TCPOPT_SACK, &oplen_a);
+       sack_b = cake_get_tcpopt(tcph_b, TCPOPT_SACK, &oplen_b);
+
+       /* pointers point to option contents */
+       oplen_a -= TCPOLEN_SACK_BASE;
+       oplen_b -= TCPOLEN_SACK_BASE;
+
+       if (sack_a && oplen_a >= sizeof(*sack_a) &&
+           (!sack_b || oplen_b < sizeof(*sack_b)))
+               return -1;
+       else if (sack_b && oplen_b >= sizeof(*sack_b) &&
+                (!sack_a || oplen_a < sizeof(*sack_a)))
+               return 1;
+       else if ((!sack_a || oplen_a < sizeof(*sack_a)) &&
+                (!sack_b || oplen_b < sizeof(*sack_b)))
+               return 0;
+
+       while (oplen_a >= sizeof(*sack_a)) {
+               const struct tcp_sack_block_wire *sack_tmp = sack_b;
+               u32 start_a = get_unaligned_be32(&sack_a->start_seq);
+               u32 end_a = get_unaligned_be32(&sack_a->end_seq);
+               int oplen_tmp = oplen_b;
+               bool found = false;
+
+               /* DSACK; always considered greater to prevent dropping */
+               if (before(start_a, ack_seq_a))
+                       return -1;
+
+               bytes_a += end_a - start_a;
+
+               while (oplen_tmp >= sizeof(*sack_tmp)) {
+                       u32 start_b = get_unaligned_be32(&sack_tmp->start_seq);
+                       u32 end_b = get_unaligned_be32(&sack_tmp->end_seq);
+
+                       /* first time through we count the total size */
+                       if (first)
+                               bytes_b += end_b - start_b;
+
+                       if (!after(start_b, start_a) && !before(end_b, end_a)) {
+                               found = true;
+                               if (!first)
+                                       break;
+                       }
+                       oplen_tmp -= sizeof(*sack_tmp);
+                       sack_tmp++;
+               }
+
+               if (!found)
+                       return -1;
+
+               oplen_a -= sizeof(*sack_a);
+               sack_a++;
+               first = false;
+       }
+
+       /* If we made it this far, all ranges SACKed by A are covered by B, so
+        * either the SACKs are equal, or B SACKs more bytes.
+        */
+       return bytes_b > bytes_a ? 1 : 0;
+}
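+
+/* Example: if A SACKs only [1000,2000) while B SACKs [1000,3000), every
+ * range in A is covered by B and bytes_b (2000) > bytes_a (1000), so the
+ * function returns 1.  If instead B SACKed only [2500,3000), A's range
+ * would not be covered and the function would return -1.
+ */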
+
+static void cake_tcph_get_tstamp(const struct tcphdr *tcph,
+                                u32 *tsval, u32 *tsecr)
+{
+       const u8 *ptr;
+       int opsize;
+
+       ptr = cake_get_tcpopt(tcph, TCPOPT_TIMESTAMP, &opsize);
+
+       if (ptr && opsize == TCPOLEN_TIMESTAMP) {
+               *tsval = get_unaligned_be32(ptr);
+               *tsecr = get_unaligned_be32(ptr + 4);
+       }
+}
+
+static bool cake_tcph_may_drop(const struct tcphdr *tcph,
+                              u32 tstamp_new, u32 tsecr_new)
+{
+       /* inspired by tcp_parse_options in tcp_input.c */
+       int length = __tcp_hdrlen(tcph) - sizeof(struct tcphdr);
+       const u8 *ptr = (const u8 *)(tcph + 1);
+       u32 tstamp, tsecr;
+
+       /* 3 reserved flags must be unset to avoid future breakage
+        * ACK must be set
+        * ECE/CWR are handled separately
+        * All other flags URG/PSH/RST/SYN/FIN must be unset
+        * 0x0FFF0000 = all TCP flags (confirm ACK=1, others zero)
+        * 0x00C00000 = CWR/ECE (handled separately)
+        * 0x0F3F0000 = 0x0FFF0000 & ~0x00C00000
+        */
+       if (((tcp_flag_word(tcph) &
+             cpu_to_be32(0x0F3F0000)) != TCP_FLAG_ACK))
+               return false;
+
+       while (length > 0) {
+               int opcode = *ptr++;
+               int opsize;
+
+               if (opcode == TCPOPT_EOL)
+                       break;
+               if (opcode == TCPOPT_NOP) {
+                       length--;
+                       continue;
+               }
+               opsize = *ptr++;
+               if (opsize < 2 || opsize > length)
+                       break;
+
+               switch (opcode) {
+               case TCPOPT_MD5SIG: /* doesn't influence state */
+                       break;
+
+               case TCPOPT_SACK: /* stricter checking performed later */
+                       if (opsize % 8 != 2)
+                               return false;
+                       break;
+
+               case TCPOPT_TIMESTAMP:
+                       /* only drop timestamps lower than new */
+                       if (opsize != TCPOLEN_TIMESTAMP)
+                               return false;
+                       tstamp = get_unaligned_be32(ptr);
+                       tsecr = get_unaligned_be32(ptr + 4);
+                       if (after(tstamp, tstamp_new) ||
+                           after(tsecr, tsecr_new))
+                               return false;
+                       break;
+
+               case TCPOPT_MSS:  /* these should only be set on SYN */
+               case TCPOPT_WINDOW:
+               case TCPOPT_SACK_PERM:
+               case TCPOPT_FASTOPEN:
+               case TCPOPT_EXP:
+               default: /* don't drop if any unknown options are present */
+                       return false;
+               }
+
+               ptr += opsize - 2;
+               length -= opsize;
+       }
+
+       return true;
+}
+
+static struct sk_buff *cake_ack_filter(struct cake_sched_data *q,
+                                      struct cake_flow *flow)
+{
+       bool aggressive = q->ack_filter == CAKE_ACK_AGGRESSIVE;
+       struct sk_buff *elig_ack = NULL, *elig_ack_prev = NULL;
+       struct sk_buff *skb_check, *skb_prev = NULL;
+       const struct ipv6hdr *ipv6h, *ipv6h_check;
+       unsigned char _tcph[64], _tcph_check[64];
+       const struct tcphdr *tcph, *tcph_check;
+       const struct iphdr *iph, *iph_check;
+       struct ipv6hdr _iph, _iph_check;
+       const struct sk_buff *skb;
+       int seglen, num_found = 0;
+       u32 tstamp = 0, tsecr = 0;
+       __be32 elig_flags = 0;
+       int sack_comp;
+
+       /* no other possible ACKs to filter */
+       if (flow->head == flow->tail)
+               return NULL;
+
+       skb = flow->tail;
+       tcph = cake_get_tcphdr(skb, _tcph, sizeof(_tcph));
+       iph = cake_get_iphdr(skb, &_iph);
+       if (!tcph)
+               return NULL;
+
+       cake_tcph_get_tstamp(tcph, &tstamp, &tsecr);
+
+       /* The 'triggering' packet need only have the ACK flag set.
+        * Also check that SYN is not set, as there won't be any previous ACKs.
+        */
+       if ((tcp_flag_word(tcph) &
+            (TCP_FLAG_ACK | TCP_FLAG_SYN)) != TCP_FLAG_ACK)
+               return NULL;
+
+       /* The 'triggering' ACK is at the tail of the queue; we have already
+        * returned if it is the only packet in the flow. Loop through the rest
+        * of the queue looking for pure ACKs with the same 5-tuple as the
+        * triggering one.
+        */
+       for (skb_check = flow->head;
+            skb_check && skb_check != skb;
+            skb_prev = skb_check, skb_check = skb_check->next) {
+               iph_check = cake_get_iphdr(skb_check, &_iph_check);
+               tcph_check = cake_get_tcphdr(skb_check, &_tcph_check,
+                                            sizeof(_tcph_check));
+
+               /* only TCP packets with a matching 5-tuple are eligible, and
+                * only those whose headers are safe to drop
+                */
+               if (!tcph_check || iph->version != iph_check->version ||
+                   tcph_check->source != tcph->source ||
+                   tcph_check->dest != tcph->dest)
+                       continue;
+
+               if (iph_check->version == 4) {
+                       if (iph_check->saddr != iph->saddr ||
+                           iph_check->daddr != iph->daddr)
+                               continue;
+
+                       seglen = ntohs(iph_check->tot_len) -
+                                      (4 * iph_check->ihl);
+               } else if (iph_check->version == 6) {
+                       ipv6h = (struct ipv6hdr *)iph;
+                       ipv6h_check = (struct ipv6hdr *)iph_check;
+
+                       if (ipv6_addr_cmp(&ipv6h_check->saddr, &ipv6h->saddr) ||
+                           ipv6_addr_cmp(&ipv6h_check->daddr, &ipv6h->daddr))
+                               continue;
+
+                       seglen = ntohs(ipv6h_check->payload_len);
+               } else {
+                       WARN_ON(1);  /* shouldn't happen */
+                       continue;
+               }
+
+               /* If the ECE/CWR flags changed from the previous eligible
+                * packet in the same flow, we should no longer be dropping that
+                * previous packet as this would lose information.
+                */
+               if (elig_ack && (tcp_flag_word(tcph_check) &
+                                (TCP_FLAG_ECE | TCP_FLAG_CWR)) != elig_flags) {
+                       elig_ack = NULL;
+                       elig_ack_prev = NULL;
+                       num_found--;
+               }
+
+               /* Check TCP options and flags, don't drop ACKs with segment
+                * data, and don't drop ACKs with a higher cumulative ACK
+                * counter than the triggering packet. Check ACK seqno here to
+                * avoid parsing SACK options of packets we are going to exclude
+                * anyway.
+                */
+               if (!cake_tcph_may_drop(tcph_check, tstamp, tsecr) ||
+                   (seglen - __tcp_hdrlen(tcph_check)) != 0 ||
+                   after(ntohl(tcph_check->ack_seq), ntohl(tcph->ack_seq)))
+                       continue;
+
+               /* Check SACK options. The triggering packet must SACK more data
+                * than the ACK under consideration, or SACK the same range but
+                * have a larger cumulative ACK counter. The latter is a
+                * pathological case, but is contained in the following check
+                * anyway, just to be safe.
+                */
+               sack_comp = cake_tcph_sack_compare(tcph_check, tcph);
+
+               if (sack_comp < 0 ||
+                   (ntohl(tcph_check->ack_seq) == ntohl(tcph->ack_seq) &&
+                    sack_comp == 0))
+                       continue;
+
+               /* At this point we have found an eligible pure ACK to drop; if
+                * we are in aggressive mode, we are done. Otherwise, keep
+                * searching unless this is the second eligible ACK we
+                * found.
+                *
+                * Since we want to drop ACK closest to the head of the queue,
+                * save the first eligible ACK we find, even if we need to loop
+                * again.
+                */
+               if (!elig_ack) {
+                       elig_ack = skb_check;
+                       elig_ack_prev = skb_prev;
+                       elig_flags = (tcp_flag_word(tcph_check)
+                                     & (TCP_FLAG_ECE | TCP_FLAG_CWR));
+               }
+
+               if (num_found++ > 0)
+                       goto found;
+       }
+
+       /* We made it through the queue without finding two eligible ACKs. If
+        * we found a single eligible ACK we can drop it in aggressive mode if
+        * we can guarantee that this does not interfere with ECN flag
+        * information. We ensure this by dropping it only if the enqueued
+        * packet is consecutive with the eligible ACK, and their flags match.
+        */
+       if (elig_ack && aggressive && elig_ack->next == skb &&
+           (elig_flags == (tcp_flag_word(tcph) &
+                           (TCP_FLAG_ECE | TCP_FLAG_CWR))))
+               goto found;
+
+       return NULL;
+
+found:
+       if (elig_ack_prev)
+               elig_ack_prev->next = elig_ack->next;
+       else
+               flow->head = elig_ack->next;
+
+       elig_ack->next = NULL;
+
+       return elig_ack;
+}
+
+static u64 cake_ewma(u64 avg, u64 sample, u32 shift)
+{
+       avg -= avg >> shift;
+       avg += sample >> shift;
+       return avg;
+}
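+
+/* cake_ewma() keeps a moving average with weight 2^-shift on each new
+ * sample: avg' = avg - avg/2^shift + sample/2^shift.  For example, with
+ * shift = 8, avg = 256000 and sample = 512000:
+ *   avg' = 256000 - 1000 + 2000 = 257000,
+ * i.e. the average moves 1/256th of the way towards the sample.
+ */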
+
+static u32 cake_calc_overhead(struct cake_sched_data *q, u32 len, u32 off)
+{
+       if (q->rate_flags & CAKE_FLAG_OVERHEAD)
+               len -= off;
+
+       if (q->max_netlen < len)
+               q->max_netlen = len;
+       if (q->min_netlen > len)
+               q->min_netlen = len;
+
+       len += q->rate_overhead;
+
+       if (len < q->rate_mpu)
+               len = q->rate_mpu;
+
+       if (q->atm_mode == CAKE_ATM_ATM) {
+               len += 47;
+               len /= 48;
+               len *= 53;
+       } else if (q->atm_mode == CAKE_ATM_PTM) {
+               /* Add one byte per 64 bytes or part thereof.
+                * This is conservative and easier to calculate than the
+                * precise value.
+                */
+               len += (len + 63) / 64;
+       }
+
+       if (q->max_adjlen < len)
+               q->max_adjlen = len;
+       if (q->min_adjlen > len)
+               q->min_adjlen = len;
+
+       return len;
+}
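+
+/* Worked examples of the framing compensation above: a 1500-byte packet
+ * in CAKE_ATM_ATM mode becomes (1500 + 47) / 48 = 32 ATM cells of 53
+ * bytes, i.e. 1696 bytes on the wire; in CAKE_ATM_PTM mode it becomes
+ * 1500 + (1500 + 63) / 64 = 1524 bytes, approximating 64b/65b encoding.
+ */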
+
+static u32 cake_overhead(struct cake_sched_data *q, const struct sk_buff *skb)
+{
+       const struct skb_shared_info *shinfo = skb_shinfo(skb);
+       unsigned int hdr_len, last_len = 0;
+       u32 off = skb_network_offset(skb);
+       u32 len = qdisc_pkt_len(skb);
+       u16 segs = 1;
+
+       q->avg_netoff = cake_ewma(q->avg_netoff, off << 16, 8);
+
+       if (!shinfo->gso_size)
+               return cake_calc_overhead(q, len, off);
+
+       /* borrowed from qdisc_pkt_len_init() */
+       hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
+
+       /* + transport layer */
+       if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 |
+                                               SKB_GSO_TCPV6))) {
+               const struct tcphdr *th;
+               struct tcphdr _tcphdr;
+
+               th = skb_header_pointer(skb, skb_transport_offset(skb),
+                                       sizeof(_tcphdr), &_tcphdr);
+               if (likely(th))
+                       hdr_len += __tcp_hdrlen(th);
+       } else {
+               struct udphdr _udphdr;
+
+               if (skb_header_pointer(skb, skb_transport_offset(skb),
+                                      sizeof(_udphdr), &_udphdr))
+                       hdr_len += sizeof(struct udphdr);
+       }
+
+       if (unlikely(shinfo->gso_type & SKB_GSO_DODGY))
+               segs = DIV_ROUND_UP(skb->len - hdr_len,
+                                   shinfo->gso_size);
+       else
+               segs = shinfo->gso_segs;
+
+       len = shinfo->gso_size + hdr_len;
+       last_len = skb->len - shinfo->gso_size * (segs - 1);
+
+       return (cake_calc_overhead(q, len, off) * (segs - 1) +
+               cake_calc_overhead(q, last_len, off));
+}
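+
+/* GSO example: a super-packet with gso_size = 1448, two segments and
+ * 66 bytes of MAC+IP+TCP headers (TCP with timestamps) has
+ * skb->len = 66 + 2 * 1448 = 2962.  Each segment is then charged as a
+ * 1514-byte packet (gso_size + hdr_len), so the shaper accounts the true
+ * on-wire cost rather than one giant packet.
+ */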
+
+static void cake_heap_swap(struct cake_sched_data *q, u16 i, u16 j)
+{
+       struct cake_heap_entry ii = q->overflow_heap[i];
+       struct cake_heap_entry jj = q->overflow_heap[j];
+
+       q->overflow_heap[i] = jj;
+       q->overflow_heap[j] = ii;
+
+       q->tins[ii.t].overflow_idx[ii.b] = j;
+       q->tins[jj.t].overflow_idx[jj.b] = i;
+}
+
+static u32 cake_heap_get_backlog(const struct cake_sched_data *q, u16 i)
+{
+       struct cake_heap_entry ii = q->overflow_heap[i];
+
+       return q->tins[ii.t].backlogs[ii.b];
+}
+
+static void cake_heapify(struct cake_sched_data *q, u16 i)
+{
+       static const u32 a = CAKE_MAX_TINS * CAKE_QUEUES;
+       u32 mb = cake_heap_get_backlog(q, i);
+       u32 m = i;
+
+       while (m < a) {
+               u32 l = m + m + 1;
+               u32 r = l + 1;
+
+               if (l < a) {
+                       u32 lb = cake_heap_get_backlog(q, l);
+
+                       if (lb > mb) {
+                               m  = l;
+                               mb = lb;
+                       }
+               }
+
+               if (r < a) {
+                       u32 rb = cake_heap_get_backlog(q, r);
+
+                       if (rb > mb) {
+                               m  = r;
+                               mb = rb;
+                       }
+               }
+
+               if (m != i) {
+                       cake_heap_swap(q, i, m);
+                       i = m;
+               } else {
+                       break;
+               }
+       }
+}
+
+static void cake_heapify_up(struct cake_sched_data *q, u16 i)
+{
+       while (i > 0 && i < CAKE_MAX_TINS * CAKE_QUEUES) {
+               u16 p = (i - 1) >> 1;
+               u32 ib = cake_heap_get_backlog(q, i);
+               u32 pb = cake_heap_get_backlog(q, p);
+
+               if (ib > pb) {
+                       cake_heap_swap(q, i, p);
+                       i = p;
+               } else {
+                       break;
+               }
+       }
+}
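+
+/* The overflow heap is a conventional binary max-heap stored in an array:
+ * the children of entry m sit at 2m+1 and 2m+2, and the parent of entry i
+ * at (i - 1) >> 1.  Each entry names one (tin, flow) pair and is keyed by
+ * that queue's backlog, so the root is always the fattest queue, which is
+ * what cake_drop() prunes first.
+ */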
+
+static int cake_advance_shaper(struct cake_sched_data *q,
+                              struct cake_tin_data *b,
+                              struct sk_buff *skb,
+                              ktime_t now, bool drop)
+{
+       u32 len = get_cobalt_cb(skb)->adjusted_len;
+
+       /* charge packet bandwidth to this tin
+        * and to the global shaper.
+        */
+       if (q->rate_ns) {
+               u64 tin_dur = (len * b->tin_rate_ns) >> b->tin_rate_shft;
+               u64 global_dur = (len * q->rate_ns) >> q->rate_shft;
+               u64 failsafe_dur = global_dur + (global_dur >> 1);
+
+               if (ktime_before(b->time_next_packet, now))
+                       b->time_next_packet = ktime_add_ns(b->time_next_packet,
+                                                          tin_dur);
+
+               else if (ktime_before(b->time_next_packet,
+                                     ktime_add_ns(now, tin_dur)))
+                       b->time_next_packet = ktime_add_ns(now, tin_dur);
+
+               q->time_next_packet = ktime_add_ns(q->time_next_packet,
+                                                  global_dur);
+               if (!drop)
+                       q->failsafe_next_packet = \
+                               ktime_add_ns(q->failsafe_next_packet,
+                                            failsafe_dur);
+       }
+       return len;
+}
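+
+/* Shaper arithmetic example: if the configured rate works out to 80 ns
+ * per byte (about 100 Mbit/s), a 1500-byte packet gives
+ *   global_dur   = 1500 * 80 ns = 120,000 ns
+ *   failsafe_dur = 120,000 + 60,000 = 180,000 ns
+ * i.e. the failsafe schedule always runs at 1.5x the nominal packet time.
+ */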
+
+static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free)
+{
+       struct cake_sched_data *q = qdisc_priv(sch);
+       ktime_t now = ktime_get();
+       u32 idx = 0, tin = 0, len;
+       struct cake_heap_entry qq;
+       struct cake_tin_data *b;
+       struct cake_flow *flow;
+       struct sk_buff *skb;
+
+       if (!q->overflow_timeout) {
+               int i;
+               /* Build fresh max-heap */
+               for (i = CAKE_MAX_TINS * CAKE_QUEUES / 2; i >= 0; i--)
+                       cake_heapify(q, i);
+       }
+       q->overflow_timeout = 65535;
+
+       /* select longest queue for pruning */
+       qq  = q->overflow_heap[0];
+       tin = qq.t;
+       idx = qq.b;
+
+       b = &q->tins[tin];
+       flow = &b->flows[idx];
+       skb = dequeue_head(flow);
+       if (unlikely(!skb)) {
+               /* heap has gone wrong, rebuild it next time */
+               q->overflow_timeout = 0;
+               return idx + (tin << 16);
+       }
+
+       if (cobalt_queue_full(&flow->cvars, &b->cparams, now))
+               b->unresponsive_flow_count++;
+
+       len = qdisc_pkt_len(skb);
+       q->buffer_used      -= skb->truesize;
+       b->backlogs[idx]    -= len;
+       b->tin_backlog      -= len;
+       sch->qstats.backlog -= len;
+       qdisc_tree_reduce_backlog(sch, 1, len);
+
+       flow->dropped++;
+       b->tin_dropped++;
+       sch->qstats.drops++;
+
+       if (q->rate_flags & CAKE_FLAG_INGRESS)
+               cake_advance_shaper(q, b, skb, now, true);
+
+       __qdisc_drop(skb, to_free);
+       sch->q.qlen--;
+
+       cake_heapify(q, 0);
+
+       return idx + (tin << 16);
+}
+
+static void cake_wash_diffserv(struct sk_buff *skb)
+{
+       switch (skb->protocol) {
+       case htons(ETH_P_IP):
+               ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, 0);
+               break;
+       case htons(ETH_P_IPV6):
+               ipv6_change_dsfield(ipv6_hdr(skb), INET_ECN_MASK, 0);
+               break;
+       default:
+               break;
+       }
+}
+
+static u8 cake_handle_diffserv(struct sk_buff *skb, u16 wash)
+{
+       u8 dscp;
+
+       switch (skb->protocol) {
+       case htons(ETH_P_IP):
+               dscp = ipv4_get_dsfield(ip_hdr(skb)) >> 2;
+               if (wash && dscp)
+                       ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, 0);
+               return dscp;
+
+       case htons(ETH_P_IPV6):
+               dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2;
+               if (wash && dscp)
+                       ipv6_change_dsfield(ipv6_hdr(skb), INET_ECN_MASK, 0);
+               return dscp;
+
+       case htons(ETH_P_ARP):
+               return 0x38;  /* CS7 - Net Control */
+
+       default:
+               /* If there is no Diffserv field, treat as best-effort */
+               return 0;
+       }
+}
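+
+/* Example: an IPv4 TOS byte of 0xb9 (EF with ECT(1)) yields
+ * dscp = 0xb9 >> 2 = 0x2e (EF, decimal 46).  With wash enabled the field
+ * is rewritten to 0x01: the DSCP bits are cleared but the two ECN bits,
+ * preserved by the INET_ECN_MASK argument, survive.
+ */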
+
+static struct cake_tin_data *cake_select_tin(struct Qdisc *sch,
+                                            struct sk_buff *skb)
+{
+       struct cake_sched_data *q = qdisc_priv(sch);
+       u32 tin;
+
+       if (TC_H_MAJ(skb->priority) == sch->handle &&
+           TC_H_MIN(skb->priority) > 0 &&
+           TC_H_MIN(skb->priority) <= q->tin_cnt) {
+               tin = q->tin_order[TC_H_MIN(skb->priority) - 1];
+
+               if (q->rate_flags & CAKE_FLAG_WASH)
+                       cake_wash_diffserv(skb);
+       } else if (q->tin_mode != CAKE_DIFFSERV_BESTEFFORT) {
+               /* extract the Diffserv Precedence field, if it exists */
+               /* and clear DSCP bits if washing */
+               tin = q->tin_index[cake_handle_diffserv(skb,
+                               q->rate_flags & CAKE_FLAG_WASH)];
+               if (unlikely(tin >= q->tin_cnt))
+                       tin = 0;
+       } else {
+               tin = 0;
+               if (q->rate_flags & CAKE_FLAG_WASH)
+                       cake_wash_diffserv(skb);
+       }
+
+       return &q->tins[tin];
+}
+
+static u32 cake_classify(struct Qdisc *sch, struct cake_tin_data **t,
+                        struct sk_buff *skb, int flow_mode, int *qerr)
+{
+       struct cake_sched_data *q = qdisc_priv(sch);
+       struct tcf_proto *filter;
+       struct tcf_result res;
+       u32 flow = 0;
+       int result;
+
+       filter = rcu_dereference_bh(q->filter_list);
+       if (!filter)
+               goto hash;
+
+       *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
+       result = tcf_classify(skb, filter, &res, false);
+
+       if (result >= 0) {
+#ifdef CONFIG_NET_CLS_ACT
+               switch (result) {
+               case TC_ACT_STOLEN:
+               case TC_ACT_QUEUED:
+               case TC_ACT_TRAP:
+                       *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+                       /* fall through */
+               case TC_ACT_SHOT:
+                       return 0;
+               }
+#endif
+               if (TC_H_MIN(res.classid) <= CAKE_QUEUES)
+                       flow = TC_H_MIN(res.classid);
+       }
+hash:
+       *t = cake_select_tin(sch, skb);
+       return flow ?: cake_hash(*t, skb, flow_mode) + 1;
+}
+
+static void cake_reconfigure(struct Qdisc *sch);
+
+static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+                       struct sk_buff **to_free)
+{
+       struct cake_sched_data *q = qdisc_priv(sch);
+       int len = qdisc_pkt_len(skb);
+       int uninitialized_var(ret);
+       struct sk_buff *ack = NULL;
+       ktime_t now = ktime_get();
+       struct cake_tin_data *b;
+       struct cake_flow *flow;
+       u32 idx;
+
+       /* choose flow to insert into */
+       idx = cake_classify(sch, &b, skb, q->flow_mode, &ret);
+       if (idx == 0) {
+               if (ret & __NET_XMIT_BYPASS)
+                       qdisc_qstats_drop(sch);
+               __qdisc_drop(skb, to_free);
+               return ret;
+       }
+       idx--;
+       flow = &b->flows[idx];
+
+       /* ensure shaper state isn't stale */
+       if (!b->tin_backlog) {
+               if (ktime_before(b->time_next_packet, now))
+                       b->time_next_packet = now;
+
+               if (!sch->q.qlen) {
+                       if (ktime_before(q->time_next_packet, now)) {
+                               q->failsafe_next_packet = now;
+                               q->time_next_packet = now;
+                       } else if (ktime_after(q->time_next_packet, now) &&
+                                  ktime_after(q->failsafe_next_packet, now)) {
+                               u64 next = \
+                                       min(ktime_to_ns(q->time_next_packet),
+                                           ktime_to_ns(
+                                                  q->failsafe_next_packet));
+                               sch->qstats.overlimits++;
+                               qdisc_watchdog_schedule_ns(&q->watchdog, next);
+                       }
+               }
+       }
+
+       if (unlikely(len > b->max_skblen))
+               b->max_skblen = len;
+
+       if (skb_is_gso(skb) && q->rate_flags & CAKE_FLAG_SPLIT_GSO) {
+               struct sk_buff *segs, *nskb;
+               netdev_features_t features = netif_skb_features(skb);
+               unsigned int slen = 0;
+
+               segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
+               if (IS_ERR_OR_NULL(segs))
+                       return qdisc_drop(skb, sch, to_free);
+
+               while (segs) {
+                       nskb = segs->next;
+                       segs->next = NULL;
+                       qdisc_skb_cb(segs)->pkt_len = segs->len;
+                       cobalt_set_enqueue_time(segs, now);
+                       get_cobalt_cb(segs)->adjusted_len = cake_overhead(q,
+                                                                         segs);
+                       flow_queue_add(flow, segs);
+
+                       sch->q.qlen++;
+                       slen += segs->len;
+                       q->buffer_used += segs->truesize;
+                       b->packets++;
+                       segs = nskb;
+               }
+
+               /* stats */
+               b->bytes            += slen;
+               b->backlogs[idx]    += slen;
+               b->tin_backlog      += slen;
+               sch->qstats.backlog += slen;
+               q->avg_window_bytes += slen;
+
+               qdisc_tree_reduce_backlog(sch, 1, len);
+               consume_skb(skb);
+       } else {
+               /* not splitting */
+               cobalt_set_enqueue_time(skb, now);
+               get_cobalt_cb(skb)->adjusted_len = cake_overhead(q, skb);
+               flow_queue_add(flow, skb);
+
+               if (q->ack_filter)
+                       ack = cake_ack_filter(q, flow);
+
+               if (ack) {
+                       b->ack_drops++;
+                       sch->qstats.drops++;
+                       b->bytes += qdisc_pkt_len(ack);
+                       len -= qdisc_pkt_len(ack);
+                       q->buffer_used += skb->truesize - ack->truesize;
+                       if (q->rate_flags & CAKE_FLAG_INGRESS)
+                               cake_advance_shaper(q, b, ack, now, true);
+
+                       qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(ack));
+                       consume_skb(ack);
+               } else {
+                       sch->q.qlen++;
+                       q->buffer_used      += skb->truesize;
+               }
+
+               /* stats */
+               b->packets++;
+               b->bytes            += len;
+               b->backlogs[idx]    += len;
+               b->tin_backlog      += len;
+               sch->qstats.backlog += len;
+               q->avg_window_bytes += len;
+       }
+
+       if (q->overflow_timeout)
+               cake_heapify_up(q, b->overflow_idx[idx]);
+
+       /* incoming bandwidth capacity estimate */
+       if (q->rate_flags & CAKE_FLAG_AUTORATE_INGRESS) {
+               u64 packet_interval = \
+                       ktime_to_ns(ktime_sub(now, q->last_packet_time));
+
+               if (packet_interval > NSEC_PER_SEC)
+                       packet_interval = NSEC_PER_SEC;
+
+               /* filter out short-term bursts, e.g. wifi aggregation */
+               q->avg_packet_interval = \
+                       cake_ewma(q->avg_packet_interval,
+                                 packet_interval,
+                                 (packet_interval > q->avg_packet_interval ?
+                                         2 : 8));
+
+               q->last_packet_time = now;
+
+               if (packet_interval > q->avg_packet_interval) {
+                       u64 window_interval = \
+                               ktime_to_ns(ktime_sub(now,
+                                                     q->avg_window_begin));
+                       u64 b = q->avg_window_bytes * (u64)NSEC_PER_SEC;
+
+                       do_div(b, window_interval);
+                       q->avg_peak_bandwidth =
+                               cake_ewma(q->avg_peak_bandwidth, b,
+                                         b > q->avg_peak_bandwidth ? 2 : 8);
+                       q->avg_window_bytes = 0;
+                       q->avg_window_begin = now;
+
+                       if (ktime_after(now,
+                                       ktime_add_ms(q->last_reconfig_time,
+                                                    250))) {
+                               q->rate_bps = (q->avg_peak_bandwidth * 15) >> 4;
+                               cake_reconfigure(sch);
+                       }
+               }
+       } else {
+               q->avg_window_bytes = 0;
+               q->last_packet_time = now;
+       }
+
+       /* flowchain */
+       if (!flow->set || flow->set == CAKE_SET_DECAYING) {
+               struct cake_host *srchost = &b->hosts[flow->srchost];
+               struct cake_host *dsthost = &b->hosts[flow->dsthost];
+               u16 host_load = 1;
+
+               if (!flow->set) {
+                       list_add_tail(&flow->flowchain, &b->new_flows);
+               } else {
+                       b->decaying_flow_count--;
+                       list_move_tail(&flow->flowchain, &b->new_flows);
+               }
+               flow->set = CAKE_SET_SPARSE;
+               b->sparse_flow_count++;
+
+               if (cake_dsrc(q->flow_mode))
+                       host_load = max(host_load, srchost->srchost_refcnt);
+
+               if (cake_ddst(q->flow_mode))
+                       host_load = max(host_load, dsthost->dsthost_refcnt);
+
+               flow->deficit = (b->flow_quantum *
+                                quantum_div[host_load]) >> 16;
+       } else if (flow->set == CAKE_SET_SPARSE_WAIT) {
+               /* This flow was empty and accounted as a sparse flow, but it
+                * is actually in the bulk rotation.
+                */
+               flow->set = CAKE_SET_BULK;
+               b->sparse_flow_count--;
+               b->bulk_flow_count++;
+       }
+
+       if (q->buffer_used > q->buffer_max_used)
+               q->buffer_max_used = q->buffer_used;
+
+       if (q->buffer_used > q->buffer_limit) {
+               u32 dropped = 0;
+
+               while (q->buffer_used > q->buffer_limit) {
+                       dropped++;
+                       cake_drop(sch, to_free);
+               }
+               b->drop_overlimit += dropped;
+       }
+       return NET_XMIT_SUCCESS;
+}
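
The capacity estimator in the enqueue path above is two cascaded shift-EWMAs: packet intervals feed q->avg_packet_interval, and longer-than-average intervals trigger a bandwidth sample into q->avg_peak_bandwidth. A minimal userspace sketch of the filter arithmetic, assuming cake_ewma() (defined earlier in this file, outside this hunk) follows the usual shift-based form:

    #include <stdio.h>
    #include <stdint.h>

    /* Shift-based EWMA: blend each sample in with weight 1/2^shift.
     * CAKE uses shift 2 (fast attack) when the sample exceeds the
     * average and shift 8 (slow decay) otherwise, which filters out
     * short-term bursts such as wifi aggregation.
     */
    static uint64_t ewma(uint64_t avg, uint64_t sample, unsigned int shift)
    {
            avg -= avg >> shift;
            avg += sample >> shift;
            return avg;
    }

    int main(void)
    {
            /* packet gaps in ns: an aggregated burst, then a long idle gap */
            uint64_t samples[] = { 10000, 10000, 10000, 5000000 };
            uint64_t avg = 0;

            for (unsigned int i = 0; i < sizeof(samples) / sizeof(*samples); i++) {
                    avg = ewma(avg, samples[i], samples[i] > avg ? 2 : 8);
                    printf("sample %8llu -> avg %8llu\n",
                           (unsigned long long)samples[i],
                           (unsigned long long)avg);
            }
            return 0;
    }
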
+
+static struct sk_buff *cake_dequeue_one(struct Qdisc *sch)
+{
+       struct cake_sched_data *q = qdisc_priv(sch);
+       struct cake_tin_data *b = &q->tins[q->cur_tin];
+       struct cake_flow *flow = &b->flows[q->cur_flow];
+       struct sk_buff *skb = NULL;
+       u32 len;
+
+       if (flow->head) {
+               skb = dequeue_head(flow);
+               len = qdisc_pkt_len(skb);
+               b->backlogs[q->cur_flow] -= len;
+               b->tin_backlog           -= len;
+               sch->qstats.backlog      -= len;
+               q->buffer_used           -= skb->truesize;
+               sch->q.qlen--;
+
+               if (q->overflow_timeout)
+                       cake_heapify(q, b->overflow_idx[q->cur_flow]);
+       }
+       return skb;
+}
+
+/* Discard leftover packets from a tin no longer in use. */
+static void cake_clear_tin(struct Qdisc *sch, u16 tin)
+{
+       struct cake_sched_data *q = qdisc_priv(sch);
+       struct sk_buff *skb;
+
+       q->cur_tin = tin;
+       for (q->cur_flow = 0; q->cur_flow < CAKE_QUEUES; q->cur_flow++)
+               while (!!(skb = cake_dequeue_one(sch)))
+                       kfree_skb(skb);
+}
+
+static struct sk_buff *cake_dequeue(struct Qdisc *sch)
+{
+       struct cake_sched_data *q = qdisc_priv(sch);
+       struct cake_tin_data *b = &q->tins[q->cur_tin];
+       struct cake_host *srchost, *dsthost;
+       ktime_t now = ktime_get();
+       struct cake_flow *flow;
+       struct list_head *head;
+       bool first_flow = true;
+       struct sk_buff *skb;
+       u16 host_load;
+       u64 delay;
+       u32 len;
+
+begin:
+       if (!sch->q.qlen)
+               return NULL;
+
+       /* global hard shaper */
+       if (ktime_after(q->time_next_packet, now) &&
+           ktime_after(q->failsafe_next_packet, now)) {
+               u64 next = min(ktime_to_ns(q->time_next_packet),
+                              ktime_to_ns(q->failsafe_next_packet));
+
+               sch->qstats.overlimits++;
+               qdisc_watchdog_schedule_ns(&q->watchdog, next);
+               return NULL;
+       }
+
+       /* Choose a class to work on. */
+       if (!q->rate_ns) {
+               /* In unlimited mode, we can't rely on shaper timings, so just
+                * balance with DRR.
+                */
+               bool wrapped = false, empty = true;
+
+               while (b->tin_deficit < 0 ||
+                      !(b->sparse_flow_count + b->bulk_flow_count)) {
+                       if (b->tin_deficit <= 0)
+                               b->tin_deficit += b->tin_quantum_band;
+                       if (b->sparse_flow_count + b->bulk_flow_count)
+                               empty = false;
+
+                       q->cur_tin++;
+                       b++;
+                       if (q->cur_tin >= q->tin_cnt) {
+                               q->cur_tin = 0;
+                               b = q->tins;
+
+                               if (wrapped) {
+                                       /* It's possible for q->qlen to be
+                                        * nonzero when we actually have no
+                                        * packets anywhere.
+                                        */
+                                       if (empty)
+                                               return NULL;
+                               } else {
+                                       wrapped = true;
+                               }
+                       }
+               }
+       } else {
+               /* In shaped mode, choose:
+                * - the highest-priority tin with queued packets whose
+                *   schedule has been met, or
+                * - the earliest-scheduled tin with queued packets.
+                */
+               ktime_t best_time = KTIME_MAX;
+               int tin, best_tin = 0;
+
+               for (tin = 0; tin < q->tin_cnt; tin++) {
+                       b = q->tins + tin;
+                       if ((b->sparse_flow_count + b->bulk_flow_count) > 0) {
+                               ktime_t time_to_pkt =
+                                       ktime_sub(b->time_next_packet, now);
+
+                               if (ktime_to_ns(time_to_pkt) <= 0 ||
+                                   ktime_compare(time_to_pkt,
+                                                 best_time) <= 0) {
+                                       best_time = time_to_pkt;
+                                       best_tin = tin;
+                               }
+                       }
+               }
+
+               q->cur_tin = best_tin;
+               b = q->tins + best_tin;
+
+               /* No point in going further if no packets to deliver. */
+               if (unlikely(!(b->sparse_flow_count + b->bulk_flow_count)))
+                       return NULL;
+       }
+
+retry:
+       /* service this class */
+       head = &b->decaying_flows;
+       if (!first_flow || list_empty(head)) {
+               head = &b->new_flows;
+               if (list_empty(head)) {
+                       head = &b->old_flows;
+                       if (unlikely(list_empty(head))) {
+                               head = &b->decaying_flows;
+                               if (unlikely(list_empty(head)))
+                                       goto begin;
+                       }
+               }
+       }
+       flow = list_first_entry(head, struct cake_flow, flowchain);
+       q->cur_flow = flow - b->flows;
+       first_flow = false;
+
+       /* triple isolation (modified DRR++) */
+       srchost = &b->hosts[flow->srchost];
+       dsthost = &b->hosts[flow->dsthost];
+       host_load = 1;
+
+       if (cake_dsrc(q->flow_mode))
+               host_load = max(host_load, srchost->srchost_refcnt);
+
+       if (cake_ddst(q->flow_mode))
+               host_load = max(host_load, dsthost->dsthost_refcnt);
+
+       WARN_ON(host_load > CAKE_QUEUES);
+
+       /* flow isolation (DRR++) */
+       if (flow->deficit <= 0) {
+               /* The shifted prandom_u32() is a way to apply dithering to
+                * avoid accumulating roundoff errors
+                */
+               flow->deficit += (b->flow_quantum * quantum_div[host_load] +
+                                 (prandom_u32() >> 16)) >> 16;
+               list_move_tail(&flow->flowchain, &b->old_flows);
+
+               /* Keep all flows with deficits out of the sparse and decaying
+                * rotations.  No non-empty flow can go into the decaying
+                * rotation, so flows there cannot accumulate deficits.
+                */
+               if (flow->set == CAKE_SET_SPARSE) {
+                       if (flow->head) {
+                               b->sparse_flow_count--;
+                               b->bulk_flow_count++;
+                               flow->set = CAKE_SET_BULK;
+                       } else {
+                               /* we've moved it to the bulk rotation for
+                                * correct deficit accounting but we still want
+                                * to count it as a sparse flow, not a bulk one.
+                                */
+                               flow->set = CAKE_SET_SPARSE_WAIT;
+                       }
+               }
+               goto retry;
+       }
+
+       /* Retrieve a packet via the AQM */
+       while (1) {
+               skb = cake_dequeue_one(sch);
+               if (!skb) {
+                       /* this queue was actually empty */
+                       if (cobalt_queue_empty(&flow->cvars, &b->cparams, now))
+                               b->unresponsive_flow_count--;
+
+                       if (flow->cvars.p_drop || flow->cvars.count ||
+                           ktime_before(now, flow->cvars.drop_next)) {
+                               /* keep in the flowchain until the state has
+                                * decayed to rest
+                                */
+                               list_move_tail(&flow->flowchain,
+                                              &b->decaying_flows);
+                               if (flow->set == CAKE_SET_BULK) {
+                                       b->bulk_flow_count--;
+                                       b->decaying_flow_count++;
+                               } else if (flow->set == CAKE_SET_SPARSE ||
+                                          flow->set == CAKE_SET_SPARSE_WAIT) {
+                                       b->sparse_flow_count--;
+                                       b->decaying_flow_count++;
+                               }
+                               flow->set = CAKE_SET_DECAYING;
+                       } else {
+                               /* remove empty queue from the flowchain */
+                               list_del_init(&flow->flowchain);
+                               if (flow->set == CAKE_SET_SPARSE ||
+                                   flow->set == CAKE_SET_SPARSE_WAIT)
+                                       b->sparse_flow_count--;
+                               else if (flow->set == CAKE_SET_BULK)
+                                       b->bulk_flow_count--;
+                               else
+                                       b->decaying_flow_count--;
+
+                               flow->set = CAKE_SET_NONE;
+                               srchost->srchost_refcnt--;
+                               dsthost->dsthost_refcnt--;
+                       }
+                       goto begin;
+               }
+
+               /* The last packet in the queue may be marked; it shouldn't
+                * be dropped.
+                */
+               if (!cobalt_should_drop(&flow->cvars, &b->cparams, now, skb,
+                                       (b->bulk_flow_count *
+                                        !!(q->rate_flags &
+                                           CAKE_FLAG_INGRESS))) ||
+                   !flow->head)
+                       break;
+
+               /* drop this packet, get another one */
+               if (q->rate_flags & CAKE_FLAG_INGRESS) {
+                       len = cake_advance_shaper(q, b, skb,
+                                                 now, true);
+                       flow->deficit -= len;
+                       b->tin_deficit -= len;
+               }
+               flow->dropped++;
+               b->tin_dropped++;
+               qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb));
+               qdisc_qstats_drop(sch);
+               kfree_skb(skb);
+               if (q->rate_flags & CAKE_FLAG_INGRESS)
+                       goto retry;
+       }
+
+       b->tin_ecn_mark += !!flow->cvars.ecn_marked;
+       qdisc_bstats_update(sch, skb);
+
+       /* collect delay stats */
+       delay = ktime_to_ns(ktime_sub(now, cobalt_get_enqueue_time(skb)));
+       b->avge_delay = cake_ewma(b->avge_delay, delay, 8);
+       b->peak_delay = cake_ewma(b->peak_delay, delay,
+                                 delay > b->peak_delay ? 2 : 8);
+       b->base_delay = cake_ewma(b->base_delay, delay,
+                                 delay < b->base_delay ? 2 : 8);
+
+       len = cake_advance_shaper(q, b, skb, now, false);
+       flow->deficit -= len;
+       b->tin_deficit -= len;
+
+       if (ktime_after(q->time_next_packet, now) && sch->q.qlen) {
+               u64 next = min(ktime_to_ns(q->time_next_packet),
+                              ktime_to_ns(q->failsafe_next_packet));
+
+               qdisc_watchdog_schedule_ns(&q->watchdog, next);
+       } else if (!sch->q.qlen) {
+               int i;
+
+               for (i = 0; i < q->tin_cnt; i++) {
+                       if (q->tins[i].decaying_flow_count) {
+                               ktime_t next =
+                                       ktime_add_ns(now,
+                                                    q->tins[i].cparams.target);
+
+                               qdisc_watchdog_schedule_ns(&q->watchdog,
+                                                          ktime_to_ns(next));
+                               break;
+                       }
+               }
+       }
+
+       if (q->overflow_timeout)
+               q->overflow_timeout--;
+
+       return skb;
+}
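
The deficit refill in the DRR++ stage above avoids a division in the fast path: quantum_div[] is a Q16 reciprocal table (filled in cake_init below with 65535 / i), so quantum * quantum_div[load] >> 16 approximates quantum / load, and the shifted random term dithers away the truncation bias. A standalone sketch, with rand() standing in for prandom_u32() and 1024 used as an illustrative value of CAKE_QUEUES (defined earlier in the file):

    #include <stdio.h>
    #include <stdint.h>
    #include <stdlib.h>

    #define CAKE_QUEUES 1024        /* illustrative; defined earlier in the file */

    static uint32_t quantum_div[CAKE_QUEUES + 1];

    int main(void)
    {
            uint32_t flow_quantum = 1514;   /* default from cake_set_rate() */

            quantum_div[0] = ~0U;
            for (int i = 1; i <= CAKE_QUEUES; i++)
                    quantum_div[i] = 65535 / i;

            for (int host_load = 1; host_load <= 4; host_load++) {
                    /* ~quantum / host_load, with 16-bit random dithering */
                    uint32_t refill = (flow_quantum * quantum_div[host_load] +
                                       (rand() & 0xffff)) >> 16;

                    printf("host_load %d -> refill ~%u bytes\n",
                           host_load, refill);
            }
            return 0;
    }
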
+
+static void cake_reset(struct Qdisc *sch)
+{
+       u32 c;
+
+       for (c = 0; c < CAKE_MAX_TINS; c++)
+               cake_clear_tin(sch, c);
+}
+
+static const struct nla_policy cake_policy[TCA_CAKE_MAX + 1] = {
+       [TCA_CAKE_BASE_RATE64]   = { .type = NLA_U64 },
+       [TCA_CAKE_DIFFSERV_MODE] = { .type = NLA_U32 },
+       [TCA_CAKE_ATM]           = { .type = NLA_U32 },
+       [TCA_CAKE_FLOW_MODE]     = { .type = NLA_U32 },
+       [TCA_CAKE_OVERHEAD]      = { .type = NLA_S32 },
+       [TCA_CAKE_RTT]           = { .type = NLA_U32 },
+       [TCA_CAKE_TARGET]        = { .type = NLA_U32 },
+       [TCA_CAKE_AUTORATE]      = { .type = NLA_U32 },
+       [TCA_CAKE_MEMORY]        = { .type = NLA_U32 },
+       [TCA_CAKE_NAT]           = { .type = NLA_U32 },
+       [TCA_CAKE_RAW]           = { .type = NLA_U32 },
+       [TCA_CAKE_WASH]          = { .type = NLA_U32 },
+       [TCA_CAKE_MPU]           = { .type = NLA_U32 },
+       [TCA_CAKE_INGRESS]       = { .type = NLA_U32 },
+       [TCA_CAKE_ACK_FILTER]    = { .type = NLA_U32 },
+       [TCA_CAKE_SPLIT_GSO]     = { .type = NLA_U32 },
+};
+
+static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu,
+                         u64 target_ns, u64 rtt_est_ns)
+{
+       /* Convert byte-rate into time-per-byte, clamping the rate to
+        * MIN_RATE so the shaper will always unwedge in reasonable time.
+        */
+       static const u64 MIN_RATE = 64;
+       u32 byte_target = mtu;
+       u64 byte_target_ns;
+       u8  rate_shft = 0;
+       u64 rate_ns = 0;
+
+       b->flow_quantum = 1514;
+       if (rate) {
+               b->flow_quantum = max(min(rate >> 12, 1514ULL), 300ULL);
+               rate_shft = 34;
+               rate_ns = ((u64)NSEC_PER_SEC) << rate_shft;
+               rate_ns = div64_u64(rate_ns, max(MIN_RATE, rate));
+               while (!!(rate_ns >> 34)) {
+                       rate_ns >>= 1;
+                       rate_shft--;
+               }
+       } /* else unlimited, ie. zero delay */
+
+       b->tin_rate_bps  = rate;
+       b->tin_rate_ns   = rate_ns;
+       b->tin_rate_shft = rate_shft;
+
+       byte_target_ns = (byte_target * rate_ns) >> rate_shft;
+
+       b->cparams.target = max((byte_target_ns * 3) / 2, target_ns);
+       b->cparams.interval = max(rtt_est_ns +
+                                    b->cparams.target - target_ns,
+                                    b->cparams.target * 2);
+       b->cparams.mtu_time = byte_target_ns;
+       b->cparams.p_inc = 1 << 24; /* 1/256 */
+       b->cparams.p_dec = 1 << 20; /* 1/4096 */
+}
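
cake_set_rate() stores the shaper rate as fixed-point nanoseconds-per-byte: (NSEC_PER_SEC << 34) / rate, renormalized until the mantissa fits in 34 bits so that len * rate_ns cannot overflow. The serialization time of a packet is then a multiply and a shift. A worked sketch at 100 Mbit/s (12.5 Mbytes/s, since tc rates are bytes per second):

    #include <stdio.h>
    #include <stdint.h>

    #define NSEC_PER_SEC 1000000000ULL

    int main(void)
    {
            uint64_t rate = 12500000;               /* bytes per second */
            uint64_t rate_ns = (NSEC_PER_SEC << 34) / rate;
            unsigned int shft = 34;
            uint64_t len = 1514;

            while (rate_ns >> 34) {                 /* keep mantissa in 34 bits */
                    rate_ns >>= 1;
                    shft--;
            }

            /* ~80 ns/byte * 1514 bytes: expect about 121120 ns */
            printf("tx time: %llu ns\n",
                   (unsigned long long)((len * rate_ns) >> shft));
            return 0;
    }
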
+
+static int cake_config_besteffort(struct Qdisc *sch)
+{
+       struct cake_sched_data *q = qdisc_priv(sch);
+       struct cake_tin_data *b = &q->tins[0];
+       u32 mtu = psched_mtu(qdisc_dev(sch));
+       u64 rate = q->rate_bps;
+
+       q->tin_cnt = 1;
+
+       q->tin_index = besteffort;
+       q->tin_order = normal_order;
+
+       cake_set_rate(b, rate, mtu,
+                     us_to_ns(q->target), us_to_ns(q->interval));
+       b->tin_quantum_band = 65535;
+       b->tin_quantum_prio = 65535;
+
+       return 0;
+}
+
+static int cake_config_precedence(struct Qdisc *sch)
+{
+       /* convert high-level (user visible) parameters into internal format */
+       struct cake_sched_data *q = qdisc_priv(sch);
+       u32 mtu = psched_mtu(qdisc_dev(sch));
+       u64 rate = q->rate_bps;
+       u32 quantum1 = 256;
+       u32 quantum2 = 256;
+       u32 i;
+
+       q->tin_cnt = 8;
+       q->tin_index = precedence;
+       q->tin_order = normal_order;
+
+       for (i = 0; i < q->tin_cnt; i++) {
+               struct cake_tin_data *b = &q->tins[i];
+
+               cake_set_rate(b, rate, mtu, us_to_ns(q->target),
+                             us_to_ns(q->interval));
+
+               b->tin_quantum_prio = max_t(u16, 1U, quantum1);
+               b->tin_quantum_band = max_t(u16, 1U, quantum2);
+
+               /* calculate next class's parameters */
+               rate  *= 7;
+               rate >>= 3;
+
+               quantum1  *= 3;
+               quantum1 >>= 1;
+
+               quantum2  *= 7;
+               quantum2 >>= 3;
+       }
+
+       return 0;
+}
+
+/*     List of known Diffserv codepoints:
+ *
+ *     Least Effort (CS1)
+ *     Best Effort (CS0)
+ *     Max Reliability & LLT "Lo" (TOS1)
+ *     Max Throughput (TOS2)
+ *     Min Delay (TOS4)
+ *     LLT "La" (TOS5)
+ *     Assured Forwarding 1 (AF1x) - x3
+ *     Assured Forwarding 2 (AF2x) - x3
+ *     Assured Forwarding 3 (AF3x) - x3
+ *     Assured Forwarding 4 (AF4x) - x3
+ *     Precedence Class 2 (CS2)
+ *     Precedence Class 3 (CS3)
+ *     Precedence Class 4 (CS4)
+ *     Precedence Class 5 (CS5)
+ *     Precedence Class 6 (CS6)
+ *     Precedence Class 7 (CS7)
+ *     Voice Admit (VA)
+ *     Expedited Forwarding (EF)
+ *
+ *     Total 25 codepoints.
+ */
+
+/*     List of traffic classes in RFC 4594:
+ *             (roughly descending order of contended priority)
+ *             (roughly ascending order of uncontended throughput)
+ *
+ *     Network Control (CS6,CS7)      - routing traffic
+ *     Telephony (EF,VA)         - aka. VoIP streams
+ *     Signalling (CS5)               - VoIP setup
+ *     Multimedia Conferencing (AF4x) - aka. video calls
+ *     Realtime Interactive (CS4)     - eg. games
+ *     Multimedia Streaming (AF3x)    - eg. YouTube, NetFlix, Twitch
+ *     Broadcast Video (CS3)
+ *     Low Latency Data (AF2x,TOS4)      - eg. database
+ *     Ops, Admin, Management (CS2,TOS1) - eg. ssh
+ *     Standard Service (CS0 & unrecognised codepoints)
+ *     High Throughput Data (AF1x,TOS2)  - eg. web traffic
+ *     Low Priority Data (CS1)           - eg. BitTorrent
+ *
+ *     Total 12 traffic classes.
+ */
+
+static int cake_config_diffserv8(struct Qdisc *sch)
+{
+/*     Pruned list of traffic classes for typical applications:
+ *
+ *             Network Control          (CS6, CS7)
+ *             Minimum Latency          (EF, VA, CS5, CS4)
+ *             Interactive Shell        (CS2, TOS1)
+ *             Low Latency Transactions (AF2x, TOS4)
+ *             Video Streaming          (AF4x, AF3x, CS3)
+ *             Bog Standard             (CS0 etc.)
+ *             High Throughput          (AF1x, TOS2)
+ *             Background Traffic       (CS1)
+ *
+ *             Total 8 traffic classes.
+ */
+
+       struct cake_sched_data *q = qdisc_priv(sch);
+       u32 mtu = psched_mtu(qdisc_dev(sch));
+       u64 rate = q->rate_bps;
+       u32 quantum1 = 256;
+       u32 quantum2 = 256;
+       u32 i;
+
+       q->tin_cnt = 8;
+
+       /* codepoint to class mapping */
+       q->tin_index = diffserv8;
+       q->tin_order = normal_order;
+
+       /* class characteristics */
+       for (i = 0; i < q->tin_cnt; i++) {
+               struct cake_tin_data *b = &q->tins[i];
+
+               cake_set_rate(b, rate, mtu, us_to_ns(q->target),
+                             us_to_ns(q->interval));
+
+               b->tin_quantum_prio = max_t(u16, 1U, quantum1);
+               b->tin_quantum_band = max_t(u16, 1U, quantum2);
+
+               /* calculate next class's parameters */
+               rate  *= 7;
+               rate >>= 3;
+
+               quantum1  *= 3;
+               quantum1 >>= 1;
+
+               quantum2  *= 7;
+               quantum2 >>= 3;
+       }
+
+       return 0;
+}
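
cake_config_precedence() and cake_config_diffserv8() build their eight tins from the same geometric ladder: each step keeps 7/8 of the previous shaped rate, grows the priority quantum by 3/2 and shrinks the bandwidth quantum by 7/8. Worked through the integer arithmetic above, the tins come out as approximately:

    tin:        0     1     2     3     4     5     6     7
    rate %:   100  87.5  76.6  67.0  58.6  51.3  44.9  39.3
    quantum1: 256   384   576   864  1296  1944  2916  4374
    quantum2: 256   224   196   171   149   130   113    98
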
+
+static int cake_config_diffserv4(struct Qdisc *sch)
+{
+/*  Further pruned list of traffic classes for four-class system:
+ *
+ *         Latency Sensitive  (CS7, CS6, EF, VA, CS5, CS4)
+ *         Streaming Media    (AF4x, AF3x, CS3, AF2x, TOS4, CS2, TOS1)
+ *         Best Effort        (CS0, AF1x, TOS2, and those not specified)
+ *         Background Traffic (CS1)
+ *
+ *             Total 4 traffic classes.
+ */
+
+       struct cake_sched_data *q = qdisc_priv(sch);
+       u32 mtu = psched_mtu(qdisc_dev(sch));
+       u64 rate = q->rate_bps;
+       u32 quantum = 1024;
+
+       q->tin_cnt = 4;
+
+       /* codepoint to class mapping */
+       q->tin_index = diffserv4;
+       q->tin_order = bulk_order;
+
+       /* class characteristics */
+       cake_set_rate(&q->tins[0], rate, mtu,
+                     us_to_ns(q->target), us_to_ns(q->interval));
+       cake_set_rate(&q->tins[1], rate >> 4, mtu,
+                     us_to_ns(q->target), us_to_ns(q->interval));
+       cake_set_rate(&q->tins[2], rate >> 1, mtu,
+                     us_to_ns(q->target), us_to_ns(q->interval));
+       cake_set_rate(&q->tins[3], rate >> 2, mtu,
+                     us_to_ns(q->target), us_to_ns(q->interval));
+
+       /* priority weights */
+       q->tins[0].tin_quantum_prio = quantum;
+       q->tins[1].tin_quantum_prio = quantum >> 4;
+       q->tins[2].tin_quantum_prio = quantum << 2;
+       q->tins[3].tin_quantum_prio = quantum << 4;
+
+       /* bandwidth-sharing weights */
+       q->tins[0].tin_quantum_band = quantum;
+       q->tins[1].tin_quantum_band = quantum >> 4;
+       q->tins[2].tin_quantum_band = quantum >> 1;
+       q->tins[3].tin_quantum_band = quantum >> 2;
+
+       return 0;
+}
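
Working the diffserv4 shifts through with quantum = 1024: the four tins are shaped to 1x, 1/16x, 1/2x and 1/4x of the base rate, with priority weights 1024, 64, 4096 and 16384 and bandwidth-sharing weights 1024, 64, 512 and 256. The highest-priority tin therefore dominates the scheduler whenever it is busy, but its rate >> 2 threshold stops it from claiming more than about a quarter of the link, while the 1/16-rate tin yields to everything yet is never starved.
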
+
+static int cake_config_diffserv3(struct Qdisc *sch)
+{
+/*  Simplified Diffserv structure with 3 tins.
+ *             Low Priority            (CS1)
+ *             Best Effort
+ *             Latency Sensitive       (TOS4, VA, EF, CS6, CS7)
+ */
+       struct cake_sched_data *q = qdisc_priv(sch);
+       u32 mtu = psched_mtu(qdisc_dev(sch));
+       u64 rate = q->rate_bps;
+       u32 quantum = 1024;
+
+       q->tin_cnt = 3;
+
+       /* codepoint to class mapping */
+       q->tin_index = diffserv3;
+       q->tin_order = bulk_order;
+
+       /* class characteristics */
+       cake_set_rate(&q->tins[0], rate, mtu,
+                     us_to_ns(q->target), us_to_ns(q->interval));
+       cake_set_rate(&q->tins[1], rate >> 4, mtu,
+                     us_to_ns(q->target), us_to_ns(q->interval));
+       cake_set_rate(&q->tins[2], rate >> 2, mtu,
+                     us_to_ns(q->target), us_to_ns(q->interval));
+
+       /* priority weights */
+       q->tins[0].tin_quantum_prio = quantum;
+       q->tins[1].tin_quantum_prio = quantum >> 4;
+       q->tins[2].tin_quantum_prio = quantum << 4;
+
+       /* bandwidth-sharing weights */
+       q->tins[0].tin_quantum_band = quantum;
+       q->tins[1].tin_quantum_band = quantum >> 4;
+       q->tins[2].tin_quantum_band = quantum >> 2;
+
+       return 0;
+}
+
+static void cake_reconfigure(struct Qdisc *sch)
+{
+       struct cake_sched_data *q = qdisc_priv(sch);
+       int c, ft;
+
+       switch (q->tin_mode) {
+       case CAKE_DIFFSERV_BESTEFFORT:
+               ft = cake_config_besteffort(sch);
+               break;
+
+       case CAKE_DIFFSERV_PRECEDENCE:
+               ft = cake_config_precedence(sch);
+               break;
+
+       case CAKE_DIFFSERV_DIFFSERV8:
+               ft = cake_config_diffserv8(sch);
+               break;
+
+       case CAKE_DIFFSERV_DIFFSERV4:
+               ft = cake_config_diffserv4(sch);
+               break;
+
+       case CAKE_DIFFSERV_DIFFSERV3:
+       default:
+               ft = cake_config_diffserv3(sch);
+               break;
+       }
+
+       for (c = q->tin_cnt; c < CAKE_MAX_TINS; c++) {
+               cake_clear_tin(sch, c);
+               q->tins[c].cparams.mtu_time = q->tins[ft].cparams.mtu_time;
+       }
+
+       q->rate_ns   = q->tins[ft].tin_rate_ns;
+       q->rate_shft = q->tins[ft].tin_rate_shft;
+
+       if (q->buffer_config_limit) {
+               q->buffer_limit = q->buffer_config_limit;
+       } else if (q->rate_bps) {
+               u64 t = q->rate_bps * q->interval;
+
+               do_div(t, USEC_PER_SEC / 4);
+               q->buffer_limit = max_t(u32, t, 4U << 20);
+       } else {
+               q->buffer_limit = ~0;
+       }
+
+       sch->flags &= ~TCQ_F_CAN_BYPASS;
+
+       q->buffer_limit = min(q->buffer_limit,
+                             max(sch->limit * psched_mtu(qdisc_dev(sch)),
+                                 q->buffer_config_limit));
+}
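
When no memory limit is configured and the shaper is active, the arithmetic above sizes the buffer to four interval-windows of data, floored at 4 MB: t = rate_bps * interval[us] / 250000 is 4 x rate x interval. For example, at 12,500,000 bytes/s (100 Mbit/s) with the default 100 ms interval, t = 12,500,000 * 100,000 / 250,000 = 5,000,000 bytes, which clears the 4 MB floor; the final min() then caps the result at sch->limit packets (10240, set in cake_init below) times the device MTU.
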
+
+static int cake_change(struct Qdisc *sch, struct nlattr *opt,
+                      struct netlink_ext_ack *extack)
+{
+       struct cake_sched_data *q = qdisc_priv(sch);
+       struct nlattr *tb[TCA_CAKE_MAX + 1];
+       int err;
+
+       if (!opt)
+               return -EINVAL;
+
+       err = nla_parse_nested(tb, TCA_CAKE_MAX, opt, cake_policy, extack);
+       if (err < 0)
+               return err;
+
+       if (tb[TCA_CAKE_NAT]) {
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+               q->flow_mode &= ~CAKE_FLOW_NAT_FLAG;
+               q->flow_mode |= CAKE_FLOW_NAT_FLAG *
+                       !!nla_get_u32(tb[TCA_CAKE_NAT]);
+#else
+               NL_SET_ERR_MSG_ATTR(extack, tb[TCA_CAKE_NAT],
+                                   "No conntrack support in kernel");
+               return -EOPNOTSUPP;
+#endif
+       }
+
+       if (tb[TCA_CAKE_BASE_RATE64])
+               q->rate_bps = nla_get_u64(tb[TCA_CAKE_BASE_RATE64]);
+
+       if (tb[TCA_CAKE_DIFFSERV_MODE])
+               q->tin_mode = nla_get_u32(tb[TCA_CAKE_DIFFSERV_MODE]);
+
+       if (tb[TCA_CAKE_WASH]) {
+               if (!!nla_get_u32(tb[TCA_CAKE_WASH]))
+                       q->rate_flags |= CAKE_FLAG_WASH;
+               else
+                       q->rate_flags &= ~CAKE_FLAG_WASH;
+       }
+
+       if (tb[TCA_CAKE_FLOW_MODE])
+               q->flow_mode = ((q->flow_mode & CAKE_FLOW_NAT_FLAG) |
+                               (nla_get_u32(tb[TCA_CAKE_FLOW_MODE]) &
+                                       CAKE_FLOW_MASK));
+
+       if (tb[TCA_CAKE_ATM])
+               q->atm_mode = nla_get_u32(tb[TCA_CAKE_ATM]);
+
+       if (tb[TCA_CAKE_OVERHEAD]) {
+               q->rate_overhead = nla_get_s32(tb[TCA_CAKE_OVERHEAD]);
+               q->rate_flags |= CAKE_FLAG_OVERHEAD;
+
+               q->max_netlen = 0;
+               q->max_adjlen = 0;
+               q->min_netlen = ~0;
+               q->min_adjlen = ~0;
+       }
+
+       if (tb[TCA_CAKE_RAW]) {
+               q->rate_flags &= ~CAKE_FLAG_OVERHEAD;
+
+               q->max_netlen = 0;
+               q->max_adjlen = 0;
+               q->min_netlen = ~0;
+               q->min_adjlen = ~0;
+       }
+
+       if (tb[TCA_CAKE_MPU])
+               q->rate_mpu = nla_get_u32(tb[TCA_CAKE_MPU]);
+
+       if (tb[TCA_CAKE_RTT]) {
+               q->interval = nla_get_u32(tb[TCA_CAKE_RTT]);
+
+               if (!q->interval)
+                       q->interval = 1;
+       }
+
+       if (tb[TCA_CAKE_TARGET]) {
+               q->target = nla_get_u32(tb[TCA_CAKE_TARGET]);
+
+               if (!q->target)
+                       q->target = 1;
+       }
+
+       if (tb[TCA_CAKE_AUTORATE]) {
+               if (!!nla_get_u32(tb[TCA_CAKE_AUTORATE]))
+                       q->rate_flags |= CAKE_FLAG_AUTORATE_INGRESS;
+               else
+                       q->rate_flags &= ~CAKE_FLAG_AUTORATE_INGRESS;
+       }
+
+       if (tb[TCA_CAKE_INGRESS]) {
+               if (!!nla_get_u32(tb[TCA_CAKE_INGRESS]))
+                       q->rate_flags |= CAKE_FLAG_INGRESS;
+               else
+                       q->rate_flags &= ~CAKE_FLAG_INGRESS;
+       }
+
+       if (tb[TCA_CAKE_ACK_FILTER])
+               q->ack_filter = nla_get_u32(tb[TCA_CAKE_ACK_FILTER]);
+
+       if (tb[TCA_CAKE_MEMORY])
+               q->buffer_config_limit = nla_get_u32(tb[TCA_CAKE_MEMORY]);
+
+       if (tb[TCA_CAKE_SPLIT_GSO]) {
+               if (!!nla_get_u32(tb[TCA_CAKE_SPLIT_GSO]))
+                       q->rate_flags |= CAKE_FLAG_SPLIT_GSO;
+               else
+                       q->rate_flags &= ~CAKE_FLAG_SPLIT_GSO;
+       }
+
+       if (q->tins) {
+               sch_tree_lock(sch);
+               cake_reconfigure(sch);
+               sch_tree_unlock(sch);
+       }
+
+       return 0;
+}
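
Each of these attributes is exposed as a tc-cake keyword, so the whole block corresponds to command lines such as the following (values illustrative; see the tc-cake man page for the full grammar):

    # shape to just under line rate, 3-tin Diffserv, NAT-aware flow hashing
    tc qdisc replace dev eth0 root cake bandwidth 95Mbit diffserv3 nat

    # on an ifb device used for download shaping, enable ingress accounting
    tc qdisc replace dev ifb0 root cake bandwidth 180Mbit ingress
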
+
+static void cake_destroy(struct Qdisc *sch)
+{
+       struct cake_sched_data *q = qdisc_priv(sch);
+
+       qdisc_watchdog_cancel(&q->watchdog);
+       tcf_block_put(q->block);
+       kvfree(q->tins);
+}
+
+static int cake_init(struct Qdisc *sch, struct nlattr *opt,
+                    struct netlink_ext_ack *extack)
+{
+       struct cake_sched_data *q = qdisc_priv(sch);
+       int i, j, err;
+
+       sch->limit = 10240;
+       q->tin_mode = CAKE_DIFFSERV_DIFFSERV3;
+       q->flow_mode  = CAKE_FLOW_TRIPLE;
+
+       q->rate_bps = 0; /* unlimited by default */
+
+       q->interval = 100000; /* 100ms default */
+       q->target   =   5000; /* 5ms: codel RFC argues
+                              * for 5 to 10% of interval
+                              */
+       q->rate_flags |= CAKE_FLAG_SPLIT_GSO;
+       q->cur_tin = 0;
+       q->cur_flow  = 0;
+
+       qdisc_watchdog_init(&q->watchdog, sch);
+
+       if (opt) {
+               err = cake_change(sch, opt, extack);
+               if (err)
+                       return err;
+       }
+
+       err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
+       if (err)
+               return err;
+
+       quantum_div[0] = ~0;
+       for (i = 1; i <= CAKE_QUEUES; i++)
+               quantum_div[i] = 65535 / i;
+
+       q->tins = kvzalloc(CAKE_MAX_TINS * sizeof(struct cake_tin_data),
+                          GFP_KERNEL);
+       if (!q->tins)
+               goto nomem;
+
+       for (i = 0; i < CAKE_MAX_TINS; i++) {
+               struct cake_tin_data *b = q->tins + i;
+
+               INIT_LIST_HEAD(&b->new_flows);
+               INIT_LIST_HEAD(&b->old_flows);
+               INIT_LIST_HEAD(&b->decaying_flows);
+               b->sparse_flow_count = 0;
+               b->bulk_flow_count = 0;
+               b->decaying_flow_count = 0;
+
+               for (j = 0; j < CAKE_QUEUES; j++) {
+                       struct cake_flow *flow = b->flows + j;
+                       u32 k = j * CAKE_MAX_TINS + i;
+
+                       INIT_LIST_HEAD(&flow->flowchain);
+                       cobalt_vars_init(&flow->cvars);
+
+                       q->overflow_heap[k].t = i;
+                       q->overflow_heap[k].b = j;
+                       b->overflow_idx[j] = k;
+               }
+       }
+
+       cake_reconfigure(sch);
+       q->avg_peak_bandwidth = q->rate_bps;
+       q->min_netlen = ~0;
+       q->min_adjlen = ~0;
+       return 0;
+
+nomem:
+       cake_destroy(sch);
+       return -ENOMEM;
+}
+
+static int cake_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+       struct cake_sched_data *q = qdisc_priv(sch);
+       struct nlattr *opts;
+
+       opts = nla_nest_start(skb, TCA_OPTIONS);
+       if (!opts)
+               goto nla_put_failure;
+
+       if (nla_put_u64_64bit(skb, TCA_CAKE_BASE_RATE64, q->rate_bps,
+                             TCA_CAKE_PAD))
+               goto nla_put_failure;
+
+       if (nla_put_u32(skb, TCA_CAKE_FLOW_MODE,
+                       q->flow_mode & CAKE_FLOW_MASK))
+               goto nla_put_failure;
+
+       if (nla_put_u32(skb, TCA_CAKE_RTT, q->interval))
+               goto nla_put_failure;
+
+       if (nla_put_u32(skb, TCA_CAKE_TARGET, q->target))
+               goto nla_put_failure;
+
+       if (nla_put_u32(skb, TCA_CAKE_MEMORY, q->buffer_config_limit))
+               goto nla_put_failure;
+
+       if (nla_put_u32(skb, TCA_CAKE_AUTORATE,
+                       !!(q->rate_flags & CAKE_FLAG_AUTORATE_INGRESS)))
+               goto nla_put_failure;
+
+       if (nla_put_u32(skb, TCA_CAKE_INGRESS,
+                       !!(q->rate_flags & CAKE_FLAG_INGRESS)))
+               goto nla_put_failure;
+
+       if (nla_put_u32(skb, TCA_CAKE_ACK_FILTER, q->ack_filter))
+               goto nla_put_failure;
+
+       if (nla_put_u32(skb, TCA_CAKE_NAT,
+                       !!(q->flow_mode & CAKE_FLOW_NAT_FLAG)))
+               goto nla_put_failure;
+
+       if (nla_put_u32(skb, TCA_CAKE_DIFFSERV_MODE, q->tin_mode))
+               goto nla_put_failure;
+
+       if (nla_put_u32(skb, TCA_CAKE_WASH,
+                       !!(q->rate_flags & CAKE_FLAG_WASH)))
+               goto nla_put_failure;
+
+       if (nla_put_u32(skb, TCA_CAKE_OVERHEAD, q->rate_overhead))
+               goto nla_put_failure;
+
+       if (!(q->rate_flags & CAKE_FLAG_OVERHEAD))
+               if (nla_put_u32(skb, TCA_CAKE_RAW, 0))
+                       goto nla_put_failure;
+
+       if (nla_put_u32(skb, TCA_CAKE_ATM, q->atm_mode))
+               goto nla_put_failure;
+
+       if (nla_put_u32(skb, TCA_CAKE_MPU, q->rate_mpu))
+               goto nla_put_failure;
+
+       if (nla_put_u32(skb, TCA_CAKE_SPLIT_GSO,
+                       !!(q->rate_flags & CAKE_FLAG_SPLIT_GSO)))
+               goto nla_put_failure;
+
+       return nla_nest_end(skb, opts);
+
+nla_put_failure:
+       return -1;
+}
+
+static int cake_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
+{
+       struct nlattr *stats = nla_nest_start(d->skb, TCA_STATS_APP);
+       struct cake_sched_data *q = qdisc_priv(sch);
+       struct nlattr *tstats, *ts;
+       int i;
+
+       if (!stats)
+               return -1;
+
+#define PUT_STAT_U32(attr, data) do {                                 \
+               if (nla_put_u32(d->skb, TCA_CAKE_STATS_ ## attr, data)) \
+                       goto nla_put_failure;                          \
+       } while (0)
+#define PUT_STAT_U64(attr, data) do {                                 \
+               if (nla_put_u64_64bit(d->skb, TCA_CAKE_STATS_ ## attr, \
+                                       data, TCA_CAKE_STATS_PAD)) \
+                       goto nla_put_failure;                          \
+       } while (0)
+
+       PUT_STAT_U64(CAPACITY_ESTIMATE64, q->avg_peak_bandwidth);
+       PUT_STAT_U32(MEMORY_LIMIT, q->buffer_limit);
+       PUT_STAT_U32(MEMORY_USED, q->buffer_max_used);
+       PUT_STAT_U32(AVG_NETOFF, ((q->avg_netoff + 0x8000) >> 16));
+       PUT_STAT_U32(MAX_NETLEN, q->max_netlen);
+       PUT_STAT_U32(MAX_ADJLEN, q->max_adjlen);
+       PUT_STAT_U32(MIN_NETLEN, q->min_netlen);
+       PUT_STAT_U32(MIN_ADJLEN, q->min_adjlen);
+
+#undef PUT_STAT_U32
+#undef PUT_STAT_U64
+
+       tstats = nla_nest_start(d->skb, TCA_CAKE_STATS_TIN_STATS);
+       if (!tstats)
+               goto nla_put_failure;
+
+#define PUT_TSTAT_U32(attr, data) do {                                 \
+               if (nla_put_u32(d->skb, TCA_CAKE_TIN_STATS_ ## attr, data)) \
+                       goto nla_put_failure;                           \
+       } while (0)
+#define PUT_TSTAT_U64(attr, data) do {                                 \
+               if (nla_put_u64_64bit(d->skb, TCA_CAKE_TIN_STATS_ ## attr, \
+                                       data, TCA_CAKE_TIN_STATS_PAD))  \
+                       goto nla_put_failure;                           \
+       } while (0)
+
+       for (i = 0; i < q->tin_cnt; i++) {
+               struct cake_tin_data *b = &q->tins[q->tin_order[i]];
+
+               ts = nla_nest_start(d->skb, i + 1);
+               if (!ts)
+                       goto nla_put_failure;
+
+               PUT_TSTAT_U64(THRESHOLD_RATE64, b->tin_rate_bps);
+               PUT_TSTAT_U64(SENT_BYTES64, b->bytes);
+               PUT_TSTAT_U32(BACKLOG_BYTES, b->tin_backlog);
+
+               PUT_TSTAT_U32(TARGET_US,
+                             ktime_to_us(ns_to_ktime(b->cparams.target)));
+               PUT_TSTAT_U32(INTERVAL_US,
+                             ktime_to_us(ns_to_ktime(b->cparams.interval)));
+
+               PUT_TSTAT_U32(SENT_PACKETS, b->packets);
+               PUT_TSTAT_U32(DROPPED_PACKETS, b->tin_dropped);
+               PUT_TSTAT_U32(ECN_MARKED_PACKETS, b->tin_ecn_mark);
+               PUT_TSTAT_U32(ACKS_DROPPED_PACKETS, b->ack_drops);
+
+               PUT_TSTAT_U32(PEAK_DELAY_US,
+                             ktime_to_us(ns_to_ktime(b->peak_delay)));
+               PUT_TSTAT_U32(AVG_DELAY_US,
+                             ktime_to_us(ns_to_ktime(b->avge_delay)));
+               PUT_TSTAT_U32(BASE_DELAY_US,
+                             ktime_to_us(ns_to_ktime(b->base_delay)));
+
+               PUT_TSTAT_U32(WAY_INDIRECT_HITS, b->way_hits);
+               PUT_TSTAT_U32(WAY_MISSES, b->way_misses);
+               PUT_TSTAT_U32(WAY_COLLISIONS, b->way_collisions);
+
+               PUT_TSTAT_U32(SPARSE_FLOWS, b->sparse_flow_count +
+                                           b->decaying_flow_count);
+               PUT_TSTAT_U32(BULK_FLOWS, b->bulk_flow_count);
+               PUT_TSTAT_U32(UNRESPONSIVE_FLOWS, b->unresponsive_flow_count);
+               PUT_TSTAT_U32(MAX_SKBLEN, b->max_skblen);
+
+               PUT_TSTAT_U32(FLOW_QUANTUM, b->flow_quantum);
+               nla_nest_end(d->skb, ts);
+       }
+
+#undef PUT_TSTAT_U32
+#undef PUT_TSTAT_U64
+
+       nla_nest_end(d->skb, tstats);
+       return nla_nest_end(d->skb, stats);
+
+nla_put_failure:
+       nla_nest_cancel(d->skb, stats);
+       return -1;
+}
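
These are the attributes behind the per-tin table that a cake-aware `tc -s qdisc show dev eth0` prints: one column per tin, ordered by q->tin_order rather than raw index, with the threshold rate, target/interval, delay EWMAs, drop/mark counters and flow counts mapping one-to-one to the PUT_TSTAT fields above.
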
+
+static struct Qdisc *cake_leaf(struct Qdisc *sch, unsigned long arg)
+{
+       return NULL;
+}
+
+static unsigned long cake_find(struct Qdisc *sch, u32 classid)
+{
+       return 0;
+}
+
+static unsigned long cake_bind(struct Qdisc *sch, unsigned long parent,
+                              u32 classid)
+{
+       return 0;
+}
+
+static void cake_unbind(struct Qdisc *q, unsigned long cl)
+{
+}
+
+static struct tcf_block *cake_tcf_block(struct Qdisc *sch, unsigned long cl,
+                                       struct netlink_ext_ack *extack)
+{
+       struct cake_sched_data *q = qdisc_priv(sch);
+
+       if (cl)
+               return NULL;
+       return q->block;
+}
+
+static int cake_dump_class(struct Qdisc *sch, unsigned long cl,
+                          struct sk_buff *skb, struct tcmsg *tcm)
+{
+       tcm->tcm_handle |= TC_H_MIN(cl);
+       return 0;
+}
+
+static int cake_dump_class_stats(struct Qdisc *sch, unsigned long cl,
+                                struct gnet_dump *d)
+{
+       struct cake_sched_data *q = qdisc_priv(sch);
+       const struct cake_flow *flow = NULL;
+       struct gnet_stats_queue qs = { 0 };
+       struct nlattr *stats;
+       u32 idx = cl - 1;
+
+       if (idx < CAKE_QUEUES * q->tin_cnt) {
+               const struct cake_tin_data *b =
+                       &q->tins[q->tin_order[idx / CAKE_QUEUES]];
+               const struct sk_buff *skb;
+
+               flow = &b->flows[idx % CAKE_QUEUES];
+
+               if (flow->head) {
+                       sch_tree_lock(sch);
+                       skb = flow->head;
+                       while (skb) {
+                               qs.qlen++;
+                               skb = skb->next;
+                       }
+                       sch_tree_unlock(sch);
+               }
+               qs.backlog = b->backlogs[idx % CAKE_QUEUES];
+               qs.drops = flow->dropped;
+       }
+       if (gnet_stats_copy_queue(d, NULL, &qs, qs.qlen) < 0)
+               return -1;
+       if (flow) {
+               ktime_t now = ktime_get();
+
+               stats = nla_nest_start(d->skb, TCA_STATS_APP);
+               if (!stats)
+                       return -1;
+
+#define PUT_STAT_U32(attr, data) do {                                 \
+               if (nla_put_u32(d->skb, TCA_CAKE_STATS_ ## attr, data)) \
+                       goto nla_put_failure;                          \
+       } while (0)
+#define PUT_STAT_S32(attr, data) do {                                 \
+               if (nla_put_s32(d->skb, TCA_CAKE_STATS_ ## attr, data)) \
+                       goto nla_put_failure;                          \
+       } while (0)
+
+               PUT_STAT_S32(DEFICIT, flow->deficit);
+               PUT_STAT_U32(DROPPING, flow->cvars.dropping);
+               PUT_STAT_U32(COBALT_COUNT, flow->cvars.count);
+               PUT_STAT_U32(P_DROP, flow->cvars.p_drop);
+               if (flow->cvars.p_drop) {
+                       PUT_STAT_S32(BLUE_TIMER_US,
+                                    ktime_to_us(
+                                            ktime_sub(now,
+                                                    flow->cvars.blue_timer)));
+               }
+               if (flow->cvars.dropping) {
+                       PUT_STAT_S32(DROP_NEXT_US,
+                                    ktime_to_us(
+                                            ktime_sub(now,
+                                                      flow->cvars.drop_next)));
+               }
+
+               if (nla_nest_end(d->skb, stats) < 0)
+                       return -1;
+       }
+
+       return 0;
+
+nla_put_failure:
+       nla_nest_cancel(d->skb, stats);
+       return -1;
+}
+
+static void cake_walk(struct Qdisc *sch, struct qdisc_walker *arg)
+{
+       struct cake_sched_data *q = qdisc_priv(sch);
+       unsigned int i, j;
+
+       if (arg->stop)
+               return;
+
+       for (i = 0; i < q->tin_cnt; i++) {
+               struct cake_tin_data *b = &q->tins[q->tin_order[i]];
+
+               for (j = 0; j < CAKE_QUEUES; j++) {
+                       if (list_empty(&b->flows[j].flowchain) ||
+                           arg->count < arg->skip) {
+                               arg->count++;
+                               continue;
+                       }
+                       if (arg->fn(sch, i * CAKE_QUEUES + j + 1, arg) < 0) {
+                               arg->stop = 1;
+                               break;
+                       }
+                       arg->count++;
+               }
+       }
+}
+
+static const struct Qdisc_class_ops cake_class_ops = {
+       .leaf           =       cake_leaf,
+       .find           =       cake_find,
+       .tcf_block      =       cake_tcf_block,
+       .bind_tcf       =       cake_bind,
+       .unbind_tcf     =       cake_unbind,
+       .dump           =       cake_dump_class,
+       .dump_stats     =       cake_dump_class_stats,
+       .walk           =       cake_walk,
+};
+
+static struct Qdisc_ops cake_qdisc_ops __read_mostly = {
+       .cl_ops         =       &cake_class_ops,
+       .id             =       "cake",
+       .priv_size      =       sizeof(struct cake_sched_data),
+       .enqueue        =       cake_enqueue,
+       .dequeue        =       cake_dequeue,
+       .peek           =       qdisc_peek_dequeued,
+       .init           =       cake_init,
+       .reset          =       cake_reset,
+       .destroy        =       cake_destroy,
+       .change         =       cake_change,
+       .dump           =       cake_dump,
+       .dump_stats     =       cake_dump_stats,
+       .owner          =       THIS_MODULE,
+};
+
+static int __init cake_module_init(void)
+{
+       return register_qdisc(&cake_qdisc_ops);
+}
+
+static void __exit cake_module_exit(void)
+{
+       unregister_qdisc(&cake_qdisc_ops);
+}
+
+module_init(cake_module_init)
+module_exit(cake_module_exit)
+MODULE_AUTHOR("Jonathan Morton");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("The CAKE shaper.");
diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c
index cdd96b9a27bcf1ef510e282ff97a8710132b1ce6..e26a24017faa6b15d1f6100350d9c39b539ce50b 100644 (file)
@@ -78,18 +78,42 @@ struct cbs_sched_data {
        s64 sendslope; /* in bytes/s */
        s64 idleslope; /* in bytes/s */
        struct qdisc_watchdog watchdog;
-       int (*enqueue)(struct sk_buff *skb, struct Qdisc *sch);
+       int (*enqueue)(struct sk_buff *skb, struct Qdisc *sch,
+                      struct sk_buff **to_free);
        struct sk_buff *(*dequeue)(struct Qdisc *sch);
+       struct Qdisc *qdisc;
 };
 
-static int cbs_enqueue_offload(struct sk_buff *skb, struct Qdisc *sch)
+static int cbs_child_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+                            struct Qdisc *child,
+                            struct sk_buff **to_free)
 {
-       return qdisc_enqueue_tail(skb, sch);
+       int err;
+
+       err = child->ops->enqueue(skb, child, to_free);
+       if (err != NET_XMIT_SUCCESS)
+               return err;
+
+       qdisc_qstats_backlog_inc(sch, skb);
+       sch->q.qlen++;
+
+       return NET_XMIT_SUCCESS;
 }
 
-static int cbs_enqueue_soft(struct sk_buff *skb, struct Qdisc *sch)
+static int cbs_enqueue_offload(struct sk_buff *skb, struct Qdisc *sch,
+                              struct sk_buff **to_free)
 {
        struct cbs_sched_data *q = qdisc_priv(sch);
+       struct Qdisc *qdisc = q->qdisc;
+
+       return cbs_child_enqueue(skb, sch, qdisc, to_free);
+}
+
+static int cbs_enqueue_soft(struct sk_buff *skb, struct Qdisc *sch,
+                           struct sk_buff **to_free)
+{
+       struct cbs_sched_data *q = qdisc_priv(sch);
+       struct Qdisc *qdisc = q->qdisc;
 
        if (sch->q.qlen == 0 && q->credits > 0) {
                /* We need to stop accumulating credits when there's
@@ -99,7 +123,7 @@ static int cbs_enqueue_soft(struct sk_buff *skb, struct Qdisc *sch)
                q->last = ktime_get_ns();
        }
 
-       return qdisc_enqueue_tail(skb, sch);
+       return cbs_child_enqueue(skb, sch, qdisc, to_free);
 }
 
 static int cbs_enqueue(struct sk_buff *skb, struct Qdisc *sch,
@@ -107,7 +131,7 @@ static int cbs_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 {
        struct cbs_sched_data *q = qdisc_priv(sch);
 
-       return q->enqueue(skb, sch);
+       return q->enqueue(skb, sch, to_free);
 }
 
 /* timediff is in ns, slope is in bytes/s */
@@ -132,9 +156,25 @@ static s64 credits_from_len(unsigned int len, s64 slope, s64 port_rate)
        return div64_s64(len * slope, port_rate);
 }
 
+static struct sk_buff *cbs_child_dequeue(struct Qdisc *sch, struct Qdisc *child)
+{
+       struct sk_buff *skb;
+
+       skb = child->ops->dequeue(child);
+       if (!skb)
+               return NULL;
+
+       qdisc_qstats_backlog_dec(sch, skb);
+       qdisc_bstats_update(sch, skb);
+       sch->q.qlen--;
+
+       return skb;
+}
+
 static struct sk_buff *cbs_dequeue_soft(struct Qdisc *sch)
 {
        struct cbs_sched_data *q = qdisc_priv(sch);
+       struct Qdisc *qdisc = q->qdisc;
        s64 now = ktime_get_ns();
        struct sk_buff *skb;
        s64 credits;
@@ -157,8 +197,7 @@ static struct sk_buff *cbs_dequeue_soft(struct Qdisc *sch)
                        return NULL;
                }
        }
-
-       skb = qdisc_dequeue_head(sch);
+       skb = cbs_child_dequeue(sch, qdisc);
        if (!skb)
                return NULL;
 
@@ -178,7 +217,10 @@ static struct sk_buff *cbs_dequeue_soft(struct Qdisc *sch)
 
 static struct sk_buff *cbs_dequeue_offload(struct Qdisc *sch)
 {
-       return qdisc_dequeue_head(sch);
+       struct cbs_sched_data *q = qdisc_priv(sch);
+       struct Qdisc *qdisc = q->qdisc;
+
+       return cbs_child_dequeue(sch, qdisc);
 }
 
 static struct sk_buff *cbs_dequeue(struct Qdisc *sch)
@@ -310,6 +352,13 @@ static int cbs_init(struct Qdisc *sch, struct nlattr *opt,
                return -EINVAL;
        }
 
+       q->qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
+                                    sch->handle, extack);
+       if (!q->qdisc)
+               return -ENOMEM;
+
+       qdisc_hash_add(q->qdisc, false);
+
        q->queue = sch->dev_queue - netdev_get_tx_queue(dev, 0);
 
        q->enqueue = cbs_enqueue_soft;
@@ -328,6 +377,9 @@ static void cbs_destroy(struct Qdisc *sch)
        qdisc_watchdog_cancel(&q->watchdog);
 
        cbs_disable_offload(dev, q);
+
+       if (q->qdisc)
+               qdisc_destroy(q->qdisc);
 }
 
 static int cbs_dump(struct Qdisc *sch, struct sk_buff *skb)
@@ -356,8 +408,72 @@ nla_put_failure:
        return -1;
 }
 
+static int cbs_dump_class(struct Qdisc *sch, unsigned long cl,
+                         struct sk_buff *skb, struct tcmsg *tcm)
+{
+       struct cbs_sched_data *q = qdisc_priv(sch);
+
+       if (cl != 1 || !q->qdisc)       /* only one class */
+               return -ENOENT;
+
+       tcm->tcm_handle |= TC_H_MIN(1);
+       tcm->tcm_info = q->qdisc->handle;
+
+       return 0;
+}
+
+static int cbs_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
+                    struct Qdisc **old, struct netlink_ext_ack *extack)
+{
+       struct cbs_sched_data *q = qdisc_priv(sch);
+
+       if (!new) {
+               new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
+                                       sch->handle, NULL);
+               if (!new)
+                       new = &noop_qdisc;
+       }
+
+       *old = qdisc_replace(sch, new, &q->qdisc);
+       return 0;
+}
+
+static struct Qdisc *cbs_leaf(struct Qdisc *sch, unsigned long arg)
+{
+       struct cbs_sched_data *q = qdisc_priv(sch);
+
+       return q->qdisc;
+}
+
+static unsigned long cbs_find(struct Qdisc *sch, u32 classid)
+{
+       return 1;
+}
+
+static void cbs_walk(struct Qdisc *sch, struct qdisc_walker *walker)
+{
+       if (!walker->stop) {
+               if (walker->count >= walker->skip) {
+                       if (walker->fn(sch, 1, walker) < 0) {
+                               walker->stop = 1;
+                               return;
+                       }
+               }
+               walker->count++;
+       }
+}
+
+static const struct Qdisc_class_ops cbs_class_ops = {
+       .graft          =       cbs_graft,
+       .leaf           =       cbs_leaf,
+       .find           =       cbs_find,
+       .walk           =       cbs_walk,
+       .dump           =       cbs_dump_class,
+};
+
 static struct Qdisc_ops cbs_qdisc_ops __read_mostly = {
        .id             =       "cbs",
+       .cl_ops         =       &cbs_class_ops,
        .priv_size      =       sizeof(struct cbs_sched_data),
        .enqueue        =       cbs_enqueue,
        .dequeue        =       cbs_dequeue,
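
The net effect of the sch_cbs changes above is to make CBS classful: packets now pass through a child qdisc (a default pfifo created in cbs_init) which can be swapped out via cbs_graft. That enables the usual TSN stacking of ETF beneath CBS, e.g. (handles and credit parameters illustrative):

    tc qdisc replace dev eth0 parent 100:1 handle 200: cbs \
            idleslope 20000 sendslope -980000 hicredit 30 locredit -1470 offload 1
    tc qdisc add dev eth0 parent 200:1 etf clockid CLOCK_TAI delta 300000
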
diff --git a/net/sched/sch_etf.c b/net/sched/sch_etf.c
new file mode 100644 (file)
index 0000000..1538d6f
--- /dev/null
@@ -0,0 +1,484 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* net/sched/sch_etf.c  Earliest TxTime First queueing discipline.
+ *
+ * Authors:    Jesus Sanchez-Palencia <jesus.sanchez-palencia@intel.com>
+ *             Vinicius Costa Gomes <vinicius.gomes@intel.com>
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/errqueue.h>
+#include <linux/rbtree.h>
+#include <linux/skbuff.h>
+#include <linux/posix-timers.h>
+#include <net/netlink.h>
+#include <net/sch_generic.h>
+#include <net/pkt_sched.h>
+#include <net/sock.h>
+
+#define DEADLINE_MODE_IS_ON(x) ((x)->flags & TC_ETF_DEADLINE_MODE_ON)
+#define OFFLOAD_IS_ON(x) ((x)->flags & TC_ETF_OFFLOAD_ON)
+
+struct etf_sched_data {
+       bool offload;
+       bool deadline_mode;
+       int clockid;
+       int queue;
+       s32 delta; /* in ns */
+       ktime_t last; /* The txtime of the last skb sent to the netdevice. */
+       struct rb_root head;
+       struct qdisc_watchdog watchdog;
+       ktime_t (*get_time)(void);
+};
+
+static const struct nla_policy etf_policy[TCA_ETF_MAX + 1] = {
+       [TCA_ETF_PARMS] = { .len = sizeof(struct tc_etf_qopt) },
+};
+
+static inline int validate_input_params(struct tc_etf_qopt *qopt,
+                                       struct netlink_ext_ack *extack)
+{
+       /* Check that the params comply with the following rules:
+        *      * Clockid and delta must be valid.
+        *
+        *      * Dynamic clockids are not supported.
+        *
+        *      * Delta must be a positive integer.
+        *
+        * Also note that for the HW offload case, we expect the
+        * system clock to have been synchronized to the PHC.
+        */
+       if (qopt->clockid < 0) {
+               NL_SET_ERR_MSG(extack, "Dynamic clockids are not supported");
+               return -ENOTSUPP;
+       }
+
+       if (qopt->clockid != CLOCK_TAI) {
+               NL_SET_ERR_MSG(extack, "Invalid clockid. CLOCK_TAI must be used");
+               return -EINVAL;
+       }
+
+       if (qopt->delta < 0) {
+               NL_SET_ERR_MSG(extack, "Delta must be positive");
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+static bool is_packet_valid(struct Qdisc *sch, struct sk_buff *nskb)
+{
+       struct etf_sched_data *q = qdisc_priv(sch);
+       ktime_t txtime = nskb->tstamp;
+       struct sock *sk = nskb->sk;
+       ktime_t now;
+
+       if (!sk)
+               return false;
+
+       if (!sock_flag(sk, SOCK_TXTIME))
+               return false;
+
+       /* We don't perform crosstimestamping.
+        * Drop if packet's clockid differs from qdisc's.
+        */
+       if (sk->sk_clockid != q->clockid)
+               return false;
+
+       if (sk->sk_txtime_deadline_mode != q->deadline_mode)
+               return false;
+
+       now = q->get_time();
+       if (ktime_before(txtime, now) || ktime_before(txtime, q->last))
+               return false;
+
+       return true;
+}
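
is_packet_valid() only admits packets from sockets that opted in, with a clockid matching the qdisc's. On the userspace side that means setting SO_TXTIME on the socket and attaching a per-packet SCM_TXTIME cmsg carrying the transmission time. A minimal sketch (helper names illustrative, error handling elided; requires the v4.19 UAPI headers for struct sock_txtime):

    #include <string.h>
    #include <time.h>
    #include <sys/socket.h>
    #include <sys/uio.h>
    #include <linux/net_tstamp.h>   /* struct sock_txtime */

    #ifndef SO_TXTIME               /* values from the kernel UAPI */
    #define SO_TXTIME 61
    #define SCM_TXTIME SO_TXTIME
    #endif

    /* One-time setup: the clockid must match the qdisc's (CLOCK_TAI). */
    static int enable_txtime(int fd)
    {
            struct sock_txtime st = { .clockid = CLOCK_TAI, .flags = 0 };

            return setsockopt(fd, SOL_SOCKET, SO_TXTIME, &st, sizeof(st));
    }

    /* Send one datagram scheduled for transmission at txtime_ns (TAI). */
    static ssize_t send_at(int fd, const void *buf, size_t len,
                           unsigned long long txtime_ns)
    {
            char control[CMSG_SPACE(sizeof(txtime_ns))] = {};
            struct iovec iov = { .iov_base = (void *)buf, .iov_len = len };
            struct msghdr msg = {
                    .msg_iov = &iov, .msg_iovlen = 1,
                    .msg_control = control, .msg_controllen = sizeof(control),
            };
            struct cmsghdr *cm = CMSG_FIRSTHDR(&msg);

            cm->cmsg_level = SOL_SOCKET;
            cm->cmsg_type = SCM_TXTIME;
            cm->cmsg_len = CMSG_LEN(sizeof(txtime_ns));
            memcpy(CMSG_DATA(cm), &txtime_ns, sizeof(txtime_ns));

            return sendmsg(fd, &msg, 0);
    }
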
+
+static struct sk_buff *etf_peek_timesortedlist(struct Qdisc *sch)
+{
+       struct etf_sched_data *q = qdisc_priv(sch);
+       struct rb_node *p;
+
+       p = rb_first(&q->head);
+       if (!p)
+               return NULL;
+
+       return rb_to_skb(p);
+}
+
+static void reset_watchdog(struct Qdisc *sch)
+{
+       struct etf_sched_data *q = qdisc_priv(sch);
+       struct sk_buff *skb = etf_peek_timesortedlist(sch);
+       ktime_t next;
+
+       if (!skb)
+               return;
+
+       next = ktime_sub_ns(skb->tstamp, q->delta);
+       qdisc_watchdog_schedule_ns(&q->watchdog, ktime_to_ns(next));
+}
+
+static void report_sock_error(struct sk_buff *skb, u32 err, u8 code)
+{
+       struct sock_exterr_skb *serr;
+       struct sk_buff *clone;
+       ktime_t txtime = skb->tstamp;
+
+       if (!skb->sk || !(skb->sk->sk_txtime_report_errors))
+               return;
+
+       clone = skb_clone(skb, GFP_ATOMIC);
+       if (!clone)
+               return;
+
+       serr = SKB_EXT_ERR(clone);
+       serr->ee.ee_errno = err;
+       serr->ee.ee_origin = SO_EE_ORIGIN_TXTIME;
+       serr->ee.ee_type = 0;
+       serr->ee.ee_code = code;
+       serr->ee.ee_pad = 0;
+       serr->ee.ee_data = (txtime >> 32); /* high part of tstamp */
+       serr->ee.ee_info = txtime; /* low part of tstamp */
+
+       if (sock_queue_err_skb(skb->sk, clone))
+               kfree_skb(clone);
+}
+
+static int etf_enqueue_timesortedlist(struct sk_buff *nskb, struct Qdisc *sch,
+                                     struct sk_buff **to_free)
+{
+       struct etf_sched_data *q = qdisc_priv(sch);
+       struct rb_node **p = &q->head.rb_node, *parent = NULL;
+       ktime_t txtime = nskb->tstamp;
+
+       if (!is_packet_valid(sch, nskb)) {
+               report_sock_error(nskb, EINVAL,
+                                 SO_EE_CODE_TXTIME_INVALID_PARAM);
+               return qdisc_drop(nskb, sch, to_free);
+       }
+
+       while (*p) {
+               struct sk_buff *skb;
+
+               parent = *p;
+               skb = rb_to_skb(parent);
+               if (ktime_after(txtime, skb->tstamp))
+                       p = &parent->rb_right;
+               else
+                       p = &parent->rb_left;
+       }
+       rb_link_node(&nskb->rbnode, parent, p);
+       rb_insert_color(&nskb->rbnode, &q->head);
+
+       qdisc_qstats_backlog_inc(sch, nskb);
+       sch->q.qlen++;
+
+       /* Now we may need to re-arm the qdisc watchdog for the next packet. */
+       reset_watchdog(sch);
+
+       return NET_XMIT_SUCCESS;
+}
+
+static void timesortedlist_erase(struct Qdisc *sch, struct sk_buff *skb,
+                                bool drop)
+{
+       struct etf_sched_data *q = qdisc_priv(sch);
+
+       rb_erase(&skb->rbnode, &q->head);
+
+       /* The rbnode field in the skb re-uses these fields; now that
+        * we are done with the rbnode, reset them.
+        */
+       skb->next = NULL;
+       skb->prev = NULL;
+       skb->dev = qdisc_dev(sch);
+
+       qdisc_qstats_backlog_dec(sch, skb);
+
+       if (drop) {
+               struct sk_buff *to_free = NULL;
+
+               report_sock_error(skb, ECANCELED, SO_EE_CODE_TXTIME_MISSED);
+
+               qdisc_drop(skb, sch, &to_free);
+               kfree_skb_list(to_free);
+               qdisc_qstats_overlimit(sch);
+       } else {
+               qdisc_bstats_update(sch, skb);
+
+               q->last = skb->tstamp;
+       }
+
+       sch->q.qlen--;
+}
+
+static struct sk_buff *etf_dequeue_timesortedlist(struct Qdisc *sch)
+{
+       struct etf_sched_data *q = qdisc_priv(sch);
+       struct sk_buff *skb;
+       ktime_t now, next;
+
+       skb = etf_peek_timesortedlist(sch);
+       if (!skb)
+               return NULL;
+
+       now = q->get_time();
+
+       /* Drop if packet has expired while in queue. */
+       if (ktime_before(skb->tstamp, now)) {
+               timesortedlist_erase(sch, skb, true);
+               skb = NULL;
+               goto out;
+       }
+
+       /* When in deadline mode, dequeue as soon as possible and replace
+        * the txtime (which carried the deadline) with the current time.
+        */
+       if (q->deadline_mode) {
+               timesortedlist_erase(sch, skb, false);
+               skb->tstamp = now;
+               goto out;
+       }
+
+       next = ktime_sub_ns(skb->tstamp, q->delta);
+
+       /* Dequeue only if now is within the [txtime - delta, txtime] range. */
+       if (ktime_after(now, next))
+               timesortedlist_erase(sch, skb, false);
+       else
+               skb = NULL;
+
+out:
+       /* Now we may need to re-arm the qdisc watchdog for the next packet. */
+       reset_watchdog(sch);
+
+       return skb;
+}
+
+static void etf_disable_offload(struct net_device *dev,
+                               struct etf_sched_data *q)
+{
+       struct tc_etf_qopt_offload etf = { };
+       const struct net_device_ops *ops;
+       int err;
+
+       if (!q->offload)
+               return;
+
+       ops = dev->netdev_ops;
+       if (!ops->ndo_setup_tc)
+               return;
+
+       etf.queue = q->queue;
+       etf.enable = 0;
+
+       err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETF, &etf);
+       if (err < 0)
+               pr_warn("Couldn't disable ETF offload for queue %d\n",
+                       etf.queue);
+}
+
+static int etf_enable_offload(struct net_device *dev, struct etf_sched_data *q,
+                             struct netlink_ext_ack *extack)
+{
+       const struct net_device_ops *ops = dev->netdev_ops;
+       struct tc_etf_qopt_offload etf = { };
+       int err;
+
+       if (q->offload)
+               return 0;
+
+       if (!ops->ndo_setup_tc) {
+               NL_SET_ERR_MSG(extack, "Specified device does not support ETF offload");
+               return -EOPNOTSUPP;
+       }
+
+       etf.queue = q->queue;
+       etf.enable = 1;
+
+       err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETF, &etf);
+       if (err < 0) {
+               NL_SET_ERR_MSG(extack, "Specified device failed to setup ETF hardware offload");
+               return err;
+       }
+
+       return 0;
+}
+
+static int etf_init(struct Qdisc *sch, struct nlattr *opt,
+                   struct netlink_ext_ack *extack)
+{
+       struct etf_sched_data *q = qdisc_priv(sch);
+       struct net_device *dev = qdisc_dev(sch);
+       struct nlattr *tb[TCA_ETF_MAX + 1];
+       struct tc_etf_qopt *qopt;
+       int err;
+
+       if (!opt) {
+               NL_SET_ERR_MSG(extack,
+                              "Missing ETF qdisc options which are mandatory");
+               return -EINVAL;
+       }
+
+       err = nla_parse_nested(tb, TCA_ETF_MAX, opt, etf_policy, extack);
+       if (err < 0)
+               return err;
+
+       if (!tb[TCA_ETF_PARMS]) {
+               NL_SET_ERR_MSG(extack, "Missing mandatory ETF parameters");
+               return -EINVAL;
+       }
+
+       qopt = nla_data(tb[TCA_ETF_PARMS]);
+
+       pr_debug("delta %d clockid %d offload %s deadline %s\n",
+                qopt->delta, qopt->clockid,
+                OFFLOAD_IS_ON(qopt) ? "on" : "off",
+                DEADLINE_MODE_IS_ON(qopt) ? "on" : "off");
+
+       err = validate_input_params(qopt, extack);
+       if (err < 0)
+               return err;
+
+       q->queue = sch->dev_queue - netdev_get_tx_queue(dev, 0);
+
+       if (OFFLOAD_IS_ON(qopt)) {
+               err = etf_enable_offload(dev, q, extack);
+               if (err < 0)
+                       return err;
+       }
+
+       /* Everything went OK, save the parameters used. */
+       q->delta = qopt->delta;
+       q->clockid = qopt->clockid;
+       q->offload = OFFLOAD_IS_ON(qopt);
+       q->deadline_mode = DEADLINE_MODE_IS_ON(qopt);
+
+       switch (q->clockid) {
+       case CLOCK_REALTIME:
+               q->get_time = ktime_get_real;
+               break;
+       case CLOCK_MONOTONIC:
+               q->get_time = ktime_get;
+               break;
+       case CLOCK_BOOTTIME:
+               q->get_time = ktime_get_boottime;
+               break;
+       case CLOCK_TAI:
+               q->get_time = ktime_get_clocktai;
+               break;
+       default:
+               NL_SET_ERR_MSG(extack, "Clockid is not supported");
+               return -ENOTSUPP;
+       }
+
+       qdisc_watchdog_init_clockid(&q->watchdog, sch, q->clockid);
+
+       return 0;
+}
+
+static void timesortedlist_clear(struct Qdisc *sch)
+{
+       struct etf_sched_data *q = qdisc_priv(sch);
+       struct rb_node *p = rb_first(&q->head);
+
+       while (p) {
+               struct sk_buff *skb = rb_to_skb(p);
+
+               p = rb_next(p);
+
+               rb_erase(&skb->rbnode, &q->head);
+               rtnl_kfree_skbs(skb, skb);
+               sch->q.qlen--;
+       }
+}
+
+static void etf_reset(struct Qdisc *sch)
+{
+       struct etf_sched_data *q = qdisc_priv(sch);
+
+       /* Only cancel watchdog if it's been initialized. */
+       if (q->watchdog.qdisc == sch)
+               qdisc_watchdog_cancel(&q->watchdog);
+
+       /* No matter which mode we are in, it's safe to clear both lists. */
+       timesortedlist_clear(sch);
+       __qdisc_reset_queue(&sch->q);
+
+       sch->qstats.backlog = 0;
+       sch->q.qlen = 0;
+
+       q->last = 0;
+}
+
+static void etf_destroy(struct Qdisc *sch)
+{
+       struct etf_sched_data *q = qdisc_priv(sch);
+       struct net_device *dev = qdisc_dev(sch);
+
+       /* Only cancel watchdog if it's been initialized. */
+       if (q->watchdog.qdisc == sch)
+               qdisc_watchdog_cancel(&q->watchdog);
+
+       etf_disable_offload(dev, q);
+}
+
+static int etf_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+       struct etf_sched_data *q = qdisc_priv(sch);
+       struct tc_etf_qopt opt = { };
+       struct nlattr *nest;
+
+       nest = nla_nest_start(skb, TCA_OPTIONS);
+       if (!nest)
+               goto nla_put_failure;
+
+       opt.delta = q->delta;
+       opt.clockid = q->clockid;
+       if (q->offload)
+               opt.flags |= TC_ETF_OFFLOAD_ON;
+
+       if (q->deadline_mode)
+               opt.flags |= TC_ETF_DEADLINE_MODE_ON;
+
+       if (nla_put(skb, TCA_ETF_PARMS, sizeof(opt), &opt))
+               goto nla_put_failure;
+
+       return nla_nest_end(skb, nest);
+
+nla_put_failure:
+       nla_nest_cancel(skb, nest);
+       return -1;
+}
+
+static struct Qdisc_ops etf_qdisc_ops __read_mostly = {
+       .id             =       "etf",
+       .priv_size      =       sizeof(struct etf_sched_data),
+       .enqueue        =       etf_enqueue_timesortedlist,
+       .dequeue        =       etf_dequeue_timesortedlist,
+       .peek           =       etf_peek_timesortedlist,
+       .init           =       etf_init,
+       .reset          =       etf_reset,
+       .destroy        =       etf_destroy,
+       .dump           =       etf_dump,
+       .owner          =       THIS_MODULE,
+};
+
+static int __init etf_module_init(void)
+{
+       return register_qdisc(&etf_qdisc_ops);
+}
+
+static void __exit etf_module_exit(void)
+{
+       unregister_qdisc(&etf_qdisc_ops);
+}
+module_init(etf_module_init)
+module_exit(etf_module_exit)
+MODULE_LICENSE("GPL");
index 2a4ab7caf5534b11e8976d242c0066c699bb7fd6..43c4bfe625a917e1447b08f1875351f8d22ec2c8 100644 (file)
@@ -126,7 +126,6 @@ struct htb_class {
 
        union {
                struct htb_class_leaf {
-                       struct list_head drop_list;
                        int             deficit[TC_HTB_MAXDEPTH];
                        struct Qdisc    *q;
                } leaf;
@@ -171,7 +170,6 @@ struct htb_sched {
        struct qdisc_watchdog   watchdog;
 
        s64                     now;    /* cached dequeue time */
-       struct list_head        drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */
 
        /* time of nearest event per level (row) */
        s64                     near_ev_cache[TC_HTB_MAXDEPTH];
@@ -562,8 +560,6 @@ static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
        if (!cl->prio_activity) {
                cl->prio_activity = 1 << cl->prio;
                htb_activate_prios(q, cl);
-               list_add_tail(&cl->un.leaf.drop_list,
-                             q->drops + cl->prio);
        }
 }
 
@@ -579,7 +575,6 @@ static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
 
        htb_deactivate_prios(q, cl);
        cl->prio_activity = 0;
-       list_del_init(&cl->un.leaf.drop_list);
 }
 
 static void htb_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch,
@@ -981,7 +976,6 @@ static void htb_reset(struct Qdisc *sch)
                        else {
                                if (cl->un.leaf.q)
                                        qdisc_reset(cl->un.leaf.q);
-                               INIT_LIST_HEAD(&cl->un.leaf.drop_list);
                        }
                        cl->prio_activity = 0;
                        cl->cmode = HTB_CAN_SEND;
@@ -993,8 +987,6 @@ static void htb_reset(struct Qdisc *sch)
        sch->qstats.backlog = 0;
        memset(q->hlevel, 0, sizeof(q->hlevel));
        memset(q->row_mask, 0, sizeof(q->row_mask));
-       for (i = 0; i < TC_HTB_NUMPRIO; i++)
-               INIT_LIST_HEAD(q->drops + i);
 }
 
 static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
@@ -1024,7 +1016,6 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt,
        struct nlattr *tb[TCA_HTB_MAX + 1];
        struct tc_htb_glob *gopt;
        int err;
-       int i;
 
        qdisc_watchdog_init(&q->watchdog, sch);
        INIT_WORK(&q->work, htb_work_func);
@@ -1050,8 +1041,6 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt,
        err = qdisc_class_hash_init(&q->clhash);
        if (err < 0)
                return err;
-       for (i = 0; i < TC_HTB_NUMPRIO; i++)
-               INIT_LIST_HEAD(q->drops + i);
 
        qdisc_skb_head_init(&q->direct_queue);
 
@@ -1224,7 +1213,6 @@ static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
 
        parent->level = 0;
        memset(&parent->un.inner, 0, sizeof(parent->un.inner));
-       INIT_LIST_HEAD(&parent->un.leaf.drop_list);
        parent->un.leaf.q = new_q ? new_q : &noop_qdisc;
        parent->tokens = parent->buffer;
        parent->ctokens = parent->cbuffer;
@@ -1418,7 +1406,6 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
                }
 
                cl->children = 0;
-               INIT_LIST_HEAD(&cl->un.leaf.drop_list);
                RB_CLEAR_NODE(&cl->pq_node);
 
                for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
index 7d6801fc5340eff65b81037519ada115cbc23e20..ad18a205241690070aff0f459c59c099ff8f3e2a 100644 (file)
                 Fabio Ludovici <fabio.ludovici at yahoo.it>
 */
 
+struct disttable {
+       u32  size;
+       s16 table[0];
+};
+
 struct netem_sched_data {
        /* internal t(ime)fifo qdisc uses t_root and sch->limit */
        struct rb_root t_root;
@@ -99,10 +104,7 @@ struct netem_sched_data {
                u32 rho;
        } delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;
 
-       struct disttable {
-               u32  size;
-               s16 table[0];
-       } *delay_dist;
+       struct disttable *delay_dist;
 
        enum  {
                CLG_RANDOM,
@@ -142,6 +144,7 @@ struct netem_sched_data {
                s32 bytes_left;
        } slot;
 
+       struct disttable *slot_dist;
 };
 
 /* Time stamp put into socket buffer control block
@@ -180,7 +183,7 @@ static u32 get_crandom(struct crndstate *state)
        u64 value, rho;
        unsigned long answer;
 
-       if (state->rho == 0)    /* no correlation */
+       if (!state || state->rho == 0)  /* no correlation */
                return prandom_u32();
 
        value = prandom_u32();
@@ -601,10 +604,19 @@ finish_segs:
 
 static void get_slot_next(struct netem_sched_data *q, u64 now)
 {
-       q->slot.slot_next = now + q->slot_config.min_delay +
-               (prandom_u32() *
-                       (q->slot_config.max_delay -
-                               q->slot_config.min_delay) >> 32);
+       s64 next_delay;
+
+       if (!q->slot_dist)
+               next_delay = q->slot_config.min_delay +
+                               (prandom_u32() *
+                                (q->slot_config.max_delay -
+                                 q->slot_config.min_delay) >> 32);
+       else
+               next_delay = tabledist(q->slot_config.dist_delay,
+                                      (s32)(q->slot_config.dist_jitter),
+                                      NULL, q->slot_dist);
+
+       q->slot.slot_next = now + next_delay;
        q->slot.packets_left = q->slot_config.max_packets;
        q->slot.bytes_left = q->slot_config.max_bytes;
 }
@@ -721,9 +733,9 @@ static void dist_free(struct disttable *d)
  * signed 16 bit values.
  */
 
-static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
+static int get_dist_table(struct Qdisc *sch, struct disttable **tbl,
+                         const struct nlattr *attr)
 {
-       struct netem_sched_data *q = qdisc_priv(sch);
        size_t n = nla_len(attr)/sizeof(__s16);
        const __s16 *data = nla_data(attr);
        spinlock_t *root_lock;
@@ -744,7 +756,7 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
        root_lock = qdisc_root_sleeping_lock(sch);
 
        spin_lock_bh(root_lock);
-       swap(q->delay_dist, d);
+       swap(*tbl, d);
        spin_unlock_bh(root_lock);
 
        dist_free(d);
@@ -762,7 +774,8 @@ static void get_slot(struct netem_sched_data *q, const struct nlattr *attr)
                q->slot_config.max_bytes = INT_MAX;
        q->slot.packets_left = q->slot_config.max_packets;
        q->slot.bytes_left = q->slot_config.max_bytes;
-       if (q->slot_config.min_delay | q->slot_config.max_delay)
+       if (q->slot_config.min_delay | q->slot_config.max_delay |
+           q->slot_config.dist_jitter)
                q->slot.slot_next = ktime_get_ns();
        else
                q->slot.slot_next = 0;
@@ -926,16 +939,17 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt,
        }
 
        if (tb[TCA_NETEM_DELAY_DIST]) {
-               ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST]);
-               if (ret) {
-                       /* recover clg and loss_model, in case of
-                        * q->clg and q->loss_model were modified
-                        * in get_loss_clg()
-                        */
-                       q->clg = old_clg;
-                       q->loss_model = old_loss_model;
-                       return ret;
-               }
+               ret = get_dist_table(sch, &q->delay_dist,
+                                    tb[TCA_NETEM_DELAY_DIST]);
+               if (ret)
+                       goto get_table_failure;
+       }
+
+       if (tb[TCA_NETEM_SLOT_DIST]) {
+               ret = get_dist_table(sch, &q->slot_dist,
+                                    tb[TCA_NETEM_SLOT_DIST]);
+               if (ret)
+                       goto get_table_failure;
        }
 
        sch->limit = qopt->limit;
@@ -983,6 +997,15 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt,
                get_slot(q, tb[TCA_NETEM_SLOT]);
 
        return ret;
+
+get_table_failure:
+       /* recover clg and loss_model, in case of
+        * q->clg and q->loss_model were modified
+        * in get_loss_clg()
+        */
+       q->clg = old_clg;
+       q->loss_model = old_loss_model;
+       return ret;
 }
 
 static int netem_init(struct Qdisc *sch, struct nlattr *opt,
@@ -1011,6 +1034,7 @@ static void netem_destroy(struct Qdisc *sch)
        if (q->qdisc)
                qdisc_destroy(q->qdisc);
        dist_free(q->delay_dist);
+       dist_free(q->slot_dist);
 }
 
 static int dump_loss_model(const struct netem_sched_data *q,
@@ -1127,7 +1151,8 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
        if (dump_loss_model(q, skb) != 0)
                goto nla_put_failure;
 
-       if (q->slot_config.min_delay | q->slot_config.max_delay) {
+       if (q->slot_config.min_delay | q->slot_config.max_delay |
+           q->slot_config.dist_jitter) {
                slot = q->slot_config;
                if (slot.max_packets == INT_MAX)
                        slot.max_packets = 0;
diff --git a/net/sched/sch_skbprio.c b/net/sched/sch_skbprio.c
new file mode 100644 (file)
index 0000000..52c0b6d
--- /dev/null
@@ -0,0 +1,324 @@
+/*
+ * net/sched/sch_skbprio.c  SKB Priority Queue.
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Nishanth Devarajan, <ndev2021@gmail.com>
+ *             Cody Doucette, <doucette@bu.edu>
+ *             original idea by Michel Machado, Cody Doucette, and Qiaobin Fu
+ */
+
+#include <linux/string.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <net/pkt_sched.h>
+#include <net/sch_generic.h>
+#include <net/inet_ecn.h>
+
+/*             SKB Priority Queue
+ *     =================================
+ *
+ * Skbprio (SKB Priority Queue) is a queueing discipline that prioritizes
+ * packets according to their skb->priority field. Under congestion,
+ * Skbprio drops already-enqueued lower priority packets to make space
+ * available for higher priority packets; it was conceived as a solution
+ * for denial-of-service defenses that need to route packets with different
+ * priorities as a means of overcoming DoS attacks.
+ */
+
+struct skbprio_sched_data {
+       /* Queue state. */
+       struct sk_buff_head qdiscs[SKBPRIO_MAX_PRIORITY];
+       struct gnet_stats_queue qstats[SKBPRIO_MAX_PRIORITY];
+       u16 highest_prio;
+       u16 lowest_prio;
+};
+
+static u16 calc_new_high_prio(const struct skbprio_sched_data *q)
+{
+       int prio;
+
+       for (prio = q->highest_prio - 1; prio >= q->lowest_prio; prio--) {
+               if (!skb_queue_empty(&q->qdiscs[prio]))
+                       return prio;
+       }
+
+       /* SKB queue is empty; return 0 (default highest priority setting). */
+       return 0;
+}
+
+static u16 calc_new_low_prio(const struct skbprio_sched_data *q)
+{
+       int prio;
+
+       for (prio = q->lowest_prio + 1; prio <= q->highest_prio; prio++) {
+               if (!skb_queue_empty(&q->qdiscs[prio]))
+                       return prio;
+       }
+
+       /* SKB queue is empty; return SKBPRIO_MAX_PRIORITY - 1
+        * (default lowest priority setting).
+        */
+       return SKBPRIO_MAX_PRIORITY - 1;
+}
+
+static int skbprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+                         struct sk_buff **to_free)
+{
+       const unsigned int max_priority = SKBPRIO_MAX_PRIORITY - 1;
+       struct skbprio_sched_data *q = qdisc_priv(sch);
+       struct sk_buff_head *qdisc;
+       struct sk_buff_head *lp_qdisc;
+       struct sk_buff *to_drop;
+       u16 prio, lp;
+
+       /* Obtain the priority of @skb. */
+       prio = min(skb->priority, max_priority);
+
+       qdisc = &q->qdiscs[prio];
+       if (sch->q.qlen < sch->limit) {
+               __skb_queue_tail(qdisc, skb);
+               qdisc_qstats_backlog_inc(sch, skb);
+               q->qstats[prio].backlog += qdisc_pkt_len(skb);
+
+               /* Check to update highest and lowest priorities. */
+               if (prio > q->highest_prio)
+                       q->highest_prio = prio;
+
+               if (prio < q->lowest_prio)
+                       q->lowest_prio = prio;
+
+               sch->q.qlen++;
+               return NET_XMIT_SUCCESS;
+       }
+
+       /* If this packet has the lowest priority, drop it. */
+       lp = q->lowest_prio;
+       if (prio <= lp) {
+               q->qstats[prio].drops++;
+               q->qstats[prio].overlimits++;
+               return qdisc_drop(skb, sch, to_free);
+       }
+
+       __skb_queue_tail(qdisc, skb);
+       qdisc_qstats_backlog_inc(sch, skb);
+       q->qstats[prio].backlog += qdisc_pkt_len(skb);
+
+       /* Drop the packet at the tail of the lowest priority qdisc. */
+       lp_qdisc = &q->qdiscs[lp];
+       to_drop = __skb_dequeue_tail(lp_qdisc);
+       BUG_ON(!to_drop);
+       qdisc_qstats_backlog_dec(sch, to_drop);
+       qdisc_drop(to_drop, sch, to_free);
+
+       q->qstats[lp].backlog -= qdisc_pkt_len(to_drop);
+       q->qstats[lp].drops++;
+       q->qstats[lp].overlimits++;
+
+       /* Check to update highest and lowest priorities. */
+       if (skb_queue_empty(lp_qdisc)) {
+               if (q->lowest_prio == q->highest_prio) {
+                       /* The incoming packet is the only packet in the queue. */
+                       BUG_ON(sch->q.qlen != 1);
+                       q->lowest_prio = prio;
+                       q->highest_prio = prio;
+               } else {
+                       q->lowest_prio = calc_new_low_prio(q);
+               }
+       }
+
+       if (prio > q->highest_prio)
+               q->highest_prio = prio;
+
+       return NET_XMIT_CN;
+}
+
+static struct sk_buff *skbprio_dequeue(struct Qdisc *sch)
+{
+       struct skbprio_sched_data *q = qdisc_priv(sch);
+       struct sk_buff_head *hpq = &q->qdiscs[q->highest_prio];
+       struct sk_buff *skb = __skb_dequeue(hpq);
+
+       if (unlikely(!skb))
+               return NULL;
+
+       sch->q.qlen--;
+       qdisc_qstats_backlog_dec(sch, skb);
+       qdisc_bstats_update(sch, skb);
+
+       q->qstats[q->highest_prio].backlog -= qdisc_pkt_len(skb);
+
+       /* Update highest priority field. */
+       if (skb_queue_empty(hpq)) {
+               if (q->lowest_prio == q->highest_prio) {
+                       BUG_ON(sch->q.qlen);
+                       q->highest_prio = 0;
+                       q->lowest_prio = SKBPRIO_MAX_PRIORITY - 1;
+               } else {
+                       q->highest_prio = calc_new_high_prio(q);
+               }
+       }
+       return skb;
+}
+
+static int skbprio_change(struct Qdisc *sch, struct nlattr *opt,
+                       struct netlink_ext_ack *extack)
+{
+       struct tc_skbprio_qopt *ctl = nla_data(opt);
+
+       /* Reject malformed netlink payloads before dereferencing them. */
+       if (opt->nla_len != nla_attr_size(sizeof(*ctl)))
+               return -EINVAL;
+
+       sch->limit = ctl->limit;
+       return 0;
+}
+
+static int skbprio_init(struct Qdisc *sch, struct nlattr *opt,
+                       struct netlink_ext_ack *extack)
+{
+       struct skbprio_sched_data *q = qdisc_priv(sch);
+       int prio;
+
+       /* Initialise all queues, one for each possible priority. */
+       for (prio = 0; prio < SKBPRIO_MAX_PRIORITY; prio++)
+               __skb_queue_head_init(&q->qdiscs[prio]);
+
+       memset(&q->qstats, 0, sizeof(q->qstats));
+       q->highest_prio = 0;
+       q->lowest_prio = SKBPRIO_MAX_PRIORITY - 1;
+       sch->limit = 64;
+       if (!opt)
+               return 0;
+
+       return skbprio_change(sch, opt, extack);
+}
+
+static int skbprio_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+       struct tc_skbprio_qopt opt;
+
+       opt.limit = sch->limit;
+
+       if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
+               return -1;
+
+       return skb->len;
+}
+
+static void skbprio_reset(struct Qdisc *sch)
+{
+       struct skbprio_sched_data *q = qdisc_priv(sch);
+       int prio;
+
+       sch->qstats.backlog = 0;
+       sch->q.qlen = 0;
+
+       for (prio = 0; prio < SKBPRIO_MAX_PRIORITY; prio++)
+               __skb_queue_purge(&q->qdiscs[prio]);
+
+       memset(&q->qstats, 0, sizeof(q->qstats));
+       q->highest_prio = 0;
+       q->lowest_prio = SKBPRIO_MAX_PRIORITY - 1;
+}
+
+static void skbprio_destroy(struct Qdisc *sch)
+{
+       struct skbprio_sched_data *q = qdisc_priv(sch);
+       int prio;
+
+       for (prio = 0; prio < SKBPRIO_MAX_PRIORITY; prio++)
+               __skb_queue_purge(&q->qdiscs[prio]);
+}
+
+static struct Qdisc *skbprio_leaf(struct Qdisc *sch, unsigned long arg)
+{
+       return NULL;
+}
+
+static unsigned long skbprio_find(struct Qdisc *sch, u32 classid)
+{
+       return 0;
+}
+
+static int skbprio_dump_class(struct Qdisc *sch, unsigned long cl,
+                            struct sk_buff *skb, struct tcmsg *tcm)
+{
+       tcm->tcm_handle |= TC_H_MIN(cl);
+       return 0;
+}
+
+static int skbprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
+                                  struct gnet_dump *d)
+{
+       struct skbprio_sched_data *q = qdisc_priv(sch);
+       if (gnet_stats_copy_queue(d, NULL, &q->qstats[cl - 1],
+               q->qstats[cl - 1].qlen) < 0)
+               return -1;
+       return 0;
+}
+
+static void skbprio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
+{
+       unsigned int i;
+
+       if (arg->stop)
+               return;
+
+       for (i = 0; i < SKBPRIO_MAX_PRIORITY; i++) {
+               if (arg->count < arg->skip) {
+                       arg->count++;
+                       continue;
+               }
+               if (arg->fn(sch, i + 1, arg) < 0) {
+                       arg->stop = 1;
+                       break;
+               }
+               arg->count++;
+       }
+}
+
+static const struct Qdisc_class_ops skbprio_class_ops = {
+       .leaf           =       skbprio_leaf,
+       .find           =       skbprio_find,
+       .dump           =       skbprio_dump_class,
+       .dump_stats     =       skbprio_dump_class_stats,
+       .walk           =       skbprio_walk,
+};
+
+static struct Qdisc_ops skbprio_qdisc_ops __read_mostly = {
+       .cl_ops         =       &skbprio_class_ops,
+       .id             =       "skbprio",
+       .priv_size      =       sizeof(struct skbprio_sched_data),
+       .enqueue        =       skbprio_enqueue,
+       .dequeue        =       skbprio_dequeue,
+       .peek           =       qdisc_peek_dequeued,
+       .init           =       skbprio_init,
+       .reset          =       skbprio_reset,
+       .change         =       skbprio_change,
+       .dump           =       skbprio_dump,
+       .destroy        =       skbprio_destroy,
+       .owner          =       THIS_MODULE,
+};
+
+static int __init skbprio_module_init(void)
+{
+       return register_qdisc(&skbprio_qdisc_ops);
+}
+
+static void __exit skbprio_module_exit(void)
+{
+       unregister_qdisc(&skbprio_qdisc_ops);
+}
+
+module_init(skbprio_module_init)
+module_exit(skbprio_module_exit)
+
+MODULE_LICENSE("GPL");
index c740b189d4bae64835ada8a8684cf962b947bfe9..950ecf6e7439feeb1987fd22d57a7588ba86dd07 100644 (file)
@@ -41,8 +41,8 @@ config SCTP_DBG_OBJCNT
        bool "SCTP: Debug object counts"
        depends on PROC_FS
        help
-         If you say Y, this will enable debugging support for counting the 
-         type of objects that are currently allocated.  This is useful for 
+         If you say Y, this will enable debugging support for counting the
+         type of objects that are currently allocated.  This is useful for
          identifying memory leaks. This debug information can be viewed by
          'cat /proc/net/sctp/sctp_dbg_objcnt'
 
index 5d5a16204d50516eca7d5322a60aac6511178c38..297d9cf960b928532aa2769c47f76fdb5f64efbf 100644 (file)
@@ -115,6 +115,9 @@ static struct sctp_association *sctp_association_init(
        /* Initialize path max retrans value. */
        asoc->pathmaxrxt = sp->pathmaxrxt;
 
+       asoc->flowlabel = sp->flowlabel;
+       asoc->dscp = sp->dscp;
+
        /* Initialize default path MTU. */
        asoc->pathmtu = sp->pathmtu;
 
@@ -647,6 +650,18 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
        peer->sackdelay = asoc->sackdelay;
        peer->sackfreq = asoc->sackfreq;
 
+       if (addr->sa.sa_family == AF_INET6) {
+               __be32 info = addr->v6.sin6_flowinfo;
+
+               if (info) {
+                       peer->flowlabel = ntohl(info & IPV6_FLOWLABEL_MASK);
+                       peer->flowlabel |= SCTP_FLOWLABEL_SET_MASK;
+               } else {
+                       peer->flowlabel = asoc->flowlabel;
+               }
+       }
+       peer->dscp = asoc->dscp;
+
        /* Enable/disable heartbeat, SACK delay, and path MTU discovery
         * based on association setting.
         */
index ba8a6e6c36fae998b5590a803c90c28d8302d063..9bbc5f92c941948ee22d1a6095245c08bbd64244 100644 (file)
@@ -56,6 +56,7 @@
 #include <net/sctp/sm.h>
 #include <net/sctp/checksum.h>
 #include <net/net_namespace.h>
+#include <linux/rhashtable.h>
 
 /* Forward declarations for internal helpers. */
 static int sctp_rcv_ootb(struct sk_buff *);
index 0cd2e764f47ff0874438301324de25e4bf33dd95..fc6c5e4bffa540069f70cf33bda2942d7143fcd4 100644 (file)
@@ -209,12 +209,17 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport)
        struct sock *sk = skb->sk;
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct flowi6 *fl6 = &transport->fl.u.ip6;
+       __u8 tclass = np->tclass;
        int res;
 
        pr_debug("%s: skb:%p, len:%d, src:%pI6 dst:%pI6\n", __func__, skb,
                 skb->len, &fl6->saddr, &fl6->daddr);
 
-       IP6_ECN_flow_xmit(sk, fl6->flowlabel);
+       if (transport->dscp & SCTP_DSCP_SET_MASK)
+               tclass = transport->dscp & SCTP_DSCP_VAL_MASK;
+
+       if (INET_ECN_is_capable(tclass))
+               IP6_ECN_flow_xmit(sk, fl6->flowlabel);
 
        if (!(transport->param_flags & SPP_PMTUD_ENABLE))
                skb->ignore_df = 1;
@@ -223,7 +228,7 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport)
 
        rcu_read_lock();
        res = ip6_xmit(sk, skb, fl6, sk->sk_mark, rcu_dereference(np->opt),
-                      np->tclass);
+                      tclass);
        rcu_read_unlock();
        return res;
 }
@@ -254,6 +259,17 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
                fl6->flowi6_oif = daddr->v6.sin6_scope_id;
        else if (asoc)
                fl6->flowi6_oif = asoc->base.sk->sk_bound_dev_if;
+       if (t->flowlabel & SCTP_FLOWLABEL_SET_MASK)
+               fl6->flowlabel = htonl(t->flowlabel & SCTP_FLOWLABEL_VAL_MASK);
+
+       if (np->sndflow && (fl6->flowlabel & IPV6_FLOWLABEL_MASK)) {
+               struct ip6_flowlabel *flowlabel;
+
+               flowlabel = fl6_sock_lookup(sk, fl6->flowlabel);
+               if (!flowlabel)
+                       goto out;
+               fl6_sock_release(flowlabel);
+       }
 
        pr_debug("%s: dst=%pI6 ", __func__, &fl6->daddr);
 
index 67f73d3a1356b93d3896b6985a65e70615902b18..e948db29ab539a588e8526d2f4fc22428a9f4685 100644 (file)
@@ -426,13 +426,16 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
        struct dst_entry *dst = NULL;
        union sctp_addr *daddr = &t->ipaddr;
        union sctp_addr dst_saddr;
+       __u8 tos = inet_sk(sk)->tos;
 
+       if (t->dscp & SCTP_DSCP_SET_MASK)
+               tos = t->dscp & SCTP_DSCP_VAL_MASK;
        memset(fl4, 0x0, sizeof(struct flowi4));
        fl4->daddr  = daddr->v4.sin_addr.s_addr;
        fl4->fl4_dport = daddr->v4.sin_port;
        fl4->flowi4_proto = IPPROTO_SCTP;
        if (asoc) {
-               fl4->flowi4_tos = RT_CONN_FLAGS(asoc->base.sk);
+               fl4->flowi4_tos = RT_CONN_FLAGS_TOS(asoc->base.sk, tos);
                fl4->flowi4_oif = asoc->base.sk->sk_bound_dev_if;
                fl4->fl4_sport = htons(asoc->base.bind_addr.port);
        }
@@ -495,7 +498,7 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
                fl4->fl4_sport = laddr->a.v4.sin_port;
                flowi4_update_output(fl4,
                                     asoc->base.sk->sk_bound_dev_if,
-                                    RT_CONN_FLAGS(asoc->base.sk),
+                                    RT_CONN_FLAGS_TOS(asoc->base.sk, tos),
                                     daddr->v4.sin_addr.s_addr,
                                     laddr->a.v4.sin_addr.s_addr);
 
@@ -971,16 +974,21 @@ static inline int sctp_v4_xmit(struct sk_buff *skb,
                               struct sctp_transport *transport)
 {
        struct inet_sock *inet = inet_sk(skb->sk);
+       __u8 dscp = inet->tos;
 
        pr_debug("%s: skb:%p, len:%d, src:%pI4, dst:%pI4\n", __func__, skb,
-                skb->len, &transport->fl.u.ip4.saddr, &transport->fl.u.ip4.daddr);
+                skb->len, &transport->fl.u.ip4.saddr,
+                &transport->fl.u.ip4.daddr);
+
+       if (transport->dscp & SCTP_DSCP_SET_MASK)
+               dscp = transport->dscp & SCTP_DSCP_VAL_MASK;
 
        inet->pmtudisc = transport->param_flags & SPP_PMTUD_ENABLE ?
                         IP_PMTUDISC_DO : IP_PMTUDISC_DONT;
 
        SCTP_INC_STATS(sock_net(&inet->sk), SCTP_MIB_OUTSCTPPACKS);
 
-       return ip_queue_xmit(&inet->sk, skb, &transport->fl);
+       return __ip_queue_xmit(&inet->sk, skb, &transport->fl, dscp);
 }
 
 static struct sctp_af sctp_af_inet;
index 298112ca8c069e3bc473e73080c37bbd61253f50..85d39309023849725c6ecb84b463024ad1d7dcbf 100644 (file)
@@ -1827,4 +1827,3 @@ nomem:
        error = -ENOMEM;
        goto out;
 }
-
index ce620e878538be99e1f79784582d0da48ba292ea..502c0d7cb105e27306df08155527d147e4fdac5f 100644 (file)
@@ -66,6 +66,7 @@
 #include <linux/slab.h>
 #include <linux/file.h>
 #include <linux/compat.h>
+#include <linux/rhashtable.h>
 
 #include <net/ip.h>
 #include <net/icmp.h>
@@ -1696,6 +1697,7 @@ static int sctp_sendmsg_new_asoc(struct sock *sk, __u16 sflags,
        struct sctp_association *asoc;
        enum sctp_scope scope;
        struct cmsghdr *cmsg;
+       __be32 flowinfo = 0;
        struct sctp_af *af;
        int err;
 
@@ -1780,6 +1782,9 @@ static int sctp_sendmsg_new_asoc(struct sock *sk, __u16 sflags,
        if (!cmsgs->addrs_msg)
                return 0;
 
+       if (daddr->sa.sa_family == AF_INET6)
+               flowinfo = daddr->v6.sin6_flowinfo;
+
        /* sendv addr list parse */
        for_each_cmsghdr(cmsg, cmsgs->addrs_msg) {
                struct sctp_transport *transport;
@@ -1812,6 +1817,7 @@ static int sctp_sendmsg_new_asoc(struct sock *sk, __u16 sflags,
                        }
 
                        dlen = sizeof(struct in6_addr);
+                       daddr->v6.sin6_flowinfo = flowinfo;
                        daddr->v6.sin6_family = AF_INET6;
                        daddr->v6.sin6_port = htons(asoc->peer.port);
                        memcpy(&daddr->v6.sin6_addr, CMSG_DATA(cmsg), dlen);
@@ -2392,6 +2398,8 @@ static int sctp_setsockopt_autoclose(struct sock *sk, char __user *optval,
  *     uint32_t                spp_pathmtu;
  *     uint32_t                spp_sackdelay;
  *     uint32_t                spp_flags;
+ *     uint32_t                spp_ipv6_flowlabel;
+ *     uint8_t                 spp_dscp;
  * };
  *
  *   spp_assoc_id    - (one-to-many style socket) This is filled in the
@@ -2471,6 +2479,45 @@ static int sctp_setsockopt_autoclose(struct sock *sk, char __user *optval,
  *                     also that this field is mutually exclusive to
  *                     SPP_SACKDELAY_ENABLE, setting both will have undefined
  *                     results.
+ *
+ *                     SPP_IPV6_FLOWLABEL:  Setting this flag enables the
+ *                     setting of the IPV6 flow label value.  The value is
+ *                     contained in the spp_ipv6_flowlabel field.
+ *                     Upon retrieval, this flag will be set to indicate that
+ *                     the spp_ipv6_flowlabel field has a valid value returned.
+ *                     If a specific destination address is set (in the
+ *                     spp_address field), then the value returned is that of
+ *                     the address.  If just an association is specified (and
+ *                     no address), then the association's default flow label
+ *                     is returned.  If neither an association nor a destination
+ *                     is specified, then the socket's default flow label is
+ *                     returned.  For non-IPv6 sockets, this flag will be left
+ *                     cleared.
+ *
+ *                     SPP_DSCP:  Setting this flag enables the setting of the
+ *                     Differentiated Services Code Point (DSCP) value
+ *                     associated with either the association or a specific
+ *                     address.  The value is obtained in the spp_dscp field.
+ *                     Upon retrieval, this flag will be set to indicate that
+ *                     the spp_dscp field has a valid value returned.  If a
+ *                     specific destination address is set when called (in the
+ *                     spp_address field), then that specific destination
+ *                     address's DSCP value is returned.  If just an association
+ *                     is specified, then the association's default DSCP is
+ *                     returned.  If neither an association nor a destination is
+ *                     specified, then the socket's default DSCP is returned.
+ *
+ *   spp_ipv6_flowlabel
+ *                   - This field is used in conjunction with the
+ *                     SPP_IPV6_FLOWLABEL flag and contains the IPv6 flow label.
+ *                     The 20 least significant bits are used for the flow
+ *                     label.  This setting has precedence over any IPv6-layer
+ *                     setting.
+ *
+ *   spp_dscp        - This field is used in conjunction with the SPP_DSCP flag
+ *                     and contains the DSCP.  The 6 most significant bits are
+ *                     used for the DSCP.  This setting has precedence over any
+ *                     IPv4- or IPv6- layer setting.
  */
 static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
                                       struct sctp_transport   *trans,
@@ -2610,6 +2657,51 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
                }
        }
 
+       if (params->spp_flags & SPP_IPV6_FLOWLABEL) {
+               if (trans && trans->ipaddr.sa.sa_family == AF_INET6) {
+                       trans->flowlabel = params->spp_ipv6_flowlabel &
+                                          SCTP_FLOWLABEL_VAL_MASK;
+                       trans->flowlabel |= SCTP_FLOWLABEL_SET_MASK;
+               } else if (asoc) {
+                       list_for_each_entry(trans,
+                                           &asoc->peer.transport_addr_list,
+                                           transports) {
+                               if (trans->ipaddr.sa.sa_family != AF_INET6)
+                                       continue;
+                               trans->flowlabel = params->spp_ipv6_flowlabel &
+                                                  SCTP_FLOWLABEL_VAL_MASK;
+                               trans->flowlabel |= SCTP_FLOWLABEL_SET_MASK;
+                       }
+                       asoc->flowlabel = params->spp_ipv6_flowlabel &
+                                         SCTP_FLOWLABEL_VAL_MASK;
+                       asoc->flowlabel |= SCTP_FLOWLABEL_SET_MASK;
+               } else if (sctp_opt2sk(sp)->sk_family == AF_INET6) {
+                       sp->flowlabel = params->spp_ipv6_flowlabel &
+                                       SCTP_FLOWLABEL_VAL_MASK;
+                       sp->flowlabel |= SCTP_FLOWLABEL_SET_MASK;
+               }
+       }
+
+       if (params->spp_flags & SPP_DSCP) {
+               if (trans) {
+                       trans->dscp = params->spp_dscp & SCTP_DSCP_VAL_MASK;
+                       trans->dscp |= SCTP_DSCP_SET_MASK;
+               } else if (asoc) {
+                       list_for_each_entry(trans,
+                                           &asoc->peer.transport_addr_list,
+                                           transports) {
+                               trans->dscp = params->spp_dscp &
+                                             SCTP_DSCP_VAL_MASK;
+                               trans->dscp |= SCTP_DSCP_SET_MASK;
+                       }
+                       asoc->dscp = params->spp_dscp & SCTP_DSCP_VAL_MASK;
+                       asoc->dscp |= SCTP_DSCP_SET_MASK;
+               } else {
+                       sp->dscp = params->spp_dscp & SCTP_DSCP_VAL_MASK;
+                       sp->dscp |= SCTP_DSCP_SET_MASK;
+               }
+       }
+
        return 0;
 }
 
@@ -2624,11 +2716,18 @@ static int sctp_setsockopt_peer_addr_params(struct sock *sk,
        int error;
        int hb_change, pmtud_change, sackdelay_change;
 
-       if (optlen != sizeof(struct sctp_paddrparams))
+       if (optlen == sizeof(params)) {
+               if (copy_from_user(&params, optval, optlen))
+                       return -EFAULT;
+       } else if (optlen == ALIGN(offsetof(struct sctp_paddrparams,
+                                           spp_ipv6_flowlabel), 4)) {
+               if (copy_from_user(&params, optval, optlen))
+                       return -EFAULT;
+               if (params.spp_flags & (SPP_DSCP | SPP_IPV6_FLOWLABEL))
+                       return -EINVAL;
+       } else {
                return -EINVAL;
-
-       if (copy_from_user(&params, optval, optlen))
-               return -EFAULT;
+       }
 
        /* Validate flags and value parameters. */
        hb_change        = params.spp_flags & SPP_HB;
@@ -4169,6 +4268,28 @@ out:
        return retval;
 }
 
+static int sctp_setsockopt_reuse_port(struct sock *sk, char __user *optval,
+                                     unsigned int optlen)
+{
+       int val;
+
+       if (!sctp_style(sk, TCP))
+               return -EOPNOTSUPP;
+
+       if (sctp_sk(sk)->ep->base.bind_addr.port)
+               return -EFAULT;
+
+       if (optlen < sizeof(int))
+               return -EINVAL;
+
+       if (get_user(val, (int __user *)optval))
+               return -EFAULT;
+
+       sctp_sk(sk)->reuse = !!val;
+
+       return 0;
+}
+
 /* API 6.2 setsockopt(), getsockopt()
  *
  * Applications use setsockopt() and getsockopt() to set or retrieve
@@ -4363,6 +4484,9 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname,
                retval = sctp_setsockopt_interleaving_supported(sk, optval,
                                                                optlen);
                break;
+       case SCTP_REUSE_PORT:
+               retval = sctp_setsockopt_reuse_port(sk, optval, optlen);
+               break;
        default:
                retval = -ENOPROTOOPT;
                break;
@@ -5427,6 +5551,45 @@ out:
  *                     also that this field is mutually exclusive to
  *                     SPP_SACKDELAY_ENABLE, setting both will have undefined
  *                     results.
+ *
+ *                     SPP_IPV6_FLOWLABEL:  Setting this flag enables the
+ *                     setting of the IPV6 flow label value.  The value is
+ *                     contained in the spp_ipv6_flowlabel field.
+ *                     Upon retrieval, this flag will be set to indicate that
+ *                     the spp_ipv6_flowlabel field has a valid value returned.
+ *                     If a specific destination address is set (in the
+ *                     spp_address field), then the value returned is that of
+ *                     the address.  If just an association is specified (and
+ *                     no address), then the association's default flow label
+ *                     is returned.  If neither an association nor a destination
+ *                     is specified, then the socket's default flow label is
+ *                     returned.  For non-IPv6 sockets, this flag will be left
+ *                     cleared.
+ *
+ *                     SPP_DSCP:  Setting this flag enables the setting of the
+ *                     Differentiated Services Code Point (DSCP) value
+ *                     associated with either the association or a specific
+ *                     address.  The value is obtained in the spp_dscp field.
+ *                     Upon retrieval, this flag will be set to indicate that
+ *                     the spp_dscp field has a valid value returned.  If a
+ *                     specific destination address is set when called (in the
+ *                     spp_address field), then that specific destination
+ *                     address's DSCP value is returned.  If just an association
+ *                     is specified, then the association's default DSCP is
+ *                     returned.  If neither an association nor a destination is
+ *                     specified, then the socket's default DSCP is returned.
+ *
+ *   spp_ipv6_flowlabel
+ *                   - This field is used in conjunction with the
+ *                     SPP_IPV6_FLOWLABEL flag and contains the IPv6 flow label.
+ *                     The 20 least significant bits are used for the flow
+ *                     label.  This setting has precedence over any IPv6-layer
+ *                     setting.
+ *
+ *   spp_dscp        - This field is used in conjunction with the SPP_DSCP flag
+ *                     and contains the DSCP.  The 6 most significant bits are
+ *                     used for the DSCP.  This setting has precedence over any
+ *                     IPv4- or IPv6- layer setting.
  */
 static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len,
                                            char __user *optval, int __user *optlen)
@@ -5436,9 +5599,15 @@ static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len,
        struct sctp_association *asoc = NULL;
        struct sctp_sock        *sp = sctp_sk(sk);
 
-       if (len < sizeof(struct sctp_paddrparams))
+       if (len >= sizeof(params))
+               len = sizeof(params);
+       else if (len >= ALIGN(offsetof(struct sctp_paddrparams,
+                                      spp_ipv6_flowlabel), 4))
+               len = ALIGN(offsetof(struct sctp_paddrparams,
+                                    spp_ipv6_flowlabel), 4);
+       else
                return -EINVAL;
-       len = sizeof(struct sctp_paddrparams);
+
        if (copy_from_user(&params, optval, len))
                return -EFAULT;
 
@@ -5473,6 +5642,15 @@ static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len,
 
                /*draft-11 doesn't say what to return in spp_flags*/
                params.spp_flags      = trans->param_flags;
+               if (trans->flowlabel & SCTP_FLOWLABEL_SET_MASK) {
+                       params.spp_ipv6_flowlabel = trans->flowlabel &
+                                                   SCTP_FLOWLABEL_VAL_MASK;
+                       params.spp_flags |= SPP_IPV6_FLOWLABEL;
+               }
+               if (trans->dscp & SCTP_DSCP_SET_MASK) {
+                       params.spp_dscp = trans->dscp & SCTP_DSCP_VAL_MASK;
+                       params.spp_flags |= SPP_DSCP;
+               }
        } else if (asoc) {
                /* Fetch association values. */
                params.spp_hbinterval = jiffies_to_msecs(asoc->hbinterval);
@@ -5482,6 +5660,15 @@ static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len,
 
                /*draft-11 doesn't say what to return in spp_flags*/
                params.spp_flags      = asoc->param_flags;
+               if (asoc->flowlabel & SCTP_FLOWLABEL_SET_MASK) {
+                       params.spp_ipv6_flowlabel = asoc->flowlabel &
+                                                   SCTP_FLOWLABEL_VAL_MASK;
+                       params.spp_flags |= SPP_IPV6_FLOWLABEL;
+               }
+               if (asoc->dscp & SCTP_DSCP_SET_MASK) {
+                       params.spp_dscp = asoc->dscp & SCTP_DSCP_VAL_MASK;
+                       params.spp_flags |= SPP_DSCP;
+               }
        } else {
                /* Fetch socket values. */
                params.spp_hbinterval = sp->hbinterval;
@@ -5491,6 +5678,15 @@ static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len,
 
                /*draft-11 doesn't say what to return in spp_flags*/
                params.spp_flags      = sp->param_flags;
+               if (sp->flowlabel & SCTP_FLOWLABEL_SET_MASK) {
+                       params.spp_ipv6_flowlabel = sp->flowlabel &
+                                                   SCTP_FLOWLABEL_VAL_MASK;
+                       params.spp_flags |= SPP_IPV6_FLOWLABEL;
+               }
+               if (sp->dscp & SCTP_DSCP_SET_MASK) {
+                       params.spp_dscp = sp->dscp & SCTP_DSCP_VAL_MASK;
+                       params.spp_flags |= SPP_DSCP;
+               }
        }
 
        if (copy_to_user(optval, &params, len))
@@ -7196,6 +7392,26 @@ out:
        return retval;
 }
 
+static int sctp_getsockopt_reuse_port(struct sock *sk, int len,
+                                     char __user *optval,
+                                     int __user *optlen)
+{
+       int val;
+
+       if (len < sizeof(int))
+               return -EINVAL;
+
+       len = sizeof(int);
+       val = sctp_sk(sk)->reuse;
+       if (put_user(len, optlen))
+               return -EFAULT;
+
+       if (copy_to_user(optval, &val, len))
+               return -EFAULT;
+
+       return 0;
+}
+
 static int sctp_getsockopt(struct sock *sk, int level, int optname,
                           char __user *optval, int __user *optlen)
 {
@@ -7391,6 +7607,9 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
                retval = sctp_getsockopt_interleaving_supported(sk, len, optval,
                                                                optlen);
                break;
+       case SCTP_REUSE_PORT:
+               retval = sctp_getsockopt_reuse_port(sk, len, optval, optlen);
+               break;
        default:
                retval = -ENOPROTOOPT;
                break;
@@ -7428,6 +7647,7 @@ static struct sctp_bind_bucket *sctp_bucket_create(
 
 static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr)
 {
+       bool reuse = (sk->sk_reuse || sctp_sk(sk)->reuse);
        struct sctp_bind_hashbucket *head; /* hash list */
        struct sctp_bind_bucket *pp;
        unsigned short snum;
@@ -7500,13 +7720,11 @@ pp_found:
                 * used by other socket (pp->owner not empty); that other
                 * socket is going to be sk2.
                 */
-               int reuse = sk->sk_reuse;
                struct sock *sk2;
 
                pr_debug("%s: found a possible match\n", __func__);
 
-               if (pp->fastreuse && sk->sk_reuse &&
-                       sk->sk_state != SCTP_SS_LISTENING)
+               if (pp->fastreuse && reuse && sk->sk_state != SCTP_SS_LISTENING)
                        goto success;
 
                /* Run through the list of sockets bound to the port
@@ -7524,7 +7742,7 @@ pp_found:
                        ep2 = sctp_sk(sk2)->ep;
 
                        if (sk == sk2 ||
-                           (reuse && sk2->sk_reuse &&
+                           (reuse && (sk2->sk_reuse || sctp_sk(sk2)->reuse) &&
                             sk2->sk_state != SCTP_SS_LISTENING))
                                continue;
 
@@ -7548,12 +7766,12 @@ pp_not_found:
         * SO_REUSEADDR on this socket -sk-).
         */
        if (hlist_empty(&pp->owner)) {
-               if (sk->sk_reuse && sk->sk_state != SCTP_SS_LISTENING)
+               if (reuse && sk->sk_state != SCTP_SS_LISTENING)
                        pp->fastreuse = 1;
                else
                        pp->fastreuse = 0;
        } else if (pp->fastreuse &&
-               (!sk->sk_reuse || sk->sk_state == SCTP_SS_LISTENING))
+                  (!reuse || sk->sk_state == SCTP_SS_LISTENING))
                pp->fastreuse = 0;
 
        /* We are set, so fill up all the data in the hash table
@@ -7684,7 +7902,7 @@ int sctp_inet_listen(struct socket *sock, int backlog)
                err = 0;
                sctp_unhash_endpoint(ep);
                sk->sk_state = SCTP_SS_CLOSED;
-               if (sk->sk_reuse)
+               if (sk->sk_reuse || sctp_sk(sk)->reuse)
                        sctp_sk(sk)->bind_hash->fastreuse = 1;
                goto out;
        }
@@ -8551,6 +8769,7 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk,
        newsk->sk_no_check_tx = sk->sk_no_check_tx;
        newsk->sk_no_check_rx = sk->sk_no_check_rx;
        newsk->sk_reuse = sk->sk_reuse;
+       sctp_sk(newsk)->reuse = sp->reuse;
 
        newsk->sk_shutdown = sk->sk_shutdown;
        newsk->sk_destruct = sctp_destruct_sock;
index 188104654b545b4a2c28495d0fca9cf9b020743b..4df96b4b8130908a0bf16a2a0d997ddc51bef3d9 100644 (file)
@@ -1,4 +1,4 @@
 obj-$(CONFIG_SMC)      += smc.o
 obj-$(CONFIG_SMC_DIAG) += smc_diag.o
 smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o
-smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o
+smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o
index 05e4ffe5aabde6baa711b1396484cf037fbccaee..0fc94f296e541b1c61a7545c3b1daf56c01894d4 100644 (file)
@@ -23,6 +23,7 @@
 #include <linux/workqueue.h>
 #include <linux/in.h>
 #include <linux/sched/signal.h>
+#include <linux/if_vlan.h>
 
 #include <net/sock.h>
 #include <net/tcp.h>
@@ -35,6 +36,7 @@
 #include "smc_cdc.h"
 #include "smc_core.h"
 #include "smc_ib.h"
+#include "smc_ism.h"
 #include "smc_pnet.h"
 #include "smc_tx.h"
 #include "smc_rx.h"
@@ -342,20 +344,17 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc)
 
        rc = smc_ib_modify_qp_rts(link);
        if (rc)
-               return SMC_CLC_DECL_INTERR;
+               return SMC_CLC_DECL_ERR_RDYLNK;
 
        smc_wr_remember_qp_attr(link);
 
        if (smc_reg_rmb(link, smc->conn.rmb_desc, false))
-               return SMC_CLC_DECL_INTERR;
+               return SMC_CLC_DECL_ERR_REGRMB;
 
        /* send CONFIRM LINK response over RoCE fabric */
-       rc = smc_llc_send_confirm_link(link,
-                                      link->smcibdev->mac[link->ibport - 1],
-                                      &link->smcibdev->gid[link->ibport - 1],
-                                      SMC_LLC_RESP);
+       rc = smc_llc_send_confirm_link(link, SMC_LLC_RESP);
        if (rc < 0)
-               return SMC_CLC_DECL_TCL;
+               return SMC_CLC_DECL_TIMEOUT_CL;
 
        /* receive ADD LINK request from server over RoCE fabric */
        rest = wait_for_completion_interruptible_timeout(&link->llc_add,
@@ -371,18 +370,17 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc)
        /* send add link reject message, only one link supported for now */
        rc = smc_llc_send_add_link(link,
                                   link->smcibdev->mac[link->ibport - 1],
-                                  &link->smcibdev->gid[link->ibport - 1],
-                                  SMC_LLC_RESP);
+                                  link->gid, SMC_LLC_RESP);
        if (rc < 0)
-               return SMC_CLC_DECL_TCL;
+               return SMC_CLC_DECL_TIMEOUT_AL;
 
        smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time);
 
        return 0;
 }
 
-static void smc_conn_save_peer_info(struct smc_sock *smc,
-                                   struct smc_clc_msg_accept_confirm *clc)
+static void smcr_conn_save_peer_info(struct smc_sock *smc,
+                                    struct smc_clc_msg_accept_confirm *clc)
 {
        int bufsize = smc_uncompress_bufsize(clc->rmbe_size);
 
@@ -393,6 +391,28 @@ static void smc_conn_save_peer_info(struct smc_sock *smc,
        smc->conn.tx_off = bufsize * (smc->conn.peer_rmbe_idx - 1);
 }
 
+static void smcd_conn_save_peer_info(struct smc_sock *smc,
+                                    struct smc_clc_msg_accept_confirm *clc)
+{
+       int bufsize = smc_uncompress_bufsize(clc->dmbe_size);
+
+       smc->conn.peer_rmbe_idx = clc->dmbe_idx;
+       smc->conn.peer_token = clc->token;
+       /* msg header takes up space in the buffer */
+       smc->conn.peer_rmbe_size = bufsize - sizeof(struct smcd_cdc_msg);
+       atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
+       smc->conn.tx_off = bufsize * smc->conn.peer_rmbe_idx;
+}
+
+static void smc_conn_save_peer_info(struct smc_sock *smc,
+                                   struct smc_clc_msg_accept_confirm *clc)
+{
+       if (smc->conn.lgr->is_smcd)
+               smcd_conn_save_peer_info(smc, clc);
+       else
+               smcr_conn_save_peer_info(smc, clc);
+}
+
 static void smc_link_save_peer_info(struct smc_link *link,
                                    struct smc_clc_msg_accept_confirm *clc)
 {
@@ -404,9 +424,10 @@ static void smc_link_save_peer_info(struct smc_link *link,
 }
 
 /* fall back during connect */
-static int smc_connect_fallback(struct smc_sock *smc)
+static int smc_connect_fallback(struct smc_sock *smc, int reason_code)
 {
        smc->use_fallback = true;
+       smc->fallback_rsn = reason_code;
        smc_copy_sock_settings_to_clc(smc);
        if (smc->sk.sk_state == SMC_INIT)
                smc->sk.sk_state = SMC_ACTIVE;
@@ -423,7 +444,7 @@ static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code)
                        sock_put(&smc->sk); /* passive closing */
                return reason_code;
        }
-       if (reason_code != SMC_CLC_DECL_REPLY) {
+       if (reason_code != SMC_CLC_DECL_PEERDECL) {
                rc = smc_clc_send_decline(smc, reason_code);
                if (rc < 0) {
                        if (smc->sk.sk_state == SMC_INIT)
@@ -431,7 +452,7 @@ static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code)
                        return rc;
                }
        }
-       return smc_connect_fallback(smc);
+       return smc_connect_fallback(smc, reason_code);
 }
 
 /* abort connecting */
@@ -448,7 +469,7 @@ static int smc_connect_abort(struct smc_sock *smc, int reason_code,
 /* check if there is an RDMA device available for this connection. */
 /* called for connect and listen */
 static int smc_check_rdma(struct smc_sock *smc, struct smc_ib_device **ibdev,
-                         u8 *ibport)
+                         u8 *ibport, unsigned short vlan_id, u8 gid[])
 {
        int reason_code = 0;
 
@@ -456,22 +477,59 @@ static int smc_check_rdma(struct smc_sock *smc, struct smc_ib_device **ibdev,
         * within same PNETID that also contains the ethernet device
         * used for the internal TCP socket
         */
-       smc_pnet_find_roce_resource(smc->clcsock->sk, ibdev, ibport);
+       smc_pnet_find_roce_resource(smc->clcsock->sk, ibdev, ibport, vlan_id,
+                                   gid);
        if (!(*ibdev))
                reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
 
        return reason_code;
 }
 
+/* check if there is an ISM device available for this connection. */
+/* called for connect and listen */
+static int smc_check_ism(struct smc_sock *smc, struct smcd_dev **ismdev)
+{
+       /* Find ISM device with same PNETID as connecting interface  */
+       smc_pnet_find_ism_resource(smc->clcsock->sk, ismdev);
+       if (!(*ismdev))
+               return SMC_CLC_DECL_CNFERR; /* configuration error */
+       return 0;
+}
+
+/* Check for VLAN ID and register it on ISM device just for CLC handshake */
+static int smc_connect_ism_vlan_setup(struct smc_sock *smc,
+                                     struct smcd_dev *ismdev,
+                                     unsigned short vlan_id)
+{
+       if (vlan_id && smc_ism_get_vlan(ismdev, vlan_id))
+               return SMC_CLC_DECL_CNFERR;
+       return 0;
+}
+
+/* cleanup temporary VLAN ID registration used for CLC handshake. If ISM is
+ * used, the VLAN ID will be registered again during the connection setup.
+ */
+static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc, bool is_smcd,
+                                       struct smcd_dev *ismdev,
+                                       unsigned short vlan_id)
+{
+       if (!is_smcd)
+               return 0;
+       if (vlan_id && smc_ism_put_vlan(ismdev, vlan_id))
+               return SMC_CLC_DECL_CNFERR;
+       return 0;
+}
+
 /* CLC handshake during connect */
-static int smc_connect_clc(struct smc_sock *smc,
+static int smc_connect_clc(struct smc_sock *smc, int smc_type,
                           struct smc_clc_msg_accept_confirm *aclc,
-                          struct smc_ib_device *ibdev, u8 ibport)
+                          struct smc_ib_device *ibdev, u8 ibport,
+                          u8 gid[], struct smcd_dev *ismdev)
 {
        int rc = 0;
 
        /* do inband token exchange */
-       rc = smc_clc_send_proposal(smc, ibdev, ibport);
+       rc = smc_clc_send_proposal(smc, smc_type, ibdev, ibport, gid, ismdev);
        if (rc)
                return rc;
        /* receive SMC Accept CLC message */
@@ -488,8 +546,8 @@ static int smc_connect_rdma(struct smc_sock *smc,
        int reason_code = 0;
 
        mutex_lock(&smc_create_lgr_pending);
-       local_contact = smc_conn_create(smc, ibdev, ibport, &aclc->lcl,
-                                       aclc->hdr.flag);
+       local_contact = smc_conn_create(smc, false, aclc->hdr.flag, ibdev,
+                                       ibport, &aclc->lcl, NULL, 0);
        if (local_contact < 0) {
                if (local_contact == -ENOMEM)
                        reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/
@@ -504,14 +562,14 @@ static int smc_connect_rdma(struct smc_sock *smc,
        smc_conn_save_peer_info(smc, aclc);
 
        /* create send buffer and rmb */
-       if (smc_buf_create(smc))
+       if (smc_buf_create(smc, false))
                return smc_connect_abort(smc, SMC_CLC_DECL_MEM, local_contact);
 
        if (local_contact == SMC_FIRST_CONTACT)
                smc_link_save_peer_info(link, aclc);
 
        if (smc_rmb_rtoken_handling(&smc->conn, aclc))
-               return smc_connect_abort(smc, SMC_CLC_DECL_INTERR,
+               return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RTOK,
                                         local_contact);
 
        smc_close_init(smc);
@@ -519,12 +577,12 @@ static int smc_connect_rdma(struct smc_sock *smc,
 
        if (local_contact == SMC_FIRST_CONTACT) {
                if (smc_ib_ready_link(link))
-                       return smc_connect_abort(smc, SMC_CLC_DECL_INTERR,
+                       return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RDYLNK,
                                                 local_contact);
        } else {
                if (!smc->conn.rmb_desc->reused &&
                    smc_reg_rmb(link, smc->conn.rmb_desc, true))
-                       return smc_connect_abort(smc, SMC_CLC_DECL_INTERR,
+                       return smc_connect_abort(smc, SMC_CLC_DECL_ERR_REGRMB,
                                                 local_contact);
        }
        smc_rmb_sync_sg_for_device(&smc->conn);
@@ -551,41 +609,113 @@ static int smc_connect_rdma(struct smc_sock *smc,
        return 0;
 }
 
+/* setup for ISM connection of client */
+static int smc_connect_ism(struct smc_sock *smc,
+                          struct smc_clc_msg_accept_confirm *aclc,
+                          struct smcd_dev *ismdev)
+{
+       int local_contact = SMC_FIRST_CONTACT;
+       int rc = 0;
+
+       mutex_lock(&smc_create_lgr_pending);
+       local_contact = smc_conn_create(smc, true, aclc->hdr.flag, NULL, 0,
+                                       NULL, ismdev, aclc->gid);
+       if (local_contact < 0)
+               return smc_connect_abort(smc, SMC_CLC_DECL_MEM, 0);
+
+       /* Create send and receive buffers */
+       if (smc_buf_create(smc, true))
+               return smc_connect_abort(smc, SMC_CLC_DECL_MEM, local_contact);
+
+       smc_conn_save_peer_info(smc, aclc);
+       smc_close_init(smc);
+       smc_rx_init(smc);
+       smc_tx_init(smc);
+
+       rc = smc_clc_send_confirm(smc);
+       if (rc)
+               return smc_connect_abort(smc, rc, local_contact);
+       mutex_unlock(&smc_create_lgr_pending);
+
+       smc_copy_sock_settings_to_clc(smc);
+       if (smc->sk.sk_state == SMC_INIT)
+               smc->sk.sk_state = SMC_ACTIVE;
+
+       return 0;
+}
+
 /* perform steps before actually connecting */
 static int __smc_connect(struct smc_sock *smc)
 {
+       bool ism_supported = false, rdma_supported = false;
        struct smc_clc_msg_accept_confirm aclc;
        struct smc_ib_device *ibdev;
+       struct smcd_dev *ismdev;
+       u8 gid[SMC_GID_SIZE];
+       unsigned short vlan;
+       int smc_type;
        int rc = 0;
        u8 ibport;
 
        sock_hold(&smc->sk); /* sock put in passive closing */
 
        if (smc->use_fallback)
-               return smc_connect_fallback(smc);
+               return smc_connect_fallback(smc, smc->fallback_rsn);
 
        /* if peer has not signalled SMC-capability, fall back */
        if (!tcp_sk(smc->clcsock->sk)->syn_smc)
-               return smc_connect_fallback(smc);
+               return smc_connect_fallback(smc, SMC_CLC_DECL_PEERNOSMC);
 
        /* IPSec connections opt out of SMC-R optimizations */
        if (using_ipsec(smc))
                return smc_connect_decline_fallback(smc, SMC_CLC_DECL_IPSEC);
 
-       /* check if a RDMA device is available; if not, fall back */
-       if (smc_check_rdma(smc, &ibdev, &ibport))
+       /* check for VLAN ID */
+       if (smc_vlan_by_tcpsk(smc->clcsock, &vlan))
                return smc_connect_decline_fallback(smc, SMC_CLC_DECL_CNFERR);
 
+       /* check if there is an ISM device available */
+       if (!smc_check_ism(smc, &ismdev) &&
+           !smc_connect_ism_vlan_setup(smc, ismdev, vlan)) {
+               /* ISM is supported for this connection */
+               ism_supported = true;
+               smc_type = SMC_TYPE_D;
+       }
+
+       /* check if there is an RDMA device available */
+       if (!smc_check_rdma(smc, &ibdev, &ibport, vlan, gid)) {
+               /* RDMA is supported for this connection */
+               rdma_supported = true;
+               if (ism_supported)
+                       smc_type = SMC_TYPE_B; /* both */
+               else
+                       smc_type = SMC_TYPE_R; /* only RDMA */
+       }
+
+       /* if neither ISM nor RDMA is supported, fall back */
+       if (!rdma_supported && !ism_supported)
+               return smc_connect_decline_fallback(smc, SMC_CLC_DECL_NOSMCDEV);
+
        /* perform CLC handshake */
-       rc = smc_connect_clc(smc, &aclc, ibdev, ibport);
-       if (rc)
+       rc = smc_connect_clc(smc, smc_type, &aclc, ibdev, ibport, gid, ismdev);
+       if (rc) {
+               smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan);
                return smc_connect_decline_fallback(smc, rc);
+       }
 
-       /* connect using rdma */
-       rc = smc_connect_rdma(smc, &aclc, ibdev, ibport);
-       if (rc)
+       /* depending on previous steps, connect using rdma or ism */
+       if (rdma_supported && aclc.hdr.path == SMC_TYPE_R)
+               rc = smc_connect_rdma(smc, &aclc, ibdev, ibport);
+       else if (ism_supported && aclc.hdr.path == SMC_TYPE_D)
+               rc = smc_connect_ism(smc, &aclc, ismdev);
+       else
+               rc = SMC_CLC_DECL_MODEUNSUPP;
+       if (rc) {
+               smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan);
                return smc_connect_decline_fallback(smc, rc);
+       }
 
+       smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan);
        return 0;
 }
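
The reworked __smc_connect() above replaces the single RDMA check with a three-way negotiation over the new hdr.path field. A stand-alone sketch of that decision logic (hypothetical helper names; the kernel inlines this as shown above):

#include <stdbool.h>

enum { TYPE_R = 0, TYPE_D = 1, TYPE_B = 3 };	/* mirrors SMC_TYPE_* */

/* what the client proposes, given which device classes were found */
static int proposed_type(bool ism_ok, bool rdma_ok)
{
	if (ism_ok && rdma_ok)
		return TYPE_B;		/* offer both, server chooses */
	if (ism_ok)
		return TYPE_D;
	if (rdma_ok)
		return TYPE_R;
	return -1;			/* neither: fall back to TCP */
}

/* whether the server's accept uses a path the client offered;
 * a mismatch is the SMC_CLC_DECL_MODEUNSUPP case above
 */
static bool accept_matches(int proposed, int accepted)
{
	if (accepted == TYPE_R)
		return proposed == TYPE_R || proposed == TYPE_B;
	if (accepted == TYPE_D)
		return proposed == TYPE_D || proposed == TYPE_B;
	return false;
}
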
 
@@ -817,15 +947,12 @@ static int smc_serv_conf_first_link(struct smc_sock *smc)
        link = &lgr->lnk[SMC_SINGLE_LINK];
 
        if (smc_reg_rmb(link, smc->conn.rmb_desc, false))
-               return SMC_CLC_DECL_INTERR;
+               return SMC_CLC_DECL_ERR_REGRMB;
 
        /* send CONFIRM LINK request to client over the RoCE fabric */
-       rc = smc_llc_send_confirm_link(link,
-                                      link->smcibdev->mac[link->ibport - 1],
-                                      &link->smcibdev->gid[link->ibport - 1],
-                                      SMC_LLC_REQ);
+       rc = smc_llc_send_confirm_link(link, SMC_LLC_REQ);
        if (rc < 0)
-               return SMC_CLC_DECL_TCL;
+               return SMC_CLC_DECL_TIMEOUT_CL;
 
        /* receive CONFIRM LINK response from client over the RoCE fabric */
        rest = wait_for_completion_interruptible_timeout(
@@ -845,10 +972,9 @@ static int smc_serv_conf_first_link(struct smc_sock *smc)
        /* send ADD LINK request to client over the RoCE fabric */
        rc = smc_llc_send_add_link(link,
                                   link->smcibdev->mac[link->ibport - 1],
-                                  &link->smcibdev->gid[link->ibport - 1],
-                                  SMC_LLC_REQ);
+                                  link->gid, SMC_LLC_REQ);
        if (rc < 0)
-               return SMC_CLC_DECL_TCL;
+               return SMC_CLC_DECL_TIMEOUT_AL;
 
        /* receive ADD LINK response from client over the RoCE fabric */
        rest = wait_for_completion_interruptible_timeout(&link->llc_add_resp,
@@ -923,7 +1049,8 @@ static void smc_listen_decline(struct smc_sock *new_smc, int reason_code,
        }
        smc_conn_free(&new_smc->conn);
        new_smc->use_fallback = true;
-       if (reason_code && reason_code != SMC_CLC_DECL_REPLY) {
+       new_smc->fallback_rsn = reason_code;
+       if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) {
                if (smc_clc_send_decline(new_smc, reason_code) < 0) {
                        smc_listen_out_err(new_smc);
                        return;
@@ -953,7 +1080,8 @@ static int smc_listen_rdma_init(struct smc_sock *new_smc,
                                int *local_contact)
 {
        /* allocate connection / link group */
-       *local_contact = smc_conn_create(new_smc, ibdev, ibport, &pclc->lcl, 0);
+       *local_contact = smc_conn_create(new_smc, false, 0, ibdev, ibport,
+                                        &pclc->lcl, NULL, 0);
        if (*local_contact < 0) {
                if (*local_contact == -ENOMEM)
                        return SMC_CLC_DECL_MEM;/* insufficient memory*/
@@ -961,12 +1089,50 @@ static int smc_listen_rdma_init(struct smc_sock *new_smc,
        }
 
        /* create send buffer and rmb */
-       if (smc_buf_create(new_smc))
+       if (smc_buf_create(new_smc, false))
                return SMC_CLC_DECL_MEM;
 
        return 0;
 }
 
+/* listen worker: initialize connection and buffers for SMC-D */
+static int smc_listen_ism_init(struct smc_sock *new_smc,
+                              struct smc_clc_msg_proposal *pclc,
+                              struct smcd_dev *ismdev,
+                              int *local_contact)
+{
+       struct smc_clc_msg_smcd *pclc_smcd;
+
+       pclc_smcd = smc_get_clc_msg_smcd(pclc);
+       *local_contact = smc_conn_create(new_smc, true, 0, NULL, 0, NULL,
+                                        ismdev, pclc_smcd->gid);
+       if (*local_contact < 0) {
+               if (*local_contact == -ENOMEM)
+                       return SMC_CLC_DECL_MEM;/* insufficient memory*/
+               return SMC_CLC_DECL_INTERR; /* other error */
+       }
+
+       /* Check if peer can be reached via ISM device */
+       if (smc_ism_cantalk(new_smc->conn.lgr->peer_gid,
+                           new_smc->conn.lgr->vlan_id,
+                           new_smc->conn.lgr->smcd)) {
+               if (*local_contact == SMC_FIRST_CONTACT)
+                       smc_lgr_forget(new_smc->conn.lgr);
+               smc_conn_free(&new_smc->conn);
+               return SMC_CLC_DECL_CNFERR;
+       }
+
+       /* Create send and receive buffers */
+       if (smc_buf_create(new_smc, true)) {
+               if (*local_contact == SMC_FIRST_CONTACT)
+                       smc_lgr_forget(new_smc->conn.lgr);
+               smc_conn_free(&new_smc->conn);
+               return SMC_CLC_DECL_MEM;
+       }
+
+       return 0;
+}
+
 /* listen worker: register buffers */
 static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact)
 {
@@ -975,7 +1141,7 @@ static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact)
        if (local_contact != SMC_FIRST_CONTACT) {
                if (!new_smc->conn.rmb_desc->reused) {
                        if (smc_reg_rmb(link, new_smc->conn.rmb_desc, true))
-                               return SMC_CLC_DECL_INTERR;
+                               return SMC_CLC_DECL_ERR_REGRMB;
                }
        }
        smc_rmb_sync_sg_for_device(&new_smc->conn);
@@ -995,13 +1161,13 @@ static void smc_listen_rdma_finish(struct smc_sock *new_smc,
                smc_link_save_peer_info(link, cclc);
 
        if (smc_rmb_rtoken_handling(&new_smc->conn, cclc)) {
-               reason_code = SMC_CLC_DECL_INTERR;
+               reason_code = SMC_CLC_DECL_ERR_RTOK;
                goto decline;
        }
 
        if (local_contact == SMC_FIRST_CONTACT) {
                if (smc_ib_ready_link(link)) {
-                       reason_code = SMC_CLC_DECL_INTERR;
+                       reason_code = SMC_CLC_DECL_ERR_RDYLNK;
                        goto decline;
                }
                /* QP confirmation over RoCE fabric */
@@ -1025,8 +1191,11 @@ static void smc_listen_work(struct work_struct *work)
        struct smc_clc_msg_accept_confirm cclc;
        struct smc_clc_msg_proposal *pclc;
        struct smc_ib_device *ibdev;
+       bool ism_supported = false;
+       struct smcd_dev *ismdev;
        u8 buf[SMC_CLC_MAX_LEN];
        int local_contact = 0;
+       unsigned short vlan;
        int reason_code = 0;
        int rc = 0;
        u8 ibport;
@@ -1039,6 +1208,7 @@ static void smc_listen_work(struct work_struct *work)
        /* check if peer is smc capable */
        if (!tcp_sk(newclcsock->sk)->syn_smc) {
                new_smc->use_fallback = true;
+               new_smc->fallback_rsn = SMC_CLC_DECL_PEERNOSMC;
                smc_listen_out_connected(new_smc);
                return;
        }
@@ -1065,15 +1235,26 @@ static void smc_listen_work(struct work_struct *work)
        smc_rx_init(new_smc);
        smc_tx_init(new_smc);
 
+       /* check if ISM is available */
+       if ((pclc->hdr.path == SMC_TYPE_D || pclc->hdr.path == SMC_TYPE_B) &&
+           !smc_check_ism(new_smc, &ismdev) &&
+           !smc_listen_ism_init(new_smc, pclc, ismdev, &local_contact)) {
+               ism_supported = true;
+       }
+
        /* check if RDMA is available */
-       if (smc_check_rdma(new_smc, &ibdev, &ibport) ||
-           smc_listen_rdma_check(new_smc, pclc) ||
-           smc_listen_rdma_init(new_smc, pclc, ibdev, ibport,
-                                &local_contact) ||
-           smc_listen_rdma_reg(new_smc, local_contact)) {
+       if (!ism_supported &&
+           ((pclc->hdr.path != SMC_TYPE_R && pclc->hdr.path != SMC_TYPE_B) ||
+            smc_vlan_by_tcpsk(new_smc->clcsock, &vlan) ||
+            smc_check_rdma(new_smc, &ibdev, &ibport, vlan, NULL) ||
+            smc_listen_rdma_check(new_smc, pclc) ||
+            smc_listen_rdma_init(new_smc, pclc, ibdev, ibport,
+                                 &local_contact) ||
+            smc_listen_rdma_reg(new_smc, local_contact))) {
                /* SMC not supported, decline */
                mutex_unlock(&smc_create_lgr_pending);
-               smc_listen_decline(new_smc, SMC_CLC_DECL_CNFERR, local_contact);
+               smc_listen_decline(new_smc, SMC_CLC_DECL_MODEUNSUPP,
+                                  local_contact);
                return;
        }
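
Read together, the ISM and RDMA branches above give the listening side a fixed preference order; summarizing what the code implies:

/*
 * server-side path selection in smc_listen_work():
 *   1. proposal offers SMC-D (or both) and smc_listen_ism_init()
 *      succeeds, i.e. an ISM device can reach the peer GID
 *        -> continue as SMC-D; the RDMA chain is skipped entirely
 *   2. otherwise proposal offers SMC-R (or both) and the whole
 *      RDMA init/register chain succeeds
 *        -> continue as SMC-R
 *   3. otherwise decline with SMC_CLC_DECL_MODEUNSUPP
 */
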
 
@@ -1095,7 +1276,8 @@ static void smc_listen_work(struct work_struct *work)
        }
 
        /* finish worker */
-       smc_listen_rdma_finish(new_smc, &cclc, local_contact);
+       if (!ism_supported)
+               smc_listen_rdma_finish(new_smc, &cclc, local_contact);
        smc_conn_save_peer_info(new_smc, &cclc);
        mutex_unlock(&smc_create_lgr_pending);
        smc_listen_out_connected(new_smc);
@@ -1119,6 +1301,7 @@ static void smc_tcp_listen_work(struct work_struct *work)
 
                new_smc->listen_smc = lsmc;
                new_smc->use_fallback = lsmc->use_fallback;
+               new_smc->fallback_rsn = lsmc->fallback_rsn;
                sock_hold(lsk); /* sock_put in smc_listen_work */
                INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);
                smc_copy_sock_settings_to_smc(new_smc);
@@ -1273,6 +1456,7 @@ static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
        if (msg->msg_flags & MSG_FASTOPEN) {
                if (sk->sk_state == SMC_INIT) {
                        smc->use_fallback = true;
+                       smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
                } else {
                        rc = -EINVAL;
                        goto out;
@@ -1351,7 +1535,7 @@ static __poll_t smc_poll(struct file *file, struct socket *sock,
                        mask |= EPOLLERR;
        } else {
                if (sk->sk_state != SMC_CLOSED)
-                       sock_poll_wait(file, sk_sleep(sk), wait);
+                       sock_poll_wait(file, wait);
                if (sk->sk_err)
                        mask |= EPOLLERR;
                if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
@@ -1470,6 +1654,7 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
                /* option not supported by SMC */
                if (sk->sk_state == SMC_INIT) {
                        smc->use_fallback = true;
+                       smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
                } else {
                        if (!smc->use_fallback)
                                rc = -EINVAL;
@@ -1573,12 +1758,8 @@ static int smc_ioctl(struct socket *sock, unsigned int cmd,
                    smc->sk.sk_state == SMC_CLOSED) {
                        answ = 0;
                } else {
-                       smc_curs_write(&cons,
-                              smc_curs_read(&conn->local_tx_ctrl.cons, conn),
-                                      conn);
-                       smc_curs_write(&urg,
-                                      smc_curs_read(&conn->urg_curs, conn),
-                                      conn);
+                       smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn);
+                       smc_curs_copy(&urg, &conn->urg_curs, conn);
                        answ = smc_curs_diff(conn->rmb_desc->len,
                                             &cons, &urg) == 1;
                }
@@ -1711,6 +1892,7 @@ static int smc_create(struct net *net, struct socket *sock, int protocol,
        /* create internal TCP socket for CLC handshake and fallback */
        smc = smc_sk(sk);
        smc->use_fallback = false; /* assume rdma capability first */
+       smc->fallback_rsn = 0;
        rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
                              &smc->clcsock);
        if (rc) {
diff --git a/net/smc/smc.h b/net/smc/smc.h
index d7ca265704821a1862f84f209550c4b19fc0db59..08786ace6010028aae9f946e4b72378c3ad9181c 100644
@@ -21,8 +21,6 @@
 #define SMCPROTO_SMC           0       /* SMC protocol, IPv4 */
 #define SMCPROTO_SMC6          1       /* SMC protocol, IPv6 */
 
-#define SMC_MAX_PORTS          2       /* Max # of ports */
-
 extern struct proto smc_proto;
 extern struct proto smc_proto6;
 
@@ -185,6 +183,11 @@ struct smc_connection {
        spinlock_t              acurs_lock;     /* protect cursors */
 #endif
        struct work_struct      close_work;     /* peer sent some closing */
+       struct tasklet_struct   rx_tsklet;      /* Receiver tasklet for SMC-D */
+       u8                      rx_off;         /* receive offset:
+                                                * 0 for SMC-R, 32 for SMC-D
+                                                */
+       u64                     peer_token;     /* SMC-D token of peer */
 };
 
 struct smc_connect_info {
@@ -205,6 +208,8 @@ struct smc_sock {                           /* smc sock container */
        struct list_head        accept_q;       /* sockets to be accepted */
        spinlock_t              accept_q_lock;  /* protects accept_q */
        bool                    use_fallback;   /* fallback to tcp */
+       int                     fallback_rsn;   /* reason for fallback */
+       u32                     peer_diagnosis; /* decline reason from peer */
        int                     sockopt_defer_accept;
                                                /* sockopt TCP_DEFER_ACCEPT
                                                 * value
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index 9bde1e4ca288cbf9db802fa3fd15650df4788867..ed5dcf03fe0b6ded9d363c1e5891be76f86fd49c 100644
@@ -34,14 +34,15 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd,
                               enum ib_wc_status wc_status)
 {
        struct smc_cdc_tx_pend *cdcpend = (struct smc_cdc_tx_pend *)pnd_snd;
+       struct smc_connection *conn = cdcpend->conn;
        struct smc_sock *smc;
        int diff;
 
-       if (!cdcpend->conn)
+       if (!conn)
                /* already dismissed */
                return;
 
-       smc = container_of(cdcpend->conn, struct smc_sock, conn);
+       smc = container_of(conn, struct smc_sock, conn);
        bh_lock_sock(&smc->sk);
        if (!wc_status) {
                diff = smc_curs_diff(cdcpend->conn->sndbuf_desc->len,
@@ -52,9 +53,7 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd,
                atomic_add(diff, &cdcpend->conn->sndbuf_space);
                /* guarantee 0 <= sndbuf_space <= sndbuf_desc->len */
                smp_mb__after_atomic();
-               smc_curs_write(&cdcpend->conn->tx_curs_fin,
-                              smc_curs_read(&cdcpend->cursor, cdcpend->conn),
-                              cdcpend->conn);
+               smc_curs_copy(&conn->tx_curs_fin, &cdcpend->cursor, conn);
        }
        smc_tx_sndbuf_nonfull(smc);
        bh_unlock_sock(&smc->sk);
@@ -110,14 +109,13 @@ int smc_cdc_msg_send(struct smc_connection *conn,
                            &conn->local_tx_ctrl, conn);
        rc = smc_wr_tx_send(link, (struct smc_wr_tx_pend_priv *)pend);
        if (!rc)
-               smc_curs_write(&conn->rx_curs_confirmed,
-                              smc_curs_read(&conn->local_tx_ctrl.cons, conn),
-                              conn);
+               smc_curs_copy(&conn->rx_curs_confirmed,
+                             &conn->local_tx_ctrl.cons, conn);
 
        return rc;
 }
 
-int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn)
+static int smcr_cdc_get_slot_and_msg_send(struct smc_connection *conn)
 {
        struct smc_cdc_tx_pend *pend;
        struct smc_wr_buf *wr_buf;
@@ -130,6 +128,21 @@ int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn)
        return smc_cdc_msg_send(conn, wr_buf, pend);
 }
 
+int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn)
+{
+       int rc;
+
+       if (conn->lgr->is_smcd) {
+               spin_lock_bh(&conn->send_lock);
+               rc = smcd_cdc_msg_send(conn);
+               spin_unlock_bh(&conn->send_lock);
+       } else {
+               rc = smcr_cdc_get_slot_and_msg_send(conn);
+       }
+
+       return rc;
+}
+
 static bool smc_cdc_tx_filter(struct smc_wr_tx_pend_priv *tx_pend,
                              unsigned long data)
 {
@@ -157,6 +170,44 @@ void smc_cdc_tx_dismiss_slots(struct smc_connection *conn)
                                (unsigned long)conn);
 }
 
+/* Send an SMC-D CDC header.
+ * This increments the free space available in our send buffer.
+ * Also update the confirmed receive buffer with what was sent to the peer.
+ */
+int smcd_cdc_msg_send(struct smc_connection *conn)
+{
+       struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
+       struct smcd_cdc_msg cdc;
+       int rc, diff;
+
+       memset(&cdc, 0, sizeof(cdc));
+       cdc.common.type = SMC_CDC_MSG_TYPE;
+       cdc.prod_wrap = conn->local_tx_ctrl.prod.wrap;
+       cdc.prod_count = conn->local_tx_ctrl.prod.count;
+
+       cdc.cons_wrap = conn->local_tx_ctrl.cons.wrap;
+       cdc.cons_count = conn->local_tx_ctrl.cons.count;
+       cdc.prod_flags = conn->local_tx_ctrl.prod_flags;
+       cdc.conn_state_flags = conn->local_tx_ctrl.conn_state_flags;
+       rc = smcd_tx_ism_write(conn, &cdc, sizeof(cdc), 0, 1);
+       if (rc)
+               return rc;
+       smc_curs_copy(&conn->rx_curs_confirmed, &conn->local_tx_ctrl.cons,
+                     conn);
+       /* Calculate transmitted data and increment free send buffer space */
+       diff = smc_curs_diff(conn->sndbuf_desc->len, &conn->tx_curs_fin,
+                            &conn->tx_curs_sent);
+       /* increased by confirmed number of bytes */
+       smp_mb__before_atomic();
+       atomic_add(diff, &conn->sndbuf_space);
+       /* guarantee 0 <= sndbuf_space <= sndbuf_desc->len */
+       smp_mb__after_atomic();
+       smc_curs_copy(&conn->tx_curs_fin, &conn->tx_curs_sent, conn);
+
+       smc_tx_sndbuf_nonfull(smc);
+       return rc;
+}
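
The bookkeeping above leans on smc_curs_diff() to measure how far tx_curs_sent ran ahead of tx_curs_fin. A simplified, self-contained sketch of that ring arithmetic with a worked example (not the exact kernel helper):

struct curs { unsigned short wrap; unsigned int count; };

/* distance from 'old' to 'new' in a ring of 'size' bytes; assumes
 * 'new' is at most one wrap ahead, which the CDC flow guarantees
 */
static inline int curs_diff(unsigned int size, const struct curs *old,
			    const struct curs *new)
{
	if (old->wrap != new->wrap)	/* producer wrapped the ring */
		return (int)((size - old->count) + new->count);
	return (int)(new->count - old->count);
}

/* example: size 16, old {wrap 0, count 12}, new {wrap 1, count 4}
 * => (16 - 12) + 4 = 8 bytes completed, credited to sndbuf_space
 */
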
+
 /********************************* receive ***********************************/
 
 static inline bool smc_cdc_before(u16 seq1, u16 seq2)
@@ -171,14 +222,12 @@ static void smc_cdc_handle_urg_data_arrival(struct smc_sock *smc,
        char *base;
 
        /* new data included urgent business */
-       smc_curs_write(&conn->urg_curs,
-                      smc_curs_read(&conn->local_rx_ctrl.prod, conn),
-                      conn);
+       smc_curs_copy(&conn->urg_curs, &conn->local_rx_ctrl.prod, conn);
        conn->urg_state = SMC_URG_VALID;
        if (!sock_flag(&smc->sk, SOCK_URGINLINE))
                /* we'll skip the urgent byte, so don't account for it */
                (*diff_prod)--;
-       base = (char *)conn->rmb_desc->cpu_addr;
+       base = (char *)conn->rmb_desc->cpu_addr + conn->rx_off;
        if (conn->urg_curs.count)
                conn->urg_rx_byte = *(base + conn->urg_curs.count - 1);
        else
@@ -193,12 +242,8 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc,
        struct smc_connection *conn = &smc->conn;
        int diff_cons, diff_prod;
 
-       smc_curs_write(&prod_old,
-                      smc_curs_read(&conn->local_rx_ctrl.prod, conn),
-                      conn);
-       smc_curs_write(&cons_old,
-                      smc_curs_read(&conn->local_rx_ctrl.cons, conn),
-                      conn);
+       smc_curs_copy(&prod_old, &conn->local_rx_ctrl.prod, conn);
+       smc_curs_copy(&cons_old, &conn->local_rx_ctrl.cons, conn);
        smc_cdc_msg_to_host(&conn->local_rx_ctrl, cdc, conn);
 
        diff_cons = smc_curs_diff(conn->peer_rmbe_size, &cons_old,
@@ -277,6 +322,34 @@ static void smc_cdc_msg_recv(struct smc_sock *smc, struct smc_cdc_msg *cdc)
        sock_put(&smc->sk); /* no free sk in softirq-context */
 }
 
+/* Tasklet handler for this connection. Scheduled from the ISM device IRQ
+ * handler to signal an update in the DMBE.
+ *
+ * Context:
+ * - tasklet context
+ */
+static void smcd_cdc_rx_tsklet(unsigned long data)
+{
+       struct smc_connection *conn = (struct smc_connection *)data;
+       struct smcd_cdc_msg cdc;
+       struct smc_sock *smc;
+
+       if (!conn)
+               return;
+
+       memcpy(&cdc, conn->rmb_desc->cpu_addr, sizeof(cdc));
+       smc = container_of(conn, struct smc_sock, conn);
+       smc_cdc_msg_recv(smc, (struct smc_cdc_msg *)&cdc);
+}
+
+/* Initialize receive tasklet. Called from ISM device IRQ handler to start
+ * receiver side.
+ */
+void smcd_cdc_rx_init(struct smc_connection *conn)
+{
+       tasklet_init(&conn->rx_tsklet, smcd_cdc_rx_tsklet, (unsigned long)conn);
+}
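
The other half of this pairing lives in the ISM interrupt path (in the device/smc_ism code, not part of this hunk): once a DMBE update is signalled, the IRQ handler only needs to kick the tasklet initialized above, roughly:

	tasklet_schedule(&conn->rx_tsklet);	/* defer out of IRQ context */
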
+
 /***************************** init, exit, misc ******************************/
 
 static void smc_cdc_rx_handler(struct ib_wc *wc, void *buf)
@@ -293,7 +366,7 @@ static void smc_cdc_rx_handler(struct ib_wc *wc, void *buf)
                return; /* invalid message */
 
        /* lookup connection */
-       lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
+       lgr = smc_get_lgr(link);
        read_lock_bh(&lgr->conns_lock);
        conn = smc_lgr_find_conn(ntohl(cdc->token), lgr);
        read_unlock_bh(&lgr->conns_lock);
diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h
index f60082fee5b8750c92a6c5f9dfa69d9a2d151ce6..934df4473a7cebc4f2cb4fa654d7189201745ac2 100644
@@ -50,6 +50,20 @@ struct smc_cdc_msg {
        u8                              reserved[18];
 } __packed;                                    /* format defined in RFC7609 */
 
+/* CDC message for SMC-D */
+struct smcd_cdc_msg {
+       struct smc_wr_rx_hdr common;    /* Type = 0xFE */
+       u8 res1[7];
+       u16 prod_wrap;
+       u32 prod_count;
+       u8 res2[2];
+       u16 cons_wrap;
+       u32 cons_count;
+       struct smc_cdc_producer_flags   prod_flags;
+       struct smc_cdc_conn_state_flags conn_state_flags;
+       u8 res3[8];
+} __packed;
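
A quick size check on the header just defined, since this is where conn->rx_off == 32 for SMC-D comes from, and why smcd_conn_save_peer_info() subtracts sizeof(struct smcd_cdc_msg) from the buffer size (assumes smc_wr_rx_hdr is the usual 1-byte type field):

/* 1 (common.type) + 7 (res1) + 2 + 4 (prod) + 2 (res2) + 2 + 4 (cons)
 * + 1 + 1 (flags) + 8 (res3) = 32 bytes, __packed
 */
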
+
 static inline bool smc_cdc_rxed_any_close(struct smc_connection *conn)
 {
        return conn->local_rx_ctrl.conn_state_flags.peer_conn_abort ||
@@ -90,47 +104,34 @@ static inline u64 smc_curs_read(union smc_host_cursor *curs,
 #endif
 }
 
-static inline u64 smc_curs_read_net(union smc_cdc_cursor *curs,
-                                   struct smc_connection *conn)
-{
-#ifndef KERNEL_HAS_ATOMIC64
-       unsigned long flags;
-       u64 ret;
-
-       spin_lock_irqsave(&conn->acurs_lock, flags);
-       ret = curs->acurs;
-       spin_unlock_irqrestore(&conn->acurs_lock, flags);
-       return ret;
-#else
-       return atomic64_read(&curs->acurs);
-#endif
-}
-
-static inline void smc_curs_write(union smc_host_cursor *curs, u64 val,
-                                 struct smc_connection *conn)
+/* Copy cursor src into tgt */
+static inline void smc_curs_copy(union smc_host_cursor *tgt,
+                                union smc_host_cursor *src,
+                                struct smc_connection *conn)
 {
 #ifndef KERNEL_HAS_ATOMIC64
        unsigned long flags;
 
        spin_lock_irqsave(&conn->acurs_lock, flags);
-       curs->acurs = val;
+       tgt->acurs = src->acurs;
        spin_unlock_irqrestore(&conn->acurs_lock, flags);
 #else
-       atomic64_set(&curs->acurs, val);
+       atomic64_set(&tgt->acurs, atomic64_read(&src->acurs));
 #endif
 }
 
-static inline void smc_curs_write_net(union smc_cdc_cursor *curs, u64 val,
-                                     struct smc_connection *conn)
+static inline void smc_curs_copy_net(union smc_cdc_cursor *tgt,
+                                    union smc_cdc_cursor *src,
+                                    struct smc_connection *conn)
 {
 #ifndef KERNEL_HAS_ATOMIC64
        unsigned long flags;
 
        spin_lock_irqsave(&conn->acurs_lock, flags);
-       curs->acurs = val;
+       tgt->acurs = src->acurs;
        spin_unlock_irqrestore(&conn->acurs_lock, flags);
 #else
-       atomic64_set(&curs->acurs, val);
+       atomic64_set(&tgt->acurs, atomic64_read(&src->acurs));
 #endif
 }
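
For reference, the cursor union these helpers copy packs both fields into one 64-bit word (layout sketch inferred from its use here; the real definition sits earlier in this header):

union smc_host_cursor {		/* sketch */
	struct {
		u16	reserved;
		u16	wrap;	/* window wrap sequence number */
		u32	count;	/* byte offset within the buffer */
	};
	atomic64_t	acurs;	/* the whole cursor as one word */
};

Copying through a single atomic64 read/write (or under acurs_lock where KERNEL_HAS_ATOMIC64 is unset) means a concurrent reader can never observe a torn wrap/count pair.
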
 
@@ -165,7 +166,7 @@ static inline void smc_host_cursor_to_cdc(union smc_cdc_cursor *peer,
 {
        union smc_host_cursor temp;
 
-       smc_curs_write(&temp, smc_curs_read(local, conn), conn);
+       smc_curs_copy(&temp, local, conn);
        peer->count = htonl(temp.count);
        peer->wrap = htons(temp.wrap);
        /* peer->reserved = htons(0); must be ensured by caller */
@@ -192,8 +193,8 @@ static inline void smc_cdc_cursor_to_host(union smc_host_cursor *local,
        union smc_host_cursor temp, old;
        union smc_cdc_cursor net;
 
-       smc_curs_write(&old, smc_curs_read(local, conn), conn);
-       smc_curs_write_net(&net, smc_curs_read_net(peer, conn), conn);
+       smc_curs_copy(&old, local, conn);
+       smc_curs_copy_net(&net, peer, conn);
        temp.count = ntohl(net.count);
        temp.wrap = ntohs(net.wrap);
        if ((old.wrap > temp.wrap) && temp.wrap)
@@ -201,12 +202,12 @@ static inline void smc_cdc_cursor_to_host(union smc_host_cursor *local,
        if ((old.wrap == temp.wrap) &&
            (old.count > temp.count))
                return;
-       smc_curs_write(local, smc_curs_read(&temp, conn), conn);
+       smc_curs_copy(local, &temp, conn);
 }
 
-static inline void smc_cdc_msg_to_host(struct smc_host_cdc_msg *local,
-                                      struct smc_cdc_msg *peer,
-                                      struct smc_connection *conn)
+static inline void smcr_cdc_msg_to_host(struct smc_host_cdc_msg *local,
+                                       struct smc_cdc_msg *peer,
+                                       struct smc_connection *conn)
 {
        local->common.type = peer->common.type;
        local->len = peer->len;
@@ -218,6 +219,27 @@ static inline void smc_cdc_msg_to_host(struct smc_host_cdc_msg *local,
        local->conn_state_flags = peer->conn_state_flags;
 }
 
+static inline void smcd_cdc_msg_to_host(struct smc_host_cdc_msg *local,
+                                       struct smcd_cdc_msg *peer)
+{
+       local->prod.wrap = peer->prod_wrap;
+       local->prod.count = peer->prod_count;
+       local->cons.wrap = peer->cons_wrap;
+       local->cons.count = peer->cons_count;
+       local->prod_flags = peer->prod_flags;
+       local->conn_state_flags = peer->conn_state_flags;
+}
+
+static inline void smc_cdc_msg_to_host(struct smc_host_cdc_msg *local,
+                                      struct smc_cdc_msg *peer,
+                                      struct smc_connection *conn)
+{
+       if (conn->lgr->is_smcd)
+               smcd_cdc_msg_to_host(local, (struct smcd_cdc_msg *)peer);
+       else
+               smcr_cdc_msg_to_host(local, peer, conn);
+}
+
 struct smc_cdc_tx_pend;
 
 int smc_cdc_get_free_slot(struct smc_connection *conn,
@@ -227,6 +249,8 @@ void smc_cdc_tx_dismiss_slots(struct smc_connection *conn);
 int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf,
                     struct smc_cdc_tx_pend *pend);
 int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn);
+int smcd_cdc_msg_send(struct smc_connection *conn);
 int smc_cdc_init(void) __init;
+void smcd_cdc_rx_init(struct smc_connection *conn);
 
 #endif /* SMC_CDC_H */
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index ae5d168653cecf804b20e49f27bb39bcf0385081..83aba9ade060a10d3df5452b7c07648eb7759df8 100644
 #include "smc_core.h"
 #include "smc_clc.h"
 #include "smc_ib.h"
+#include "smc_ism.h"
+
+#define SMCR_CLC_ACCEPT_CONFIRM_LEN 68
+#define SMCD_CLC_ACCEPT_CONFIRM_LEN 48
 
 /* eye catcher "SMCR" EBCDIC for CLC messages */
 static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'};
+/* eye catcher "SMCD" EBCDIC for CLC messages */
+static const char SMCD_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xc4'};
 
 /* check if received message has a correct header length and contains valid
  * leading and trailing eyecatchers
@@ -38,10 +44,14 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm)
        struct smc_clc_msg_decline *dclc;
        struct smc_clc_msg_trail *trl;
 
-       if (memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)))
+       if (memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) &&
+           memcmp(clcm->eyecatcher, SMCD_EYECATCHER, sizeof(SMCD_EYECATCHER)))
                return false;
        switch (clcm->type) {
        case SMC_CLC_PROPOSAL:
+               if (clcm->path != SMC_TYPE_R && clcm->path != SMC_TYPE_D &&
+                   clcm->path != SMC_TYPE_B)
+                       return false;
                pclc = (struct smc_clc_msg_proposal *)clcm;
                pclc_prfx = smc_clc_proposal_get_prefix(pclc);
                if (ntohs(pclc->hdr.length) !=
@@ -56,10 +66,16 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm)
                break;
        case SMC_CLC_ACCEPT:
        case SMC_CLC_CONFIRM:
+               if (clcm->path != SMC_TYPE_R && clcm->path != SMC_TYPE_D)
+                       return false;
                clc = (struct smc_clc_msg_accept_confirm *)clcm;
-               if (ntohs(clc->hdr.length) != sizeof(*clc))
+               if ((clcm->path == SMC_TYPE_R &&
+                    ntohs(clc->hdr.length) != SMCR_CLC_ACCEPT_CONFIRM_LEN) ||
+                   (clcm->path == SMC_TYPE_D &&
+                    ntohs(clc->hdr.length) != SMCD_CLC_ACCEPT_CONFIRM_LEN))
                        return false;
-               trl = &clc->trl;
+               trl = (struct smc_clc_msg_trail *)
+                       ((u8 *)clc + ntohs(clc->hdr.length) - sizeof(*trl));
                break;
        case SMC_CLC_DECLINE:
                dclc = (struct smc_clc_msg_decline *)clcm;
@@ -70,7 +86,8 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm)
        default:
                return false;
        }
-       if (memcmp(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)))
+       if (memcmp(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) &&
+           memcmp(trl->eyecatcher, SMCD_EYECATCHER, sizeof(SMCD_EYECATCHER)))
                return false;
        return true;
 }
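
Worked numbers for the trailer lookup above: accept/confirm messages are now variable length, so the trailer has to be located from hdr.length instead of a fixed struct member:

/* trl = (u8 *)clc + ntohs(hdr.length) - sizeof(struct smc_clc_msg_trail)
 *   SMC-R: 68 - 4 = byte offset 64
 *   SMC-D: 48 - 4 = byte offset 44
 * (the trailer is just the 4-byte eyecatcher)
 */
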
@@ -296,6 +313,9 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
        datlen = ntohs(clcm->length);
        if ((len < sizeof(struct smc_clc_msg_hdr)) ||
            (datlen > buflen) ||
+           (clcm->version != SMC_CLC_V1) ||
+           (clcm->path != SMC_TYPE_R && clcm->path != SMC_TYPE_D &&
+            clcm->path != SMC_TYPE_B) ||
            ((clcm->type != SMC_CLC_DECLINE) &&
             (clcm->type != expected_type))) {
                smc->sk.sk_err = EPROTO;
@@ -314,7 +334,11 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
                goto out;
        }
        if (clcm->type == SMC_CLC_DECLINE) {
-               reason_code = SMC_CLC_DECL_REPLY;
+               struct smc_clc_msg_decline *dclc;
+
+               dclc = (struct smc_clc_msg_decline *)clcm;
+               reason_code = SMC_CLC_DECL_PEERDECL;
+               smc->peer_diagnosis = ntohl(dclc->peer_diagnosis);
                if (((struct smc_clc_msg_decline *)buf)->hdr.flag) {
                        smc->conn.lgr->sync_err = 1;
                        smc_lgr_terminate(smc->conn.lgr);
@@ -357,17 +381,18 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info)
 }
 
 /* send CLC PROPOSAL message across internal TCP socket */
-int smc_clc_send_proposal(struct smc_sock *smc,
-                         struct smc_ib_device *smcibdev,
-                         u8 ibport)
+int smc_clc_send_proposal(struct smc_sock *smc, int smc_type,
+                         struct smc_ib_device *ibdev, u8 ibport, u8 gid[],
+                         struct smcd_dev *ismdev)
 {
        struct smc_clc_ipv6_prefix ipv6_prfx[SMC_CLC_MAX_V6_PREFIX];
        struct smc_clc_msg_proposal_prefix pclc_prfx;
+       struct smc_clc_msg_smcd pclc_smcd;
        struct smc_clc_msg_proposal pclc;
        struct smc_clc_msg_trail trl;
        int len, i, plen, rc;
        int reason_code = 0;
-       struct kvec vec[4];
+       struct kvec vec[5];
        struct msghdr msg;
 
        /* retrieve ip prefixes for CLC proposal msg */
@@ -382,18 +407,34 @@ int smc_clc_send_proposal(struct smc_sock *smc,
        memset(&pclc, 0, sizeof(pclc));
        memcpy(pclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
        pclc.hdr.type = SMC_CLC_PROPOSAL;
-       pclc.hdr.length = htons(plen);
        pclc.hdr.version = SMC_CLC_V1;          /* SMC version */
-       memcpy(pclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid));
-       memcpy(&pclc.lcl.gid, &smcibdev->gid[ibport - 1], SMC_GID_SIZE);
-       memcpy(&pclc.lcl.mac, &smcibdev->mac[ibport - 1], ETH_ALEN);
-       pclc.iparea_offset = htons(0);
+       pclc.hdr.path = smc_type;
+       if (smc_type == SMC_TYPE_R || smc_type == SMC_TYPE_B) {
+               /* add SMC-R specifics */
+               memcpy(pclc.lcl.id_for_peer, local_systemid,
+                      sizeof(local_systemid));
+               memcpy(&pclc.lcl.gid, gid, SMC_GID_SIZE);
+               memcpy(&pclc.lcl.mac, &ibdev->mac[ibport - 1], ETH_ALEN);
+               pclc.iparea_offset = htons(0);
+       }
+       if (smc_type == SMC_TYPE_D || smc_type == SMC_TYPE_B) {
+               /* add SMC-D specifics */
+               memset(&pclc_smcd, 0, sizeof(pclc_smcd));
+               plen += sizeof(pclc_smcd);
+               pclc.iparea_offset = htons(SMC_CLC_PROPOSAL_MAX_OFFSET);
+               pclc_smcd.gid = ismdev->local_gid;
+       }
+       pclc.hdr.length = htons(plen);
 
        memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
        memset(&msg, 0, sizeof(msg));
        i = 0;
        vec[i].iov_base = &pclc;
        vec[i++].iov_len = sizeof(pclc);
+       if (smc_type == SMC_TYPE_D || smc_type == SMC_TYPE_B) {
+               vec[i].iov_base = &pclc_smcd;
+               vec[i++].iov_len = sizeof(pclc_smcd);
+       }
        vec[i].iov_base = &pclc_prfx;
        vec[i++].iov_len = sizeof(pclc_prfx);
        if (pclc_prfx.ipv6_prefixes_cnt > 0) {
@@ -429,35 +470,55 @@ int smc_clc_send_confirm(struct smc_sock *smc)
        struct kvec vec;
        int len;
 
-       link = &conn->lgr->lnk[SMC_SINGLE_LINK];
        /* send SMC Confirm CLC msg */
        memset(&cclc, 0, sizeof(cclc));
-       memcpy(cclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
        cclc.hdr.type = SMC_CLC_CONFIRM;
-       cclc.hdr.length = htons(sizeof(cclc));
        cclc.hdr.version = SMC_CLC_V1;          /* SMC version */
-       memcpy(cclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid));
-       memcpy(&cclc.lcl.gid, &link->smcibdev->gid[link->ibport - 1],
-              SMC_GID_SIZE);
-       memcpy(&cclc.lcl.mac, &link->smcibdev->mac[link->ibport - 1], ETH_ALEN);
-       hton24(cclc.qpn, link->roce_qp->qp_num);
-       cclc.rmb_rkey =
-               htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
-       cclc.rmbe_idx = 1; /* for now: 1 RMB = 1 RMBE */
-       cclc.rmbe_alert_token = htonl(conn->alert_token_local);
-       cclc.qp_mtu = min(link->path_mtu, link->peer_mtu);
-       cclc.rmbe_size = conn->rmbe_size_short;
-       cclc.rmb_dma_addr = cpu_to_be64(
-               (u64)sg_dma_address(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
-       hton24(cclc.psn, link->psn_initial);
-
-       memcpy(cclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
+       if (smc->conn.lgr->is_smcd) {
+               /* SMC-D specific settings */
+               memcpy(cclc.hdr.eyecatcher, SMCD_EYECATCHER,
+                      sizeof(SMCD_EYECATCHER));
+               cclc.hdr.path = SMC_TYPE_D;
+               cclc.hdr.length = htons(SMCD_CLC_ACCEPT_CONFIRM_LEN);
+               cclc.gid = conn->lgr->smcd->local_gid;
+               cclc.token = conn->rmb_desc->token;
+               cclc.dmbe_size = conn->rmbe_size_short;
+               cclc.dmbe_idx = 0;
+               memcpy(&cclc.linkid, conn->lgr->id, SMC_LGR_ID_SIZE);
+               memcpy(cclc.smcd_trl.eyecatcher, SMCD_EYECATCHER,
+                      sizeof(SMCD_EYECATCHER));
+       } else {
+               /* SMC-R specific settings */
+               link = &conn->lgr->lnk[SMC_SINGLE_LINK];
+               memcpy(cclc.hdr.eyecatcher, SMC_EYECATCHER,
+                      sizeof(SMC_EYECATCHER));
+               cclc.hdr.path = SMC_TYPE_R;
+               cclc.hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN);
+               memcpy(cclc.lcl.id_for_peer, local_systemid,
+                      sizeof(local_systemid));
+               memcpy(&cclc.lcl.gid, link->gid, SMC_GID_SIZE);
+               memcpy(&cclc.lcl.mac, &link->smcibdev->mac[link->ibport - 1],
+                      ETH_ALEN);
+               hton24(cclc.qpn, link->roce_qp->qp_num);
+               cclc.rmb_rkey =
+                       htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
+               cclc.rmbe_idx = 1; /* for now: 1 RMB = 1 RMBE */
+               cclc.rmbe_alert_token = htonl(conn->alert_token_local);
+               cclc.qp_mtu = min(link->path_mtu, link->peer_mtu);
+               cclc.rmbe_size = conn->rmbe_size_short;
+               cclc.rmb_dma_addr = cpu_to_be64((u64)sg_dma_address
+                               (conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
+               hton24(cclc.psn, link->psn_initial);
+               memcpy(cclc.smcr_trl.eyecatcher, SMC_EYECATCHER,
+                      sizeof(SMC_EYECATCHER));
+       }
 
        memset(&msg, 0, sizeof(msg));
        vec.iov_base = &cclc;
-       vec.iov_len = sizeof(cclc);
-       len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, sizeof(cclc));
-       if (len < sizeof(cclc)) {
+       vec.iov_len = ntohs(cclc.hdr.length);
+       len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1,
+                            ntohs(cclc.hdr.length));
+       if (len < ntohs(cclc.hdr.length)) {
                if (len >= 0) {
                        reason_code = -ENETUNREACH;
                        smc->sk.sk_err = -reason_code;
@@ -480,35 +541,57 @@ int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact)
        int rc = 0;
        int len;
 
-       link = &conn->lgr->lnk[SMC_SINGLE_LINK];
        memset(&aclc, 0, sizeof(aclc));
-       memcpy(aclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
        aclc.hdr.type = SMC_CLC_ACCEPT;
-       aclc.hdr.length = htons(sizeof(aclc));
        aclc.hdr.version = SMC_CLC_V1;          /* SMC version */
        if (srv_first_contact)
                aclc.hdr.flag = 1;
-       memcpy(aclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid));
-       memcpy(&aclc.lcl.gid, &link->smcibdev->gid[link->ibport - 1],
-              SMC_GID_SIZE);
-       memcpy(&aclc.lcl.mac, link->smcibdev->mac[link->ibport - 1], ETH_ALEN);
-       hton24(aclc.qpn, link->roce_qp->qp_num);
-       aclc.rmb_rkey =
-               htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
-       aclc.rmbe_idx = 1;                      /* as long as 1 RMB = 1 RMBE */
-       aclc.rmbe_alert_token = htonl(conn->alert_token_local);
-       aclc.qp_mtu = link->path_mtu;
-       aclc.rmbe_size = conn->rmbe_size_short,
-       aclc.rmb_dma_addr = cpu_to_be64(
-               (u64)sg_dma_address(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
-       hton24(aclc.psn, link->psn_initial);
-       memcpy(aclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
+
+       if (new_smc->conn.lgr->is_smcd) {
+               /* SMC-D specific settings */
+               aclc.hdr.length = htons(SMCD_CLC_ACCEPT_CONFIRM_LEN);
+               memcpy(aclc.hdr.eyecatcher, SMCD_EYECATCHER,
+                      sizeof(SMCD_EYECATCHER));
+               aclc.hdr.path = SMC_TYPE_D;
+               aclc.gid = conn->lgr->smcd->local_gid;
+               aclc.token = conn->rmb_desc->token;
+               aclc.dmbe_size = conn->rmbe_size_short;
+               aclc.dmbe_idx = 0;
+               memcpy(&aclc.linkid, conn->lgr->id, SMC_LGR_ID_SIZE);
+               memcpy(aclc.smcd_trl.eyecatcher, SMCD_EYECATCHER,
+                      sizeof(SMCD_EYECATCHER));
+       } else {
+               /* SMC-R specific settings */
+               aclc.hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN);
+               memcpy(aclc.hdr.eyecatcher, SMC_EYECATCHER,
+                      sizeof(SMC_EYECATCHER));
+               aclc.hdr.path = SMC_TYPE_R;
+               link = &conn->lgr->lnk[SMC_SINGLE_LINK];
+               memcpy(aclc.lcl.id_for_peer, local_systemid,
+                      sizeof(local_systemid));
+               memcpy(&aclc.lcl.gid, link->gid, SMC_GID_SIZE);
+               memcpy(&aclc.lcl.mac, link->smcibdev->mac[link->ibport - 1],
+                      ETH_ALEN);
+               hton24(aclc.qpn, link->roce_qp->qp_num);
+               aclc.rmb_rkey =
+                       htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
+               aclc.rmbe_idx = 1;              /* as long as 1 RMB = 1 RMBE */
+               aclc.rmbe_alert_token = htonl(conn->alert_token_local);
+               aclc.qp_mtu = link->path_mtu;
+               aclc.rmbe_size = conn->rmbe_size_short,
+               aclc.rmb_dma_addr = cpu_to_be64((u64)sg_dma_address
+                               (conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
+               hton24(aclc.psn, link->psn_initial);
+               memcpy(aclc.smcr_trl.eyecatcher, SMC_EYECATCHER,
+                      sizeof(SMC_EYECATCHER));
+       }
 
        memset(&msg, 0, sizeof(msg));
        vec.iov_base = &aclc;
-       vec.iov_len = sizeof(aclc);
-       len = kernel_sendmsg(new_smc->clcsock, &msg, &vec, 1, sizeof(aclc));
-       if (len < sizeof(aclc)) {
+       vec.iov_len = ntohs(aclc.hdr.length);
+       len = kernel_sendmsg(new_smc->clcsock, &msg, &vec, 1,
+                            ntohs(aclc.hdr.length));
+       if (len < ntohs(aclc.hdr.length)) {
                if (len >= 0)
                        new_smc->sk.sk_err = EPROTO;
                else
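
Putting the smc_clc_send_proposal() pieces together, the proposal leaves the socket as a kvec chain in this order (layout sketch; the SMC-D block is present only for SMC_TYPE_D and SMC_TYPE_B):

/*
 *  +--------------+-----------+-----------+------------------+-----+
 *  | pclc hdr+lcl | pclc_smcd | pclc_prfx | ipv6 prefixes[n] | trl |
 *  +--------------+-----------+-----------+------------------+-----+
 *
 * iparea_offset is 0 for pure SMC-R and SMC_CLC_PROPOSAL_MAX_OFFSET
 * otherwise; smc_get_clc_msg_smcd() relies on that offset equalling
 * sizeof(struct smc_clc_msg_smcd).
 */
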
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index 41ff9ea96139ced3b2a6760af2f310d40e6f58a7..18da89b681c2d6d17dd38b94322674ac8ae25f07 100644
 #define SMC_CLC_DECLINE                0x04
 
 #define SMC_CLC_V1             0x1             /* SMC version                */
+#define SMC_TYPE_R             0               /* SMC-R only                 */
+#define SMC_TYPE_D             1               /* SMC-D only                 */
+#define SMC_TYPE_B             3               /* SMC-R and SMC-D            */
 #define CLC_WAIT_TIME          (6 * HZ)        /* max. wait time on clcsock  */
 #define SMC_CLC_DECL_MEM       0x01010000  /* insufficient memory resources  */
-#define SMC_CLC_DECL_TIMEOUT   0x02000000  /* timeout                        */
+#define SMC_CLC_DECL_TIMEOUT_CL        0x02010000  /* timeout w4 QP confirm link     */
+#define SMC_CLC_DECL_TIMEOUT_AL        0x02020000  /* timeout w4 QP add link         */
 #define SMC_CLC_DECL_CNFERR    0x03000000  /* configuration error            */
-#define SMC_CLC_DECL_IPSEC     0x03030000  /* IPsec usage                    */
+#define SMC_CLC_DECL_PEERNOSMC 0x03010000  /* peer did not indicate SMC      */
+#define SMC_CLC_DECL_IPSEC     0x03020000  /* IPsec usage                    */
+#define SMC_CLC_DECL_NOSMCDEV  0x03030000  /* no SMC device found            */
+#define SMC_CLC_DECL_MODEUNSUPP        0x03040000  /* smc modes do not match (R or D)*/
+#define SMC_CLC_DECL_RMBE_EC   0x03050000  /* peer has eyecatcher in RMBE    */
+#define SMC_CLC_DECL_OPTUNSUPP 0x03060000  /* fastopen sockopt not supported */
 #define SMC_CLC_DECL_SYNCERR   0x04000000  /* synchronization error          */
-#define SMC_CLC_DECL_REPLY     0x06000000  /* reply to a received decline    */
+#define SMC_CLC_DECL_PEERDECL  0x05000000  /* peer declined during handshake */
 #define SMC_CLC_DECL_INTERR    0x99990000  /* internal error                 */
-#define SMC_CLC_DECL_TCL       0x02040000  /* timeout w4 QP confirm          */
-#define SMC_CLC_DECL_SEND      0x07000000  /* sending problem                */
-#define SMC_CLC_DECL_RMBE_EC   0x08000000  /* peer has eyecatcher in RMBE    */
+#define SMC_CLC_DECL_ERR_RTOK  0x99990001  /*   rtoken handling failed       */
+#define SMC_CLC_DECL_ERR_RDYLNK        0x99990002  /*   ib ready link failed         */
+#define SMC_CLC_DECL_ERR_REGRMB        0x99990003  /*   reg rmb failed               */
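
The renumbered decline codes above follow a class/detail layout; hypothetical helpers (not part of the patch) that make the scheme explicit:

static inline u8 smc_clc_decl_class(u32 rsn)
{
	return rsn >> 24;	/* 0x01 mem, 0x02 timeouts, 0x03 config,
				 * 0x04 sync, 0x05 peer decline, 0x99 internal
				 */
}

static inline u16 smc_clc_decl_detail(u32 rsn)
{
	/* internal errors (0x9999xxxx) carry the detail in the low word */
	if (smc_clc_decl_class(rsn) == 0x99)
		return rsn & 0xffff;
	return (rsn >> 16) & 0xff;
}
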
 
 struct smc_clc_msg_hdr {       /* header1 of clc messages */
        u8 eyecatcher[4];       /* eye catcher */
@@ -42,9 +51,11 @@ struct smc_clc_msg_hdr {     /* header1 of clc messages */
 #if defined(__BIG_ENDIAN_BITFIELD)
        u8 version : 4,
           flag    : 1,
-          rsvd    : 3;
+          rsvd    : 1,
+          path    : 2;
 #elif defined(__LITTLE_ENDIAN_BITFIELD)
-       u8 rsvd    : 3,
+       u8 path    : 2,
+          rsvd    : 1,
           flag    : 1,
           version : 4;
 #endif
@@ -77,6 +88,11 @@ struct smc_clc_msg_proposal_prefix { /* prefix part of clc proposal message*/
        u8 ipv6_prefixes_cnt;   /* number of IPv6 prefixes in prefix array */
 } __aligned(4);
 
+struct smc_clc_msg_smcd {      /* SMC-D GID information */
+       u64 gid;                /* ISM GID of requestor */
+       u8 res[32];
+};
+
 struct smc_clc_msg_proposal {  /* clc proposal message sent by Linux */
        struct smc_clc_msg_hdr hdr;
        struct smc_clc_msg_local lcl;
@@ -94,23 +110,45 @@ struct smc_clc_msg_proposal {      /* clc proposal message sent by Linux */
 
 struct smc_clc_msg_accept_confirm {    /* clc accept / confirm message */
        struct smc_clc_msg_hdr hdr;
-       struct smc_clc_msg_local lcl;
-       u8 qpn[3];              /* QP number */
-       __be32 rmb_rkey;        /* RMB rkey */
-       u8 rmbe_idx;            /* Index of RMBE in RMB */
-       __be32 rmbe_alert_token;/* unique connection id */
+       union {
+               struct { /* SMC-R */
+                       struct smc_clc_msg_local lcl;
+                       u8 qpn[3];              /* QP number */
+                       __be32 rmb_rkey;        /* RMB rkey */
+                       u8 rmbe_idx;            /* Index of RMBE in RMB */
+                       __be32 rmbe_alert_token;/* unique connection id */
 #if defined(__BIG_ENDIAN_BITFIELD)
-       u8 rmbe_size : 4,       /* RMBE buf size (compressed notation) */
-          qp_mtu   : 4;        /* QP mtu */
+                       u8 rmbe_size : 4,       /* buf size (compressed) */
+                          qp_mtu   : 4;        /* QP mtu */
 #elif defined(__LITTLE_ENDIAN_BITFIELD)
-       u8 qp_mtu   : 4,
-          rmbe_size : 4;
+                       u8 qp_mtu   : 4,
+                          rmbe_size : 4;
 #endif
-       u8 reserved;
-       __be64 rmb_dma_addr;    /* RMB virtual address */
-       u8 reserved2;
-       u8 psn[3];              /* initial packet sequence number */
-       struct smc_clc_msg_trail trl; /* eye catcher "SMCR" EBCDIC */
+                       u8 reserved;
+                       __be64 rmb_dma_addr;    /* RMB virtual address */
+                       u8 reserved2;
+                       u8 psn[3];              /* packet sequence number */
+                       struct smc_clc_msg_trail smcr_trl;
+                                               /* eye catcher "SMCR" EBCDIC */
+               } __packed;
+               struct { /* SMC-D */
+                       u64 gid;                /* Sender GID */
+                       u64 token;              /* DMB token */
+                       u8 dmbe_idx;            /* DMBE index */
+#if defined(__BIG_ENDIAN_BITFIELD)
+                       u8 dmbe_size : 4,       /* buf size (compressed) */
+                          reserved3 : 4;
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+                       u8 reserved3 : 4,
+                          dmbe_size : 4;
+#endif
+                       u16 reserved4;
+                       u32 linkid;             /* Link identifier */
+                       u32 reserved5[3];
+                       struct smc_clc_msg_trail smcd_trl;
+                                               /* eye catcher "SMCD" EBCDIC */
+               } __packed;
+       };
 } __packed;                    /* format defined in RFC7609 */
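The accept/confirm body is now a union of the SMC-R and the SMC-D layout, so a receiver must select the interpretation from the negotiated path before reading any union member; only the trailer of the active variant ("SMCR" vs. "SMCD" eyecatcher in EBCDIC) is meaningful. A minimal dispatch sketch with simplified stand-in types (these are not the kernel structs):

    #include <stdio.h>
    #include <stdint.h>

    enum { SMC_TYPE_R = 0, SMC_TYPE_D = 1 };

    struct accept_confirm {                     /* simplified stand-in */
        uint8_t path;                           /* from the CLC header */
        union {
            struct { uint32_t rmb_rkey; } smcr; /* SMC-R view */
            struct { uint64_t token;    } smcd; /* SMC-D view */
        };
    };

    static void handle(const struct accept_confirm *ac)
    {
        if (ac->path == SMC_TYPE_D)
            printf("SMC-D accept, DMB token %llu\n",
                   (unsigned long long)ac->smcd.token);
        else
            printf("SMC-R accept, rkey %u\n", ac->smcr.rmb_rkey);
    }

    int main(void)
    {
        struct accept_confirm ac = { .path = SMC_TYPE_D, .smcd.token = 42 };

        handle(&ac);
        return 0;
    }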
 
 struct smc_clc_msg_decline {   /* clc decline message */
@@ -129,13 +167,26 @@ smc_clc_proposal_get_prefix(struct smc_clc_msg_proposal *pclc)
               ((u8 *)pclc + sizeof(*pclc) + ntohs(pclc->iparea_offset));
 }
 
+/* get SMC-D info from proposal message */
+static inline struct smc_clc_msg_smcd *
+smc_get_clc_msg_smcd(struct smc_clc_msg_proposal *prop)
+{
+       if (ntohs(prop->iparea_offset) != sizeof(struct smc_clc_msg_smcd))
+               return NULL;
+
+       return (struct smc_clc_msg_smcd *)(prop + 1);
+}
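smc_get_clc_msg_smcd() exploits the proposal wire layout: the optional SMC-D GID area sits directly behind the fixed part of the proposal, and iparea_offset gives the number of bytes to skip before the IP prefix area; an offset equal to sizeof(struct smc_clc_msg_smcd) therefore signals that the SMC-D area is present. The same pointer arithmetic in a self-contained sketch (stand-in types; the kernel additionally byte-swaps the offset with ntohs()):

    #include <stdio.h>
    #include <stdint.h>

    struct proposal  { uint16_t iparea_offset; };          /* stand-in header   */
    struct smcd_info { uint8_t gid[8]; uint8_t res[32]; }; /* stand-in GID area */

    static const struct smcd_info *get_smcd(const struct proposal *p)
    {
        if (p->iparea_offset != sizeof(struct smcd_info))
            return NULL;                /* no SMC-D area follows the header */
        return (const struct smcd_info *)(p + 1);
    }

    int main(void)
    {
        struct { struct proposal hdr; struct smcd_info smcd; } msg = {
            .hdr.iparea_offset = sizeof(struct smcd_info),
            .smcd.gid = { 0x12, 0x34 },
        };
        const struct smcd_info *s = get_smcd(&msg.hdr);

        printf("smcd area %s, gid[0]=0x%02x\n", s ? "present" : "absent",
               (unsigned int)(s ? s->gid[0] : 0));
        return 0;
    }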
+
+struct smcd_dev;
+
 int smc_clc_prfx_match(struct socket *clcsock,
                       struct smc_clc_msg_proposal_prefix *prop);
 int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
                     u8 expected_type);
 int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info);
-int smc_clc_send_proposal(struct smc_sock *smc, struct smc_ib_device *smcibdev,
-                         u8 ibport);
+int smc_clc_send_proposal(struct smc_sock *smc, int smc_type,
+                         struct smc_ib_device *smcibdev, u8 ibport, u8 gid[],
+                         struct smcd_dev *ismdev);
 int smc_clc_send_confirm(struct smc_sock *smc);
 int smc_clc_send_accept(struct smc_sock *smc, int srv_first_contact);
 
index add82b0266f303ac943ecf8786e1b61e401a9b36..a46418f45ecdea396ba2f2e8ac76e2614c383c3c 100644 (file)
 #include "smc_llc.h"
 #include "smc_cdc.h"
 #include "smc_close.h"
+#include "smc_ism.h"
 
 #define SMC_LGR_NUM_INCR               256
 #define SMC_LGR_FREE_DELAY_SERV                (600 * HZ)
 #define SMC_LGR_FREE_DELAY_CLNT                (SMC_LGR_FREE_DELAY_SERV + 10 * HZ)
+#define SMC_LGR_FREE_DELAY_FAST                (8 * HZ)
 
 static struct smc_lgr_list smc_lgr_list = {    /* established link groups */
        .lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
@@ -46,8 +48,13 @@ static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
         * otherwise there is a risk of out-of-sync link groups.
         */
        mod_delayed_work(system_wq, &lgr->free_work,
-                        lgr->role == SMC_CLNT ? SMC_LGR_FREE_DELAY_CLNT :
-                                                SMC_LGR_FREE_DELAY_SERV);
+                        (!lgr->is_smcd && lgr->role == SMC_CLNT) ?
+                        SMC_LGR_FREE_DELAY_CLNT : SMC_LGR_FREE_DELAY_SERV);
+}
+
+void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr)
+{
+       mod_delayed_work(system_wq, &lgr->free_work, SMC_LGR_FREE_DELAY_FAST);
 }
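Three delays now govern delayed freeing of a link group: an SMC-R client waits SMC_LGR_FREE_DELAY_CLNT, ten seconds longer than the server's SMC_LGR_FREE_DELAY_SERV (per the context comment, to avoid out-of-sync link groups), SMC-D groups always use the server delay, and the new fast variant forces teardown after eight seconds. A sketch of the selection logic (HZ fixed at 1000 purely for illustration; the kernel value is configuration dependent):

    #include <stdio.h>
    #include <stdbool.h>

    #define HZ 1000 /* illustrative only */
    #define SMC_LGR_FREE_DELAY_SERV (600 * HZ)
    #define SMC_LGR_FREE_DELAY_CLNT (SMC_LGR_FREE_DELAY_SERV + 10 * HZ)
    #define SMC_LGR_FREE_DELAY_FAST (8 * HZ)

    enum role { SMC_SERV, SMC_CLNT };

    static long free_delay(bool is_smcd, enum role role, bool fast)
    {
        if (fast)
            return SMC_LGR_FREE_DELAY_FAST;
        return (!is_smcd && role == SMC_CLNT) ? SMC_LGR_FREE_DELAY_CLNT
                                              : SMC_LGR_FREE_DELAY_SERV;
    }

    int main(void)
    {
        printf("SMC-R client: %ld jiffies\n", free_delay(false, SMC_CLNT, false));
        printf("SMC-D group : %ld jiffies\n", free_delay(true,  SMC_SERV, false));
        printf("fast free   : %ld jiffies\n", free_delay(true,  SMC_SERV, true));
        return 0;
    }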
 
 /* Register connection's alert token in our lookup structure.
@@ -132,6 +139,20 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn)
        smc_lgr_schedule_free_work(lgr);
 }
 
+/* Send delete link, either as client to request the initiation
+ * of the DELETE LINK sequence from server; or as server to
+ * initiate the delete processing. See smc_llc_rx_delete_link().
+ */
+static int smc_link_send_delete(struct smc_link *lnk)
+{
+       if (lnk->state == SMC_LNK_ACTIVE &&
+           !smc_llc_send_delete_link(lnk, SMC_LLC_REQ, true)) {
+               smc_llc_link_deleting(lnk);
+               return 0;
+       }
+       return -ENOTCONN;
+}
+
 static void smc_lgr_free_work(struct work_struct *work)
 {
        struct smc_link_group *lgr = container_of(to_delayed_work(work),
@@ -152,17 +173,30 @@ static void smc_lgr_free_work(struct work_struct *work)
        list_del_init(&lgr->list); /* remove from smc_lgr_list */
 free:
        spin_unlock_bh(&smc_lgr_list.lock);
+
+       if (!lgr->is_smcd && !lgr->terminating) {
+               /* try to send del link msg, on error free lgr immediately */
+               if (!smc_link_send_delete(&lgr->lnk[SMC_SINGLE_LINK])) {
+                       /* reschedule in case we never receive a response */
+                       smc_lgr_schedule_free_work(lgr);
+                       return;
+               }
+       }
+
        if (!delayed_work_pending(&lgr->free_work)) {
-               if (lgr->lnk[SMC_SINGLE_LINK].state != SMC_LNK_INACTIVE)
-                       smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]);
+               struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
+
+               if (!lgr->is_smcd && lnk->state != SMC_LNK_INACTIVE)
+                       smc_llc_link_inactive(lnk);
                smc_lgr_free(lgr);
        }
 }
 
 /* create a new SMC link group */
-static int smc_lgr_create(struct smc_sock *smc,
+static int smc_lgr_create(struct smc_sock *smc, bool is_smcd,
                          struct smc_ib_device *smcibdev, u8 ibport,
-                         char *peer_systemid, unsigned short vlan_id)
+                         char *peer_systemid, unsigned short vlan_id,
+                         struct smcd_dev *smcismdev, u64 peer_gid)
 {
        struct smc_link_group *lgr;
        struct smc_link *lnk;
@@ -170,17 +204,23 @@ static int smc_lgr_create(struct smc_sock *smc,
        int rc = 0;
        int i;
 
+       if (is_smcd && vlan_id) {
+               rc = smc_ism_get_vlan(smcismdev, vlan_id);
+               if (rc)
+                       goto out;
+       }
+
        lgr = kzalloc(sizeof(*lgr), GFP_KERNEL);
        if (!lgr) {
                rc = -ENOMEM;
                goto out;
        }
-       lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
+       lgr->is_smcd = is_smcd;
        lgr->sync_err = 0;
-       memcpy(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN);
        lgr->vlan_id = vlan_id;
        rwlock_init(&lgr->sndbufs_lock);
        rwlock_init(&lgr->rmbs_lock);
+       rwlock_init(&lgr->conns_lock);
        for (i = 0; i < SMC_RMBE_SIZES; i++) {
                INIT_LIST_HEAD(&lgr->sndbufs[i]);
                INIT_LIST_HEAD(&lgr->rmbs[i]);
@@ -189,36 +229,48 @@ static int smc_lgr_create(struct smc_sock *smc,
        memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE);
        INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
        lgr->conns_all = RB_ROOT;
-
-       lnk = &lgr->lnk[SMC_SINGLE_LINK];
-       /* initialize link */
-       lnk->state = SMC_LNK_ACTIVATING;
-       lnk->link_id = SMC_SINGLE_LINK;
-       lnk->smcibdev = smcibdev;
-       lnk->ibport = ibport;
-       lnk->path_mtu = smcibdev->pattr[ibport - 1].active_mtu;
-       if (!smcibdev->initialized)
-               smc_ib_setup_per_ibdev(smcibdev);
-       get_random_bytes(rndvec, sizeof(rndvec));
-       lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) + (rndvec[2] << 16);
-       rc = smc_llc_link_init(lnk);
-       if (rc)
-               goto free_lgr;
-       rc = smc_wr_alloc_link_mem(lnk);
-       if (rc)
-               goto clear_llc_lnk;
-       rc = smc_ib_create_protection_domain(lnk);
-       if (rc)
-               goto free_link_mem;
-       rc = smc_ib_create_queue_pair(lnk);
-       if (rc)
-               goto dealloc_pd;
-       rc = smc_wr_create_link(lnk);
-       if (rc)
-               goto destroy_qp;
-
+       if (is_smcd) {
+               /* SMC-D specific settings */
+               lgr->peer_gid = peer_gid;
+               lgr->smcd = smcismdev;
+       } else {
+               /* SMC-R specific settings */
+               lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
+               memcpy(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN);
+
+               lnk = &lgr->lnk[SMC_SINGLE_LINK];
+               /* initialize link */
+               lnk->state = SMC_LNK_ACTIVATING;
+               lnk->link_id = SMC_SINGLE_LINK;
+               lnk->smcibdev = smcibdev;
+               lnk->ibport = ibport;
+               lnk->path_mtu = smcibdev->pattr[ibport - 1].active_mtu;
+               if (!smcibdev->initialized)
+                       smc_ib_setup_per_ibdev(smcibdev);
+               get_random_bytes(rndvec, sizeof(rndvec));
+               lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
+                       (rndvec[2] << 16);
+               rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
+                                         vlan_id, lnk->gid, &lnk->sgid_index);
+               if (rc)
+                       goto free_lgr;
+               rc = smc_llc_link_init(lnk);
+               if (rc)
+                       goto free_lgr;
+               rc = smc_wr_alloc_link_mem(lnk);
+               if (rc)
+                       goto clear_llc_lnk;
+               rc = smc_ib_create_protection_domain(lnk);
+               if (rc)
+                       goto free_link_mem;
+               rc = smc_ib_create_queue_pair(lnk);
+               if (rc)
+                       goto dealloc_pd;
+               rc = smc_wr_create_link(lnk);
+               if (rc)
+                       goto destroy_qp;
+       }
        smc->conn.lgr = lgr;
-       rwlock_init(&lgr->conns_lock);
        spin_lock_bh(&smc_lgr_list.lock);
        list_add(&lgr->list, &smc_lgr_list.list);
        spin_unlock_bh(&smc_lgr_list.lock);
@@ -264,7 +316,12 @@ void smc_conn_free(struct smc_connection *conn)
 {
        if (!conn->lgr)
                return;
-       smc_cdc_tx_dismiss_slots(conn);
+       if (conn->lgr->is_smcd) {
+               smc_ism_unset_conn(conn);
+               tasklet_kill(&conn->rx_tsklet);
+       } else {
+               smc_cdc_tx_dismiss_slots(conn);
+       }
        smc_lgr_unregister_conn(conn);
        smc_buf_unuse(conn);
 }
@@ -280,8 +337,8 @@ static void smc_link_clear(struct smc_link *lnk)
        smc_wr_free_link_mem(lnk);
 }
 
-static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
-                        struct smc_buf_desc *buf_desc)
+static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
+                         struct smc_buf_desc *buf_desc)
 {
        struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
 
@@ -301,6 +358,28 @@ static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
        kfree(buf_desc);
 }
 
+static void smcd_buf_free(struct smc_link_group *lgr, bool is_dmb,
+                         struct smc_buf_desc *buf_desc)
+{
+       if (is_dmb) {
+               /* restore original buf len */
+               buf_desc->len += sizeof(struct smcd_cdc_msg);
+               smc_ism_unregister_dmb(lgr->smcd, buf_desc);
+       } else {
+               kfree(buf_desc->cpu_addr);
+       }
+       kfree(buf_desc);
+}
+
+static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
+                        struct smc_buf_desc *buf_desc)
+{
+       if (lgr->is_smcd)
+               smcd_buf_free(lgr, is_rmb, buf_desc);
+       else
+               smcr_buf_free(lgr, is_rmb, buf_desc);
+}
+
 static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
 {
        struct smc_buf_desc *buf_desc, *bf_desc;
@@ -332,7 +411,10 @@ static void smc_lgr_free_bufs(struct smc_link_group *lgr)
 void smc_lgr_free(struct smc_link_group *lgr)
 {
        smc_lgr_free_bufs(lgr);
-       smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]);
+       if (lgr->is_smcd)
+               smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
+       else
+               smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]);
        kfree(lgr);
 }
 
@@ -357,7 +439,8 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr)
        lgr->terminating = 1;
        if (!list_empty(&lgr->list)) /* forget lgr */
                list_del_init(&lgr->list);
-       smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]);
+       if (!lgr->is_smcd)
+               smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]);
 
        write_lock_bh(&lgr->conns_lock);
        node = rb_first(&lgr->conns_all);
@@ -374,7 +457,8 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr)
                node = rb_first(&lgr->conns_all);
        }
        write_unlock_bh(&lgr->conns_lock);
-       wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait);
+       if (!lgr->is_smcd)
+               wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait);
        smc_lgr_schedule_free_work(lgr);
 }
 
@@ -392,17 +476,44 @@ void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport)
 
        spin_lock_bh(&smc_lgr_list.lock);
        list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
-               if (lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev &&
+               if (!lgr->is_smcd &&
+                   lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev &&
                    lgr->lnk[SMC_SINGLE_LINK].ibport == ibport)
                        __smc_lgr_terminate(lgr);
        }
        spin_unlock_bh(&smc_lgr_list.lock);
 }
 
+/* Called when SMC-D device is terminated or peer is lost */
+void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid)
+{
+       struct smc_link_group *lgr, *l;
+       LIST_HEAD(lgr_free_list);
+
+       /* run common cleanup function and build free list */
+       spin_lock_bh(&smc_lgr_list.lock);
+       list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
+               if (lgr->is_smcd && lgr->smcd == dev &&
+                   (!peer_gid || lgr->peer_gid == peer_gid) &&
+                   !list_empty(&lgr->list)) {
+                       __smc_lgr_terminate(lgr);
+                       list_move(&lgr->list, &lgr_free_list);
+               }
+       }
+       spin_unlock_bh(&smc_lgr_list.lock);
+
+       /* cancel the regular free workers and actually free lgrs */
+       list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
+               list_del_init(&lgr->list);
+               cancel_delayed_work_sync(&lgr->free_work);
+               smc_lgr_free(lgr);
+       }
+}
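smc_smcd_terminate() uses a common two-phase pattern: under the spinlock it only moves matching groups onto a private list, then drops the lock before running cancel_delayed_work_sync() and smc_lgr_free(), which may sleep and therefore must not run under a spinlock. A userspace analogue of the pattern with a pthread mutex standing in for the spinlock (all names illustrative):

    #include <stdio.h>
    #include <stdlib.h>
    #include <pthread.h>

    struct node { int id; int doomed; struct node *next; };

    static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct node *global_list;

    static void terminate_matching(void)
    {
        struct node *free_list = NULL, **pp = &global_list, *n;

        /* phase 1: under the lock, unlink matches onto a private list */
        pthread_mutex_lock(&list_lock);
        while ((n = *pp)) {
            if (n->doomed) {
                *pp = n->next;
                n->next = free_list;
                free_list = n;
            } else {
                pp = &n->next;
            }
        }
        pthread_mutex_unlock(&list_lock);

        /* phase 2: outside the lock, do the (possibly blocking) cleanup */
        while ((n = free_list)) {
            free_list = n->next;
            printf("freeing node %d\n", n->id);
            free(n);
        }
    }

    int main(void)
    {
        for (int i = 0; i < 4; i++) {
            struct node *n = malloc(sizeof(*n));

            n->id = i;
            n->doomed = (i % 2);
            n->next = global_list;
            global_list = n;
        }
        terminate_matching();
        return 0;
    }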
+
 /* Determine vlan of internal TCP socket.
  * @vlan_id: address to store the determined vlan id into
  */
-static int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id)
+int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id)
 {
        struct dst_entry *dst = sk_dst_get(clcsock->sk);
        struct net_device *ndev;
@@ -446,41 +557,30 @@ out:
        return rc;
 }
 
-/* determine the link gid matching the vlan id of the link group */
-static int smc_link_determine_gid(struct smc_link_group *lgr)
+static bool smcr_lgr_match(struct smc_link_group *lgr,
+                          struct smc_clc_msg_local *lcl,
+                          enum smc_lgr_role role)
 {
-       struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
-       struct ib_gid_attr gattr;
-       union ib_gid gid;
-       int i;
-
-       if (!lgr->vlan_id) {
-               lnk->gid = lnk->smcibdev->gid[lnk->ibport - 1];
-               return 0;
-       }
+       return !memcmp(lgr->peer_systemid, lcl->id_for_peer,
+                      SMC_SYSTEMID_LEN) &&
+               !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_gid, &lcl->gid,
+                       SMC_GID_SIZE) &&
+               !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_mac, lcl->mac,
+                       sizeof(lcl->mac)) &&
+               lgr->role == role;
+}
 
-       for (i = 0; i < lnk->smcibdev->pattr[lnk->ibport - 1].gid_tbl_len;
-            i++) {
-               if (ib_query_gid(lnk->smcibdev->ibdev, lnk->ibport, i, &gid,
-                                &gattr))
-                       continue;
-               if (gattr.ndev) {
-                       if (is_vlan_dev(gattr.ndev) &&
-                           vlan_dev_vlan_id(gattr.ndev) == lgr->vlan_id) {
-                               lnk->gid = gid;
-                               dev_put(gattr.ndev);
-                               return 0;
-                       }
-                       dev_put(gattr.ndev);
-               }
-       }
-       return -ENODEV;
+static bool smcd_lgr_match(struct smc_link_group *lgr,
+                          struct smcd_dev *smcismdev, u64 peer_gid)
+{
+       return lgr->peer_gid == peer_gid && lgr->smcd == smcismdev;
 }
 
 /* create a new SMC connection (and a new link group if necessary) */
-int smc_conn_create(struct smc_sock *smc,
+int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact,
                    struct smc_ib_device *smcibdev, u8 ibport,
-                   struct smc_clc_msg_local *lcl, int srv_first_contact)
+                   struct smc_clc_msg_local *lcl, struct smcd_dev *smcd,
+                   u64 peer_gid)
 {
        struct smc_connection *conn = &smc->conn;
        int local_contact = SMC_FIRST_CONTACT;
@@ -502,17 +602,12 @@ int smc_conn_create(struct smc_sock *smc,
        spin_lock_bh(&smc_lgr_list.lock);
        list_for_each_entry(lgr, &smc_lgr_list.list, list) {
                write_lock_bh(&lgr->conns_lock);
-               if (!memcmp(lgr->peer_systemid, lcl->id_for_peer,
-                           SMC_SYSTEMID_LEN) &&
-                   !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_gid, &lcl->gid,
-                           SMC_GID_SIZE) &&
-                   !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_mac, lcl->mac,
-                           sizeof(lcl->mac)) &&
+               if ((is_smcd ? smcd_lgr_match(lgr, smcd, peer_gid) :
+                    smcr_lgr_match(lgr, lcl, role)) &&
                    !lgr->sync_err &&
-                   (lgr->role == role) &&
-                   (lgr->vlan_id == vlan_id) &&
-                   ((role == SMC_CLNT) ||
-                    (lgr->conns_num < SMC_RMBS_PER_LGR_MAX))) {
+                   lgr->vlan_id == vlan_id &&
+                   (role == SMC_CLNT ||
+                    lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) {
                        /* link group found */
                        local_contact = SMC_REUSE_CONTACT;
                        conn->lgr = lgr;
@@ -535,16 +630,19 @@ int smc_conn_create(struct smc_sock *smc,
 
 create:
        if (local_contact == SMC_FIRST_CONTACT) {
-               rc = smc_lgr_create(smc, smcibdev, ibport,
-                                   lcl->id_for_peer, vlan_id);
+               rc = smc_lgr_create(smc, is_smcd, smcibdev, ibport,
+                                   lcl->id_for_peer, vlan_id, smcd, peer_gid);
                if (rc)
                        goto out;
                smc_lgr_register_conn(conn); /* add smc conn to lgr */
-               rc = smc_link_determine_gid(conn->lgr);
        }
        conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
        conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
        conn->urg_state = SMC_URG_READ;
+       if (is_smcd) {
+               conn->rx_off = sizeof(struct smcd_cdc_msg);
+               smcd_cdc_rx_init(conn); /* init tasklet for this conn */
+       }
 #ifndef KERNEL_HAS_ATOMIC64
        spin_lock_init(&conn->acurs_lock);
 #endif
@@ -609,8 +707,8 @@ static inline int smc_rmb_wnd_update_limit(int rmbe_size)
        return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
 }
 
-static struct smc_buf_desc *smc_new_buf_create(struct smc_link_group *lgr,
-                                              bool is_rmb, int bufsize)
+static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
+                                               bool is_rmb, int bufsize)
 {
        struct smc_buf_desc *buf_desc;
        struct smc_link *lnk;
@@ -668,7 +766,44 @@ static struct smc_buf_desc *smc_new_buf_create(struct smc_link_group *lgr,
        return buf_desc;
 }
 
-static int __smc_buf_create(struct smc_sock *smc, bool is_rmb)
+#define SMCD_DMBE_SIZES                7 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */
+
+static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
+                                               bool is_dmb, int bufsize)
+{
+       struct smc_buf_desc *buf_desc;
+       int rc;
+
+       if (smc_compress_bufsize(bufsize) > SMCD_DMBE_SIZES)
+               return ERR_PTR(-EAGAIN);
+
+       /* try to alloc a new DMB */
+       buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
+       if (!buf_desc)
+               return ERR_PTR(-ENOMEM);
+       if (is_dmb) {
+               rc = smc_ism_register_dmb(lgr, bufsize, buf_desc);
+               if (rc) {
+                       kfree(buf_desc);
+                       return ERR_PTR(-EAGAIN);
+               }
+               buf_desc->pages = virt_to_page(buf_desc->cpu_addr);
+               /* CDC header lives in the buffer, so expose a smaller len */
+               buf_desc->len = bufsize - sizeof(struct smcd_cdc_msg);
+       } else {
+               buf_desc->cpu_addr = kzalloc(bufsize, GFP_KERNEL |
+                                            __GFP_NOWARN | __GFP_NORETRY |
+                                            __GFP_NOMEMALLOC);
+               if (!buf_desc->cpu_addr) {
+                       kfree(buf_desc);
+                       return ERR_PTR(-EAGAIN);
+               }
+               buf_desc->len = bufsize;
+       }
+       return buf_desc;
+}
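DMB sizes travel in SMC's compressed 4-bit notation; per the SMCD_DMBE_SIZES comment, code 0 means 16KB and each step doubles the size, up to code 6 for 1MB. A round-trip sketch of that encoding, assuming the 16KB minimum (this is a simplified stand-in for the kernel's smc_compress_bufsize()/smc_uncompress_bufsize() helpers, not their actual implementation):

    #include <stdio.h>

    #define SMC_BUF_MIN_SIZE 16384 /* 16 KB, smallest encodable buffer */
    #define SMCD_DMBE_SIZES  7     /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */

    static int uncompress_bufsize(int compressed)
    {
        return SMC_BUF_MIN_SIZE << compressed;
    }

    static int compress_bufsize(int size)
    {
        int compressed = 0;

        while ((SMC_BUF_MIN_SIZE << compressed) < size &&
               compressed < SMCD_DMBE_SIZES - 1)
            compressed++;
        return compressed;
    }

    int main(void)
    {
        for (int c = 0; c < SMCD_DMBE_SIZES; c++)
            printf("compressed %d -> %d bytes\n", c, uncompress_bufsize(c));
        printf("100000 bytes -> compressed %d\n", compress_bufsize(100000));
        return 0;
    }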
+
+static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
 {
        struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM);
        struct smc_connection *conn = &smc->conn;
@@ -706,7 +841,11 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_rmb)
                        break; /* found reusable slot */
                }
 
-               buf_desc = smc_new_buf_create(lgr, is_rmb, bufsize);
+               if (is_smcd)
+                       buf_desc = smcd_new_buf_create(lgr, is_rmb, bufsize);
+               else
+                       buf_desc = smcr_new_buf_create(lgr, is_rmb, bufsize);
+
                if (PTR_ERR(buf_desc) == -ENOMEM)
                        break;
                if (IS_ERR(buf_desc))
@@ -727,7 +866,10 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_rmb)
                conn->rmbe_size_short = bufsize_short;
                smc->sk.sk_rcvbuf = bufsize * 2;
                atomic_set(&conn->bytes_to_rcv, 0);
-               conn->rmbe_update_limit = smc_rmb_wnd_update_limit(bufsize);
+               conn->rmbe_update_limit =
+                       smc_rmb_wnd_update_limit(buf_desc->len);
+               if (is_smcd)
+                       smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */
        } else {
                conn->sndbuf_desc = buf_desc;
                smc->sk.sk_sndbuf = bufsize * 2;
@@ -740,6 +882,8 @@ void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn)
 {
        struct smc_link_group *lgr = conn->lgr;
 
+       if (!conn->lgr || conn->lgr->is_smcd)
+               return;
        smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
                               conn->sndbuf_desc, DMA_TO_DEVICE);
 }
@@ -748,6 +892,8 @@ void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
 {
        struct smc_link_group *lgr = conn->lgr;
 
+       if (!conn->lgr || conn->lgr->is_smcd)
+               return;
        smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
                                  conn->sndbuf_desc, DMA_TO_DEVICE);
 }
@@ -756,6 +902,8 @@ void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
 {
        struct smc_link_group *lgr = conn->lgr;
 
+       if (!conn->lgr || conn->lgr->is_smcd)
+               return;
        smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
                               conn->rmb_desc, DMA_FROM_DEVICE);
 }
@@ -764,6 +912,8 @@ void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
 {
        struct smc_link_group *lgr = conn->lgr;
 
+       if (!conn->lgr || conn->lgr->is_smcd)
+               return;
        smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
                                  conn->rmb_desc, DMA_FROM_DEVICE);
 }
@@ -774,16 +924,16 @@ void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
  * the Linux implementation uses just one RMB-element per RMB, i.e. uses an
  * extra RMB for every connection in a link group
  */
-int smc_buf_create(struct smc_sock *smc)
+int smc_buf_create(struct smc_sock *smc, bool is_smcd)
 {
        int rc;
 
        /* create send buffer */
-       rc = __smc_buf_create(smc, false);
+       rc = __smc_buf_create(smc, is_smcd, false);
        if (rc)
                return rc;
        /* create rmb */
-       rc = __smc_buf_create(smc, true);
+       rc = __smc_buf_create(smc, is_smcd, true);
        if (rc)
                smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc);
        return rc;
@@ -865,7 +1015,14 @@ void smc_core_exit(void)
        spin_unlock_bh(&smc_lgr_list.lock);
        list_for_each_entry_safe(lgr, lg, &lgr_freeing_list, list) {
                list_del_init(&lgr->list);
-               smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]);
+               if (!lgr->is_smcd) {
+                       struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
+
+                       if (lnk->state == SMC_LNK_ACTIVE)
+                               smc_llc_send_delete_link(lnk, SMC_LLC_REQ,
+                                                        false);
+                       smc_llc_link_inactive(lnk);
+               }
                cancel_delayed_work_sync(&lgr->free_work);
                smc_lgr_free(lgr); /* free link group */
        }
index 93cb3523bf5093dbe4e30d34bd02e05eab98c5c3..c156674733c9dcb37af68e47a660785d140231b3 100644 (file)
@@ -34,7 +34,8 @@ enum smc_lgr_role {           /* possible roles of a link group */
 enum smc_link_state {                  /* possible states of a link */
        SMC_LNK_INACTIVE,       /* link is inactive */
        SMC_LNK_ACTIVATING,     /* link is being activated */
-       SMC_LNK_ACTIVE          /* link is active */
+       SMC_LNK_ACTIVE,         /* link is active */
+       SMC_LNK_DELETING,       /* link is being deleted */
 };
 
 #define SMC_WR_BUF_SIZE                48      /* size of work request buffer */
@@ -84,14 +85,15 @@ struct smc_link {
        wait_queue_head_t       wr_reg_wait;    /* wait for wr_reg result */
        enum smc_wr_reg_state   wr_reg_state;   /* state of wr_reg request */
 
-       union ib_gid            gid;            /* gid matching used vlan id */
+       u8                      gid[SMC_GID_SIZE];/* gid matching used vlan id*/
+       u8                      sgid_index;     /* gid index for vlan id      */
        u32                     peer_qpn;       /* QP number of peer */
        enum ib_mtu             path_mtu;       /* used mtu */
        enum ib_mtu             peer_mtu;       /* mtu size of peer */
        u32                     psn_initial;    /* QP tx initial packet seqno */
        u32                     peer_psn;       /* QP rx initial packet seqno */
        u8                      peer_mac[ETH_ALEN];     /* = gid[8:10||13:15] */
-       u8                      peer_gid[sizeof(union ib_gid)]; /* gid of peer*/
+       u8                      peer_gid[SMC_GID_SIZE]; /* gid of peer*/
        u8                      link_id;        /* unique # within link group */
 
        enum smc_link_state     state;          /* state of link */
@@ -124,15 +126,28 @@ struct smc_buf_desc {
        void                    *cpu_addr;      /* virtual address of buffer */
        struct page             *pages;
        int                     len;            /* length of buffer */
-       struct sg_table         sgt[SMC_LINKS_PER_LGR_MAX];/* virtual buffer */
-       struct ib_mr            *mr_rx[SMC_LINKS_PER_LGR_MAX];
-                                               /* for rmb only: memory region
-                                                * incl. rkey provided to peer
-                                                */
-       u32                     order;          /* allocation order */
        u32                     used;           /* currently used / unused */
        u8                      reused  : 1;    /* new created / reused */
        u8                      regerr  : 1;    /* err during registration */
+       union {
+               struct { /* SMC-R */
+                       struct sg_table         sgt[SMC_LINKS_PER_LGR_MAX];
+                                               /* virtual buffer */
+                       struct ib_mr            *mr_rx[SMC_LINKS_PER_LGR_MAX];
+                                               /* for rmb only: memory region
+                                                * incl. rkey provided to peer
+                                                */
+                       u32                     order;  /* allocation order */
+               };
+               struct { /* SMC-D */
+                       unsigned short          sba_idx;
+                                               /* SBA index number */
+                       u64                     token;
+                                               /* DMB token number */
+                       dma_addr_t              dma_addr;
+                                               /* DMA address */
+               };
+       };
 };
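Moving the SMC-R mapping fields and the new SMC-D fields into an anonymous union keeps struct smc_buf_desc no larger than its bigger variant, since any given buffer descriptor belongs to exactly one transport. A tiny demonstration of the overlap with stand-in types:

    #include <stdio.h>
    #include <stdint.h>

    struct buf_desc {                /* stand-in for struct smc_buf_desc */
        void *cpu_addr;
        int   len;
        union {
            struct {                 /* SMC-R view */
                uint32_t order;
            };
            struct {                 /* SMC-D view */
                unsigned short sba_idx;
                uint64_t       token;
            };
        };
    };

    int main(void)
    {
        /* the union costs only the larger of the two variants */
        printf("sizeof(struct buf_desc) = %zu\n", sizeof(struct buf_desc));
        return 0;
    }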
 
 struct smc_rtoken {                            /* address/key of remote RMB */
@@ -148,12 +163,10 @@ struct smc_rtoken {                               /* address/key of remote RMB */
  * struct smc_clc_msg_accept_confirm.rmbe_size being a 4 bit value (0..15)
  */
 
+struct smcd_dev;
+
 struct smc_link_group {
        struct list_head        list;
-       enum smc_lgr_role       role;           /* client or server */
-       struct smc_link         lnk[SMC_LINKS_PER_LGR_MAX];     /* smc link */
-       char                    peer_systemid[SMC_SYSTEMID_LEN];
-                                               /* unique system_id of peer */
        struct rb_root          conns_all;      /* connection tree */
        rwlock_t                conns_lock;     /* protects conns_all */
        unsigned int            conns_num;      /* current # of connections */
@@ -163,17 +176,34 @@ struct smc_link_group {
        rwlock_t                sndbufs_lock;   /* protects tx buffers */
        struct list_head        rmbs[SMC_RMBE_SIZES];   /* rx buffers */
        rwlock_t                rmbs_lock;      /* protects rx buffers */
-       struct smc_rtoken       rtokens[SMC_RMBS_PER_LGR_MAX]
-                                      [SMC_LINKS_PER_LGR_MAX];
-                                               /* remote addr/key pairs */
-       unsigned long           rtokens_used_mask[BITS_TO_LONGS(
-                                                       SMC_RMBS_PER_LGR_MAX)];
-                                               /* used rtoken elements */
 
        u8                      id[SMC_LGR_ID_SIZE];    /* unique lgr id */
        struct delayed_work     free_work;      /* delayed freeing of an lgr */
        u8                      sync_err : 1;   /* lgr no longer fits to peer */
        u8                      terminating : 1;/* lgr is terminating */
+
+       bool                    is_smcd;        /* SMC-R or SMC-D */
+       union {
+               struct { /* SMC-R */
+                       enum smc_lgr_role       role;
+                                               /* client or server */
+                       struct smc_link         lnk[SMC_LINKS_PER_LGR_MAX];
+                                               /* smc link */
+                       char                    peer_systemid[SMC_SYSTEMID_LEN];
+                                               /* unique system_id of peer */
+                       struct smc_rtoken       rtokens[SMC_RMBS_PER_LGR_MAX]
+                                               [SMC_LINKS_PER_LGR_MAX];
+                                               /* remote addr/key pairs */
+                       DECLARE_BITMAP(rtokens_used_mask, SMC_RMBS_PER_LGR_MAX);
+                                               /* used rtoken elements */
+               };
+               struct { /* SMC-D */
+                       u64                     peer_gid;
+                                               /* Peer GID (remote) */
+                       struct smcd_dev         *smcd;
+                                               /* ISM device for VLAN reg. */
+               };
+       };
 };
 
 /* Find the connection associated with the given alert token in the link group.
@@ -217,7 +247,8 @@ void smc_lgr_free(struct smc_link_group *lgr);
 void smc_lgr_forget(struct smc_link_group *lgr);
 void smc_lgr_terminate(struct smc_link_group *lgr);
 void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport);
-int smc_buf_create(struct smc_sock *smc);
+void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid);
+int smc_buf_create(struct smc_sock *smc, bool is_smcd);
 int smc_uncompress_bufsize(u8 compressed);
 int smc_rmb_rtoken_handling(struct smc_connection *conn,
                            struct smc_clc_msg_accept_confirm *clc);
@@ -227,9 +258,19 @@ void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn);
 void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn);
 void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn);
 void smc_rmb_sync_sg_for_device(struct smc_connection *conn);
+int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id);
+
 void smc_conn_free(struct smc_connection *conn);
-int smc_conn_create(struct smc_sock *smc,
+int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact,
                    struct smc_ib_device *smcibdev, u8 ibport,
-                   struct smc_clc_msg_local *lcl, int srv_first_contact);
+                   struct smc_clc_msg_local *lcl, struct smcd_dev *smcd,
+                   u64 peer_gid);
+void smcd_conn_free(struct smc_connection *conn);
+void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr);
 void smc_core_exit(void);
+
+static inline struct smc_link_group *smc_get_lgr(struct smc_link *link)
+{
+       return container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
+}
 #endif
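smc_get_lgr() recovers the owning link group from an embedded struct smc_link by subtracting the member offset from the member pointer, which is exactly what container_of() does. A freestanding userspace sketch of the mechanism:

    #include <stdio.h>
    #include <stddef.h>

    #define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

    struct link  { int id; };
    struct group { const char *name; struct link lnk[1]; };

    static struct group *get_group(struct link *link)
    {
        return container_of(link, struct group, lnk[0]);
    }

    int main(void)
    {
        struct group g = { .name = "lgr0", .lnk = { { 7 } } };
        struct link *l = &g.lnk[0];

        printf("link %d belongs to group %s\n", l->id, get_group(l)->name);
        return 0;
    }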
index 839354402215a836556fd881c350ca0ddb6b1c1b..dbf64a93d68add6f834cb20533ffc31b82fa4f9e 100644 (file)
@@ -79,6 +79,7 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
                           struct nlattr *bc)
 {
        struct smc_sock *smc = smc_sk(sk);
+       struct smc_diag_fallback fallback;
        struct user_namespace *user_ns;
        struct smc_diag_msg *r;
        struct nlmsghdr *nlh;
@@ -91,11 +92,21 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
        r = nlmsg_data(nlh);
        smc_diag_msg_common_fill(r, sk);
        r->diag_state = sk->sk_state;
-       r->diag_fallback = smc->use_fallback;
+       if (smc->use_fallback)
+               r->diag_mode = SMC_DIAG_MODE_FALLBACK_TCP;
+       else if (smc->conn.lgr && smc->conn.lgr->is_smcd)
+               r->diag_mode = SMC_DIAG_MODE_SMCD;
+       else
+               r->diag_mode = SMC_DIAG_MODE_SMCR;
        user_ns = sk_user_ns(NETLINK_CB(cb->skb).sk);
        if (smc_diag_msg_attrs_fill(sk, skb, r, user_ns))
                goto errout;
 
+       fallback.reason = smc->fallback_rsn;
+       fallback.peer_diagnosis = smc->peer_diagnosis;
+       if (nla_put(skb, SMC_DIAG_FALLBACK, sizeof(fallback), &fallback) < 0)
+               goto errout;
+
        if ((req->diag_ext & (1 << (SMC_DIAG_CONNINFO - 1))) &&
            smc->conn.alert_token_local) {
                struct smc_connection *conn = &smc->conn;
@@ -136,7 +147,8 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
                        goto errout;
        }
 
-       if ((req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) && smc->conn.lgr &&
+       if (smc->conn.lgr && !smc->conn.lgr->is_smcd &&
+           (req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) &&
            !list_empty(&smc->conn.lgr->list)) {
                struct smc_diag_lgrinfo linfo = {
                        .role = smc->conn.lgr->role,
@@ -148,13 +160,28 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
                       smc->conn.lgr->lnk[0].smcibdev->ibdev->name,
                       sizeof(smc->conn.lgr->lnk[0].smcibdev->ibdev->name));
                smc_gid_be16_convert(linfo.lnk[0].gid,
-                                    smc->conn.lgr->lnk[0].gid.raw);
+                                    smc->conn.lgr->lnk[0].gid);
                smc_gid_be16_convert(linfo.lnk[0].peer_gid,
                                     smc->conn.lgr->lnk[0].peer_gid);
 
                if (nla_put(skb, SMC_DIAG_LGRINFO, sizeof(linfo), &linfo) < 0)
                        goto errout;
        }
+       if (smc->conn.lgr && smc->conn.lgr->is_smcd &&
+           (req->diag_ext & (1 << (SMC_DIAG_DMBINFO - 1))) &&
+           !list_empty(&smc->conn.lgr->list)) {
+               struct smc_connection *conn = &smc->conn;
+               struct smcd_diag_dmbinfo dinfo = {
+                       .linkid = *((u32 *)conn->lgr->id),
+                       .peer_gid = conn->lgr->peer_gid,
+                       .my_gid = conn->lgr->smcd->local_gid,
+                       .token = conn->rmb_desc->token,
+                       .peer_token = conn->peer_token
+               };
+
+               if (nla_put(skb, SMC_DIAG_DMBINFO, sizeof(dinfo), &dinfo) < 0)
+                       goto errout;
+       }
 
        nlmsg_end(skb, nlh);
        return 0;
index 0eed7ab9f28b54c77010d85558d1b26b8e65b208..2cc64bc8ae20cd44d2cae2452cb7ab5406797ec3 100644 (file)
@@ -68,7 +68,7 @@ static int smc_ib_modify_qp_rtr(struct smc_link *lnk)
        qp_attr.path_mtu = min(lnk->path_mtu, lnk->peer_mtu);
        qp_attr.ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
        rdma_ah_set_port_num(&qp_attr.ah_attr, lnk->ibport);
-       rdma_ah_set_grh(&qp_attr.ah_attr, NULL, 0, 0, 1, 0);
+       rdma_ah_set_grh(&qp_attr.ah_attr, NULL, 0, lnk->sgid_index, 1, 0);
        rdma_ah_set_dgid_raw(&qp_attr.ah_attr, lnk->peer_gid);
        memcpy(&qp_attr.ah_attr.roce.dmac, lnk->peer_mac,
               sizeof(lnk->peer_mac));
@@ -112,8 +112,7 @@ int smc_ib_modify_qp_reset(struct smc_link *lnk)
 
 int smc_ib_ready_link(struct smc_link *lnk)
 {
-       struct smc_link_group *lgr =
-               container_of(lnk, struct smc_link_group, lnk[0]);
+       struct smc_link_group *lgr = smc_get_lgr(lnk);
        int rc = 0;
 
        rc = smc_ib_modify_qp_init(lnk);
@@ -143,6 +142,93 @@ out:
        return rc;
 }
 
+static int smc_ib_fill_mac(struct smc_ib_device *smcibdev, u8 ibport)
+{
+       struct ib_gid_attr gattr;
+       union ib_gid gid;
+       int rc;
+
+       rc = ib_query_gid(smcibdev->ibdev, ibport, 0, &gid, &gattr);
+       if (rc || !gattr.ndev)
+               return -ENODEV;
+
+       memcpy(smcibdev->mac[ibport - 1], gattr.ndev->dev_addr, ETH_ALEN);
+       dev_put(gattr.ndev);
+       return 0;
+}
+
+/* Create an identifier unique for this instance of SMC-R.
+ * The MAC-address of the first active registered IB device
+ * plus a random 2-byte number is used to create this identifier.
+ * This name is delivered to the peer during connection initialization.
+ */
+static inline void smc_ib_define_local_systemid(struct smc_ib_device *smcibdev,
+                                               u8 ibport)
+{
+       memcpy(&local_systemid[2], &smcibdev->mac[ibport - 1],
+              sizeof(smcibdev->mac[ibport - 1]));
+       get_random_bytes(&local_systemid[0], 2);
+}
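The relocated systemid helper builds an 8-byte identifier: bytes 0..1 are random, bytes 2..7 are the MAC address of the first active RoCE port. A userspace sketch of the construction (rand() stands in for get_random_bytes(); the MAC value is made up):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <time.h>

    #define SMC_SYSTEMID_LEN 8
    #define ETH_ALEN 6

    int main(void)
    {
        unsigned char systemid[SMC_SYSTEMID_LEN];
        unsigned char mac[ETH_ALEN] = { 0x02, 0x00, 0x5e, 0x10, 0x20, 0x30 };

        srand((unsigned int)time(NULL));
        systemid[0] = (unsigned char)rand();  /* 2 random bytes ...    */
        systemid[1] = (unsigned char)rand();
        memcpy(&systemid[2], mac, ETH_ALEN);  /* ... then the port MAC */

        for (int i = 0; i < SMC_SYSTEMID_LEN; i++)
            printf("%02x%s", systemid[i], i < SMC_SYSTEMID_LEN - 1 ? ":" : "\n");
        return 0;
    }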
+
+bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport)
+{
+       return smcibdev->pattr[ibport - 1].state == IB_PORT_ACTIVE;
+}
+
+/* determine the gid for an ib-device port and vlan id */
+int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport,
+                        unsigned short vlan_id, u8 gid[], u8 *sgid_index)
+{
+       struct ib_gid_attr gattr;
+       union ib_gid _gid;
+       int i;
+
+       for (i = 0; i < smcibdev->pattr[ibport - 1].gid_tbl_len; i++) {
+               memset(&_gid, 0, SMC_GID_SIZE);
+               memset(&gattr, 0, sizeof(gattr));
+               if (ib_query_gid(smcibdev->ibdev, ibport, i, &_gid, &gattr))
+                       continue;
+               if (!gattr.ndev)
+                       continue;
+               if (((!vlan_id && !is_vlan_dev(gattr.ndev)) ||
+                    (vlan_id && is_vlan_dev(gattr.ndev) &&
+                     vlan_dev_vlan_id(gattr.ndev) == vlan_id)) &&
+                   gattr.gid_type == IB_GID_TYPE_IB) {
+                       if (gid)
+                               memcpy(gid, &_gid, SMC_GID_SIZE);
+                       if (sgid_index)
+                               *sgid_index = i;
+                       dev_put(gattr.ndev);
+                       return 0;
+               }
+               dev_put(gattr.ndev);
+       }
+       return -ENODEV;
+}
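The new smc_ib_determine_gid() scans the port's GID table for the first IB-type entry whose associated netdev matches the requested VLAN: without a VLAN id it must be a non-VLAN device, with one it must be a VLAN device carrying that id. The VLAN predicate in isolation (stand-in types; the kernel additionally requires gattr.gid_type == IB_GID_TYPE_IB):

    #include <stdio.h>
    #include <stdbool.h>

    struct netdev { bool is_vlan; unsigned short vlan_id; };

    static bool gid_matches_vlan(const struct netdev *ndev,
                                 unsigned short vlan_id)
    {
        if (!vlan_id)
            return !ndev->is_vlan;      /* want the base device */
        return ndev->is_vlan && ndev->vlan_id == vlan_id;
    }

    int main(void)
    {
        struct netdev base = { .is_vlan = false };
        struct netdev v100 = { .is_vlan = true, .vlan_id = 100 };

        printf("base, vlan 0   -> %d\n", gid_matches_vlan(&base, 0));
        printf("v100, vlan 100 -> %d\n", gid_matches_vlan(&v100, 100));
        printf("v100, vlan 200 -> %d\n", gid_matches_vlan(&v100, 200));
        return 0;
    }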
+
+static int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport)
+{
+       int rc;
+
+       memset(&smcibdev->pattr[ibport - 1], 0,
+              sizeof(smcibdev->pattr[ibport - 1]));
+       rc = ib_query_port(smcibdev->ibdev, ibport,
+                          &smcibdev->pattr[ibport - 1]);
+       if (rc)
+               goto out;
+       /* the SMC protocol requires specification of the RoCE MAC address */
+       rc = smc_ib_fill_mac(smcibdev, ibport);
+       if (rc)
+               goto out;
+       if (!strncmp(local_systemid, SMC_LOCAL_SYSTEMID_RESET,
+                    sizeof(local_systemid)) &&
+           smc_ib_port_active(smcibdev, ibport))
+               /* create unique system identifier */
+               smc_ib_define_local_systemid(smcibdev, ibport);
+out:
+       return rc;
+}
+
 /* process context wrapper for might_sleep smc_ib_remember_port_attr */
 static void smc_ib_port_event_work(struct work_struct *work)
 {
@@ -370,62 +456,6 @@ void smc_ib_buf_unmap_sg(struct smc_ib_device *smcibdev,
        buf_slot->sgt[SMC_SINGLE_LINK].sgl->dma_address = 0;
 }
 
-static int smc_ib_fill_gid_and_mac(struct smc_ib_device *smcibdev, u8 ibport)
-{
-       struct ib_gid_attr gattr;
-       int rc;
-
-       rc = ib_query_gid(smcibdev->ibdev, ibport, 0,
-                         &smcibdev->gid[ibport - 1], &gattr);
-       if (rc || !gattr.ndev)
-               return -ENODEV;
-
-       memcpy(smcibdev->mac[ibport - 1], gattr.ndev->dev_addr, ETH_ALEN);
-       dev_put(gattr.ndev);
-       return 0;
-}
-
-/* Create an identifier unique for this instance of SMC-R.
- * The MAC-address of the first active registered IB device
- * plus a random 2-byte number is used to create this identifier.
- * This name is delivered to the peer during connection initialization.
- */
-static inline void smc_ib_define_local_systemid(struct smc_ib_device *smcibdev,
-                                               u8 ibport)
-{
-       memcpy(&local_systemid[2], &smcibdev->mac[ibport - 1],
-              sizeof(smcibdev->mac[ibport - 1]));
-       get_random_bytes(&local_systemid[0], 2);
-}
-
-bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport)
-{
-       return smcibdev->pattr[ibport - 1].state == IB_PORT_ACTIVE;
-}
-
-int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport)
-{
-       int rc;
-
-       memset(&smcibdev->pattr[ibport - 1], 0,
-              sizeof(smcibdev->pattr[ibport - 1]));
-       rc = ib_query_port(smcibdev->ibdev, ibport,
-                          &smcibdev->pattr[ibport - 1]);
-       if (rc)
-               goto out;
-       /* the SMC protocol requires specification of the RoCE MAC address */
-       rc = smc_ib_fill_gid_and_mac(smcibdev, ibport);
-       if (rc)
-               goto out;
-       if (!strncmp(local_systemid, SMC_LOCAL_SYSTEMID_RESET,
-                    sizeof(local_systemid)) &&
-           smc_ib_port_active(smcibdev, ibport))
-               /* create unique system identifier */
-               smc_ib_define_local_systemid(smcibdev, ibport);
-out:
-       return rc;
-}
-
 long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev)
 {
        struct ib_cq_init_attr cqattr = {
@@ -454,9 +484,6 @@ long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev)
                smcibdev->roce_cq_recv = NULL;
                goto err;
        }
-       INIT_IB_EVENT_HANDLER(&smcibdev->event_handler, smcibdev->ibdev,
-                             smc_ib_global_event_handler);
-       ib_register_event_handler(&smcibdev->event_handler);
        smc_wr_add_dev(smcibdev);
        smcibdev->initialized = 1;
        return rc;
@@ -472,7 +499,6 @@ static void smc_ib_cleanup_per_ibdev(struct smc_ib_device *smcibdev)
                return;
        smcibdev->initialized = 0;
        smc_wr_remove_dev(smcibdev);
-       ib_unregister_event_handler(&smcibdev->event_handler);
        ib_destroy_cq(smcibdev->roce_cq_recv);
        ib_destroy_cq(smcibdev->roce_cq_send);
 }
@@ -483,6 +509,8 @@ static struct ib_client smc_ib_client;
 static void smc_ib_add_dev(struct ib_device *ibdev)
 {
        struct smc_ib_device *smcibdev;
+       u8 port_cnt;
+       int i;
 
        if (ibdev->node_type != RDMA_NODE_IB_CA)
                return;
@@ -498,6 +526,21 @@ static void smc_ib_add_dev(struct ib_device *ibdev)
        list_add_tail(&smcibdev->list, &smc_ib_devices.list);
        spin_unlock(&smc_ib_devices.lock);
        ib_set_client_data(ibdev, &smc_ib_client, smcibdev);
+       INIT_IB_EVENT_HANDLER(&smcibdev->event_handler, smcibdev->ibdev,
+                             smc_ib_global_event_handler);
+       ib_register_event_handler(&smcibdev->event_handler);
+
+       /* trigger reading of the port attributes */
+       port_cnt = smcibdev->ibdev->phys_port_cnt;
+       for (i = 0; i < min_t(size_t, port_cnt, SMC_MAX_PORTS); i++) {
+               set_bit(i, &smcibdev->port_event_mask);
+               /* determine pnetids of the port */
+               smc_pnetid_by_dev_port(ibdev->dev.parent, i,
+                                      smcibdev->pnetid[i]);
+       }
+       schedule_work(&smcibdev->port_event_work);
 }
 
 /* callback function for ib_register_client() */
@@ -512,6 +555,7 @@ static void smc_ib_remove_dev(struct ib_device *ibdev, void *client_data)
        spin_unlock(&smc_ib_devices.lock);
        smc_pnet_remove_by_ibdev(smcibdev);
        smc_ib_cleanup_per_ibdev(smcibdev);
+       ib_unregister_event_handler(&smcibdev->event_handler);
        kfree(smcibdev);
 }
 
index e90630dadf8e9565e129e07c0ff8e782acab7fb8..bac7fd65a4c031ff059e9e8ac0377dcb57c835ab 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/interrupt.h>
 #include <linux/if_ether.h>
 #include <rdma/ib_verbs.h>
+#include <net/smc.h>
 
 #define SMC_MAX_PORTS                  2       /* Max # of ports */
 #define SMC_GID_SIZE                   sizeof(union ib_gid)
@@ -39,7 +40,8 @@ struct smc_ib_device {                                /* ib-device infos for smc */
        struct tasklet_struct   recv_tasklet;   /* called by recv cq handler */
        char                    mac[SMC_MAX_PORTS][ETH_ALEN];
                                                /* mac address per port*/
-       union ib_gid            gid[SMC_MAX_PORTS]; /* gid per port */
+       u8                      pnetid[SMC_MAX_PORTS][SMC_MAX_PNETID_LEN];
+                                               /* pnetid per port */
        u8                      initialized : 1; /* ib dev CQ, evthdl done */
        struct work_struct      port_event_work;
        unsigned long           port_event_mask;
@@ -51,7 +53,6 @@ struct smc_link;
 int smc_ib_register_client(void) __init;
 void smc_ib_unregister_client(void);
 bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport);
-int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport);
 int smc_ib_buf_map_sg(struct smc_ib_device *smcibdev,
                      struct smc_buf_desc *buf_slot,
                      enum dma_data_direction data_direction);
@@ -75,4 +76,6 @@ void smc_ib_sync_sg_for_cpu(struct smc_ib_device *smcibdev,
 void smc_ib_sync_sg_for_device(struct smc_ib_device *smcibdev,
                               struct smc_buf_desc *buf_slot,
                               enum dma_data_direction data_direction);
+int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport,
+                        unsigned short vlan_id, u8 gid[], u8 *sgid_index);
 #endif
diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c
new file mode 100644 (file)
index 0000000..cfade7f
--- /dev/null
@@ -0,0 +1,314 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Shared Memory Communications Direct over ISM devices (SMC-D)
+ *
+ * Functions for ISM device.
+ *
+ * Copyright IBM Corp. 2018
+ */
+
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <asm/page.h>
+
+#include "smc.h"
+#include "smc_core.h"
+#include "smc_ism.h"
+#include "smc_pnet.h"
+
+struct smcd_dev_list smcd_dev_list = {
+       .list = LIST_HEAD_INIT(smcd_dev_list.list),
+       .lock = __SPIN_LOCK_UNLOCKED(smcd_dev_list.lock)
+};
+
+/* Test whether ISM communication with the peer is possible. */
+int smc_ism_cantalk(u64 peer_gid, unsigned short vlan_id, struct smcd_dev *smcd)
+{
+       return smcd->ops->query_remote_gid(smcd, peer_gid, vlan_id ? 1 : 0,
+                                          vlan_id);
+}
+
+int smc_ism_write(struct smcd_dev *smcd, const struct smc_ism_position *pos,
+                 void *data, size_t len)
+{
+       int rc;
+
+       rc = smcd->ops->move_data(smcd, pos->token, pos->index, pos->signal,
+                                 pos->offset, data, len);
+
+       return rc < 0 ? rc : 0;
+}
+
+/* Set a connection using this DMBE. */
+void smc_ism_set_conn(struct smc_connection *conn)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&conn->lgr->smcd->lock, flags);
+       conn->lgr->smcd->conn[conn->rmb_desc->sba_idx] = conn;
+       spin_unlock_irqrestore(&conn->lgr->smcd->lock, flags);
+}
+
+/* Unset a connection using this DMBE. */
+void smc_ism_unset_conn(struct smc_connection *conn)
+{
+       unsigned long flags;
+
+       if (!conn->rmb_desc)
+               return;
+
+       spin_lock_irqsave(&conn->lgr->smcd->lock, flags);
+       conn->lgr->smcd->conn[conn->rmb_desc->sba_idx] = NULL;
+       spin_unlock_irqrestore(&conn->lgr->smcd->lock, flags);
+}
+
+/* Register a VLAN identifier with the ISM device. Use a reference count
+ * and add a VLAN identifier only when the first DMB using this VLAN is
+ * registered.
+ */
+int smc_ism_get_vlan(struct smcd_dev *smcd, unsigned short vlanid)
+{
+       struct smc_ism_vlanid *new_vlan, *vlan;
+       unsigned long flags;
+       int rc = 0;
+
+       if (!vlanid)                    /* No valid vlan id */
+               return -EINVAL;
+
+       /* create new vlan entry, in case we need it */
+       new_vlan = kzalloc(sizeof(*new_vlan), GFP_KERNEL);
+       if (!new_vlan)
+               return -ENOMEM;
+       new_vlan->vlanid = vlanid;
+       refcount_set(&new_vlan->refcnt, 1);
+
+       /* if there is an existing entry, increase count and return */
+       spin_lock_irqsave(&smcd->lock, flags);
+       list_for_each_entry(vlan, &smcd->vlan, list) {
+               if (vlan->vlanid == vlanid) {
+                       refcount_inc(&vlan->refcnt);
+                       kfree(new_vlan);
+                       goto out;
+               }
+       }
+
+       /* no existing entry found.
+        * add new entry to device; might fail, e.g., if HW limit reached
+        */
+       if (smcd->ops->add_vlan_id(smcd, vlanid)) {
+               kfree(new_vlan);
+               rc = -EIO;
+               goto out;
+       }
+       list_add_tail(&new_vlan->list, &smcd->vlan);
+out:
+       spin_unlock_irqrestore(&smcd->lock, flags);
+       return rc;
+}
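The VLAN get/put pair reference-counts VLAN registrations per ISM device: the new entry is allocated before the lock is taken (allocation can sleep, the irq-safe lock section cannot), an existing entry only has its count bumped, and the device's add_vlan_id()/del_vlan_id() hooks run solely on the first get and the last put. A userspace analogue of the get path (pthread mutex instead of the spinlock; names illustrative):

    #include <stdio.h>
    #include <stdlib.h>
    #include <pthread.h>

    struct vlan_entry {
        unsigned short vlanid;
        int refcnt;
        struct vlan_entry *next;
    };

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static struct vlan_entry *vlans;

    static int get_vlan(unsigned short vlanid)
    {
        struct vlan_entry *v, *new_vlan;

        if (!vlanid)
            return -1;                  /* no valid vlan id */

        /* allocate before locking; thrown away if the entry exists */
        new_vlan = malloc(sizeof(*new_vlan));
        if (!new_vlan)
            return -1;
        new_vlan->vlanid = vlanid;
        new_vlan->refcnt = 1;

        pthread_mutex_lock(&lock);
        for (v = vlans; v; v = v->next) {
            if (v->vlanid == vlanid) {  /* already registered */
                v->refcnt++;
                free(new_vlan);
                goto out;
            }
        }
        /* first user: the kernel would call ops->add_vlan_id() here */
        new_vlan->next = vlans;
        vlans = new_vlan;
    out:
        pthread_mutex_unlock(&lock);
        return 0;
    }

    int main(void)
    {
        get_vlan(100);
        get_vlan(100);
        printf("vlan %hu refcnt %d\n", vlans->vlanid, vlans->refcnt);
        return 0;
    }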
+
+/* Unregister a VLAN identifier with the ISM device. Use a reference count
+ * and remove a VLAN identifier only when the last DMB using this VLAN is
+ * unregistered.
+ */
+int smc_ism_put_vlan(struct smcd_dev *smcd, unsigned short vlanid)
+{
+       struct smc_ism_vlanid *vlan;
+       unsigned long flags;
+       bool found = false;
+       int rc = 0;
+
+       if (!vlanid)                    /* No valid vlan id */
+               return -EINVAL;
+
+       spin_lock_irqsave(&smcd->lock, flags);
+       list_for_each_entry(vlan, &smcd->vlan, list) {
+               if (vlan->vlanid == vlanid) {
+                       if (!refcount_dec_and_test(&vlan->refcnt))
+                               goto out;
+                       found = true;
+                       break;
+               }
+       }
+       if (!found) {
+               rc = -ENOENT;
+               goto out;               /* VLAN id not in table */
+       }
+
+       /* Found and the last reference just gone */
+       if (smcd->ops->del_vlan_id(smcd, vlanid))
+               rc = -EIO;
+       list_del(&vlan->list);
+       kfree(vlan);
+out:
+       spin_unlock_irqrestore(&smcd->lock, flags);
+       return rc;
+}
+
+int smc_ism_unregister_dmb(struct smcd_dev *smcd, struct smc_buf_desc *dmb_desc)
+{
+       struct smcd_dmb dmb;
+
+       memset(&dmb, 0, sizeof(dmb));
+       dmb.dmb_tok = dmb_desc->token;
+       dmb.sba_idx = dmb_desc->sba_idx;
+       dmb.cpu_addr = dmb_desc->cpu_addr;
+       dmb.dma_addr = dmb_desc->dma_addr;
+       dmb.dmb_len = dmb_desc->len;
+       return smcd->ops->unregister_dmb(smcd, &dmb);
+}
+
+int smc_ism_register_dmb(struct smc_link_group *lgr, int dmb_len,
+                        struct smc_buf_desc *dmb_desc)
+{
+       struct smcd_dmb dmb;
+       int rc;
+
+       memset(&dmb, 0, sizeof(dmb));
+       dmb.dmb_len = dmb_len;
+       dmb.sba_idx = dmb_desc->sba_idx;
+       dmb.vlan_id = lgr->vlan_id;
+       dmb.rgid = lgr->peer_gid;
+       rc = lgr->smcd->ops->register_dmb(lgr->smcd, &dmb);
+       if (!rc) {
+               dmb_desc->sba_idx = dmb.sba_idx;
+               dmb_desc->token = dmb.dmb_tok;
+               dmb_desc->cpu_addr = dmb.cpu_addr;
+               dmb_desc->dma_addr = dmb.dma_addr;
+               dmb_desc->len = dmb.dmb_len;
+       }
+       return rc;
+}
+
+struct smc_ism_event_work {
+       struct work_struct work;
+       struct smcd_dev *smcd;
+       struct smcd_event event;
+};
+
+/* worker for SMC-D events */
+static void smc_ism_event_work(struct work_struct *work)
+{
+       struct smc_ism_event_work *wrk =
+               container_of(work, struct smc_ism_event_work, work);
+
+       switch (wrk->event.type) {
+       case ISM_EVENT_GID:     /* GID event, token is peer GID */
+               smc_smcd_terminate(wrk->smcd, wrk->event.tok);
+               break;
+       case ISM_EVENT_DMB:
+               break;
+       }
+       kfree(wrk);
+}
+
+static void smcd_release(struct device *dev)
+{
+       struct smcd_dev *smcd = container_of(dev, struct smcd_dev, dev);
+
+       kfree(smcd->conn);
+       kfree(smcd);
+}
+
+struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name,
+                               const struct smcd_ops *ops, int max_dmbs)
+{
+       struct smcd_dev *smcd;
+
+       smcd = kzalloc(sizeof(*smcd), GFP_KERNEL);
+       if (!smcd)
+               return NULL;
+       smcd->conn = kcalloc(max_dmbs, sizeof(struct smc_connection *),
+                            GFP_KERNEL);
+       if (!smcd->conn) {
+               kfree(smcd);
+               return NULL;
+       }
+
+       smcd->dev.parent = parent;
+       smcd->dev.release = smcd_release;
+       device_initialize(&smcd->dev);
+       dev_set_name(&smcd->dev, "%s", name);
+       smcd->ops = ops;
+       smc_pnetid_by_dev_port(parent, 0, smcd->pnetid);
+
+       spin_lock_init(&smcd->lock);
+       INIT_LIST_HEAD(&smcd->vlan);
+       smcd->event_wq = alloc_ordered_workqueue("ism_evt_wq-%s",
+                                                WQ_MEM_RECLAIM, name);
+       if (!smcd->event_wq) {
+               put_device(&smcd->dev);
+               return NULL;
+       }
+       return smcd;
+}
+EXPORT_SYMBOL_GPL(smcd_alloc_dev);
+
+int smcd_register_dev(struct smcd_dev *smcd)
+{
+       spin_lock(&smcd_dev_list.lock);
+       list_add_tail(&smcd->list, &smcd_dev_list.list);
+       spin_unlock(&smcd_dev_list.lock);
+
+       return device_add(&smcd->dev);
+}
+EXPORT_SYMBOL_GPL(smcd_register_dev);
+
+void smcd_unregister_dev(struct smcd_dev *smcd)
+{
+       spin_lock(&smcd_dev_list.lock);
+       list_del(&smcd->list);
+       spin_unlock(&smcd_dev_list.lock);
+       flush_workqueue(smcd->event_wq);
+       destroy_workqueue(smcd->event_wq);
+       smc_smcd_terminate(smcd, 0);
+
+       device_del(&smcd->dev);
+}
+EXPORT_SYMBOL_GPL(smcd_unregister_dev);
+
+void smcd_free_dev(struct smcd_dev *smcd)
+{
+       put_device(&smcd->dev);
+}
+EXPORT_SYMBOL_GPL(smcd_free_dev);
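+
+/* Lifecycle sketch for ISM driver authors (driver names hypothetical,
+ * for illustration only):
+ *
+ *	smcd = smcd_alloc_dev(&pdev->dev, "ism0", &my_ism_ops, MY_MAX_DMBS);
+ *	if (!smcd)
+ *		return -ENOMEM;
+ *	rc = smcd_register_dev(smcd);	(device becomes usable for SMC-D)
+ *	...
+ *	smcd_unregister_dev(smcd);	(terminates all SMC-D usage)
+ *	smcd_free_dev(smcd);		(drops the final device reference)
+ */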
+
+/* SMCD Device event handler. Called from ISM device interrupt handler.
+ * Parameters are the smcd device pointer and the event:
+ * - event->type (0 --> DMB, 1 --> GID),
+ * - event->code (event code),
+ * - event->tok (either DMB token when event type 0, or GID when event type 1)
+ * - event->time (time of day)
+ * - event->info (debug info).
+ *
+ * Context:
+ * - Function called in IRQ context from ISM device driver event handler.
+ */
+void smcd_handle_event(struct smcd_dev *smcd, struct smcd_event *event)
+{
+       struct smc_ism_event_work *wrk;
+
+       /* copy event to event work queue, and let it be handled there */
+       wrk = kmalloc(sizeof(*wrk), GFP_ATOMIC);
+       if (!wrk)
+               return;
+       INIT_WORK(&wrk->work, smc_ism_event_work);
+       wrk->smcd = smcd;
+       wrk->event = *event;
+       queue_work(smcd->event_wq, &wrk->work);
+}
+EXPORT_SYMBOL_GPL(smcd_handle_event);
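+
+/* Illustrative caller (hypothetical ISM driver, assumed names): from its
+ * interrupt handler the driver forwards a hardware event roughly as
+ *
+ *	struct smcd_event ev = { .type = ISM_EVENT_GID, .tok = peer_gid };
+ *
+ *	smcd_handle_event(priv->smcd, &ev);
+ *
+ * Only the GFP_ATOMIC allocation of the work item happens in IRQ context;
+ * the event itself is handled later in smc_ism_event_work() in process
+ * context.
+ */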
+
+/* SMCD Device interrupt handler. Called from ISM device interrupt handler.
+ * Parameters are the smcd device pointer and the DMB number. Find the
+ * connection and schedule its RX tasklet.
+ *
+ * Context:
+ * - Function called in IRQ context from ISM device driver IRQ handler.
+ */
+void smcd_handle_irq(struct smcd_dev *smcd, unsigned int dmbno)
+{
+       struct smc_connection *conn = NULL;
+       unsigned long flags;
+
+       spin_lock_irqsave(&smcd->lock, flags);
+       conn = smcd->conn[dmbno];
+       if (conn)
+               tasklet_schedule(&conn->rx_tsklet);
+       spin_unlock_irqrestore(&smcd->lock, flags);
+}
+EXPORT_SYMBOL_GPL(smcd_handle_irq);
diff --git a/net/smc/smc_ism.h b/net/smc/smc_ism.h
new file mode 100644 (file)
index 0000000..aee45b8
--- /dev/null
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Shared Memory Communications Direct over ISM devices (SMC-D)
+ *
+ * SMC-D ISM device structure definitions.
+ *
+ * Copyright IBM Corp. 2018
+ */
+
+#ifndef SMCD_ISM_H
+#define SMCD_ISM_H
+
+#include <linux/uio.h>
+
+#include "smc.h"
+
+struct smcd_dev_list { /* List of SMCD devices */
+       struct list_head list;
+       spinlock_t lock;        /* Protects list of devices */
+};
+
+extern struct smcd_dev_list    smcd_dev_list; /* list of smcd devices */
+
+struct smc_ism_vlanid {                        /* VLAN id set on ISM device */
+       struct list_head list;
+       unsigned short vlanid;          /* VLAN id */
+       refcount_t refcnt;              /* Reference count */
+};
+
+struct smc_ism_position {      /* ISM device position to write to */
+       u64 token;              /* Token of DMB */
+       u32 offset;             /* Offset into DMBE */
+       u8 index;               /* Index of DMBE */
+       u8 signal;              /* Generate interrupt on owner side */
+};
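+
+/* Sketch of how a writer fills the position before calling smc_ism_write()
+ * (field sources as in smcd_tx_ism_write() in smc_tx.c):
+ *
+ *	struct smc_ism_position pos = {
+ *		.token  = conn->peer_token,
+ *		.index  = conn->peer_rmbe_idx,
+ *		.offset = conn->tx_off + offset,
+ *		.signal = signal,
+ *	};
+ */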
+
+struct smcd_dev;
+
+int smc_ism_cantalk(u64 peer_gid, unsigned short vlan_id, struct smcd_dev *dev);
+void smc_ism_set_conn(struct smc_connection *conn);
+void smc_ism_unset_conn(struct smc_connection *conn);
+int smc_ism_get_vlan(struct smcd_dev *dev, unsigned short vlan_id);
+int smc_ism_put_vlan(struct smcd_dev *dev, unsigned short vlan_id);
+int smc_ism_register_dmb(struct smc_link_group *lgr, int buf_size,
+                        struct smc_buf_desc *dmb_desc);
+int smc_ism_unregister_dmb(struct smcd_dev *dev, struct smc_buf_desc *dmb_desc);
+int smc_ism_write(struct smcd_dev *dev, const struct smc_ism_position *pos,
+                 void *data, size_t len);
+#endif
index 5800a6b43d830018ffc91a4272190508e8c9f233..9c916c709ca71129d52d3e2d695e5a8076244458 100644 (file)
@@ -182,12 +182,10 @@ static int smc_llc_add_pending_send(struct smc_link *link,
 }
 
 /* high-level API to send LLC confirm link */
-int smc_llc_send_confirm_link(struct smc_link *link, u8 mac[],
-                             union ib_gid *gid,
+int smc_llc_send_confirm_link(struct smc_link *link,
                              enum smc_llc_reqresp reqresp)
 {
-       struct smc_link_group *lgr = container_of(link, struct smc_link_group,
-                                                 lnk[SMC_SINGLE_LINK]);
+       struct smc_link_group *lgr = smc_get_lgr(link);
        struct smc_llc_msg_confirm_link *confllc;
        struct smc_wr_tx_pend_priv *pend;
        struct smc_wr_buf *wr_buf;
@@ -203,8 +201,9 @@ int smc_llc_send_confirm_link(struct smc_link *link, u8 mac[],
        confllc->hd.flags |= SMC_LLC_FLAG_NO_RMBE_EYEC;
        if (reqresp == SMC_LLC_RESP)
                confllc->hd.flags |= SMC_LLC_FLAG_RESP;
-       memcpy(confllc->sender_mac, mac, ETH_ALEN);
-       memcpy(confllc->sender_gid, gid, SMC_GID_SIZE);
+       memcpy(confllc->sender_mac, link->smcibdev->mac[link->ibport - 1],
+              ETH_ALEN);
+       memcpy(confllc->sender_gid, link->gid, SMC_GID_SIZE);
        hton24(confllc->sender_qp_num, link->roce_qp->qp_num);
        confllc->link_num = link->link_id;
        memcpy(confllc->link_uid, lgr->id, SMC_LGR_ID_SIZE);
@@ -241,8 +240,7 @@ static int smc_llc_send_confirm_rkey(struct smc_link *link,
 
 /* prepare an add link message */
 static void smc_llc_prep_add_link(struct smc_llc_msg_add_link *addllc,
-                                 struct smc_link *link, u8 mac[],
-                                 union ib_gid *gid,
+                                 struct smc_link *link, u8 mac[], u8 gid[],
                                  enum smc_llc_reqresp reqresp)
 {
        memset(addllc, 0, sizeof(*addllc));
@@ -259,8 +257,7 @@ static void smc_llc_prep_add_link(struct smc_llc_msg_add_link *addllc,
 }
 
 /* send ADD LINK request or response */
-int smc_llc_send_add_link(struct smc_link *link, u8 mac[],
-                         union ib_gid *gid,
+int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[],
                          enum smc_llc_reqresp reqresp)
 {
        struct smc_llc_msg_add_link *addllc;
@@ -281,7 +278,7 @@ int smc_llc_send_add_link(struct smc_link *link, u8 mac[],
 /* prepare a delete link message */
 static void smc_llc_prep_delete_link(struct smc_llc_msg_del_link *delllc,
                                     struct smc_link *link,
-                                    enum smc_llc_reqresp reqresp)
+                                    enum smc_llc_reqresp reqresp, bool orderly)
 {
        memset(delllc, 0, sizeof(*delllc));
        delllc->hd.common.type = SMC_LLC_DELETE_LINK;
@@ -290,13 +287,14 @@ static void smc_llc_prep_delete_link(struct smc_llc_msg_del_link *delllc,
                delllc->hd.flags |= SMC_LLC_FLAG_RESP;
        /* DEL_LINK_ALL because only 1 link supported */
        delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ALL;
-       delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY;
+       if (orderly)
+               delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY;
        delllc->link_num = link->link_id;
 }
 
 /* send DELETE LINK request or response */
 int smc_llc_send_delete_link(struct smc_link *link,
-                            enum smc_llc_reqresp reqresp)
+                            enum smc_llc_reqresp reqresp, bool orderly)
 {
        struct smc_llc_msg_del_link *delllc;
        struct smc_wr_tx_pend_priv *pend;
@@ -307,7 +305,7 @@ int smc_llc_send_delete_link(struct smc_link *link,
        if (rc)
                return rc;
        delllc = (struct smc_llc_msg_del_link *)wr_buf;
-       smc_llc_prep_delete_link(delllc, link, reqresp);
+       smc_llc_prep_delete_link(delllc, link, reqresp, orderly);
        /* send llc message */
        rc = smc_wr_tx_send(link, pend);
        return rc;
@@ -381,11 +379,9 @@ static int smc_llc_send_message(struct smc_link *link, void *llcbuf, int llclen)
 static void smc_llc_rx_confirm_link(struct smc_link *link,
                                    struct smc_llc_msg_confirm_link *llc)
 {
-       struct smc_link_group *lgr;
+       struct smc_link_group *lgr = smc_get_lgr(link);
        int conf_rc;
 
-       lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
-
        /* RMBE eyecatchers are not supported */
        if (llc->hd.flags & SMC_LLC_FLAG_NO_RMBE_EYEC)
                conf_rc = 0;
@@ -411,8 +407,7 @@ static void smc_llc_rx_confirm_link(struct smc_link *link,
 static void smc_llc_rx_add_link(struct smc_link *link,
                                struct smc_llc_msg_add_link *llc)
 {
-       struct smc_link_group *lgr = container_of(link, struct smc_link_group,
-                                                 lnk[SMC_SINGLE_LINK]);
+       struct smc_link_group *lgr = smc_get_lgr(link);
 
        if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
                if (link->state == SMC_LNK_ACTIVATING)
@@ -426,14 +421,12 @@ static void smc_llc_rx_add_link(struct smc_link *link,
                if (lgr->role == SMC_SERV) {
                        smc_llc_prep_add_link(llc, link,
                                        link->smcibdev->mac[link->ibport - 1],
-                                       &link->smcibdev->gid[link->ibport - 1],
-                                       SMC_LLC_REQ);
+                                       link->gid, SMC_LLC_REQ);
 
                } else {
                        smc_llc_prep_add_link(llc, link,
                                        link->smcibdev->mac[link->ibport - 1],
-                                       &link->smcibdev->gid[link->ibport - 1],
-                                       SMC_LLC_RESP);
+                                       link->gid, SMC_LLC_RESP);
                }
                smc_llc_send_message(link, llc, sizeof(*llc));
        }
@@ -442,22 +435,23 @@ static void smc_llc_rx_add_link(struct smc_link *link,
 static void smc_llc_rx_delete_link(struct smc_link *link,
                                   struct smc_llc_msg_del_link *llc)
 {
-       struct smc_link_group *lgr = container_of(link, struct smc_link_group,
-                                                 lnk[SMC_SINGLE_LINK]);
+       struct smc_link_group *lgr = smc_get_lgr(link);
 
        if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
                if (lgr->role == SMC_SERV)
-                       smc_lgr_terminate(lgr);
+                       smc_lgr_schedule_free_work_fast(lgr);
        } else {
+               smc_lgr_forget(lgr);
+               smc_llc_link_deleting(link);
                if (lgr->role == SMC_SERV) {
-                       smc_lgr_forget(lgr);
-                       smc_llc_prep_delete_link(llc, link, SMC_LLC_REQ);
-                       smc_llc_send_message(link, llc, sizeof(*llc));
+                       /* client asks to delete this link, send request */
+                       smc_llc_prep_delete_link(llc, link, SMC_LLC_REQ, true);
                } else {
-                       smc_llc_prep_delete_link(llc, link, SMC_LLC_RESP);
-                       smc_llc_send_message(link, llc, sizeof(*llc));
-                       smc_lgr_terminate(lgr);
+                       /* server requests to delete this link, send response */
+                       smc_llc_prep_delete_link(llc, link, SMC_LLC_RESP, true);
                }
+               smc_llc_send_message(link, llc, sizeof(*llc));
+               smc_lgr_schedule_free_work_fast(lgr);
        }
 }
 
@@ -476,17 +470,14 @@ static void smc_llc_rx_test_link(struct smc_link *link,
 static void smc_llc_rx_confirm_rkey(struct smc_link *link,
                                    struct smc_llc_msg_confirm_rkey *llc)
 {
-       struct smc_link_group *lgr;
        int rc;
 
-       lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
-
        if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
                link->llc_confirm_rkey_rc = llc->hd.flags &
                                            SMC_LLC_FLAG_RKEY_NEG;
                complete(&link->llc_confirm_rkey);
        } else {
-               rc = smc_rtoken_add(lgr,
+               rc = smc_rtoken_add(smc_get_lgr(link),
                                    llc->rtoken[0].rmb_vaddr,
                                    llc->rtoken[0].rmb_key);
 
@@ -514,18 +505,15 @@ static void smc_llc_rx_confirm_rkey_cont(struct smc_link *link,
 static void smc_llc_rx_delete_rkey(struct smc_link *link,
                                   struct smc_llc_msg_delete_rkey *llc)
 {
-       struct smc_link_group *lgr;
        u8 err_mask = 0;
        int i, max;
 
-       lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
-
        if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
                /* unused as long as we don't send this type of msg */
        } else {
                max = min_t(u8, llc->num_rkeys, SMC_LLC_DEL_RKEY_MAX);
                for (i = 0; i < max; i++) {
-                       if (smc_rtoken_delete(lgr, llc->rkey[i]))
+                       if (smc_rtoken_delete(smc_get_lgr(link), llc->rkey[i]))
                                err_mask |= 1 << (SMC_LLC_DEL_RKEY_MAX - 1 - i);
                }
 
@@ -583,12 +571,10 @@ static void smc_llc_testlink_work(struct work_struct *work)
        struct smc_link *link = container_of(to_delayed_work(work),
                                             struct smc_link, llc_testlink_wrk);
        unsigned long next_interval;
-       struct smc_link_group *lgr;
        unsigned long expire_time;
        u8 user_data[16] = { 0 };
        int rc;
 
-       lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
        if (link->state != SMC_LNK_ACTIVE)
                return;         /* don't reschedule worker */
        expire_time = link->wr_rx_tstamp + link->llc_testlink_time;
@@ -602,7 +588,7 @@ static void smc_llc_testlink_work(struct work_struct *work)
        rc = wait_for_completion_interruptible_timeout(&link->llc_testlink_resp,
                                                       SMC_LLC_WAIT_TIME);
        if (rc <= 0) {
-               smc_lgr_terminate(lgr);
+               smc_lgr_terminate(smc_get_lgr(link));
                return;
        }
        next_interval = link->llc_testlink_time;
@@ -613,8 +599,7 @@ out:
 
 int smc_llc_link_init(struct smc_link *link)
 {
-       struct smc_link_group *lgr = container_of(link, struct smc_link_group,
-                                                 lnk[SMC_SINGLE_LINK]);
+       struct smc_link_group *lgr = smc_get_lgr(link);
        link->llc_wq = alloc_ordered_workqueue("llc_wq-%x:%x", WQ_MEM_RECLAIM,
                                               *((u32 *)lgr->id),
                                               link->link_id);
@@ -640,6 +625,11 @@ void smc_llc_link_active(struct smc_link *link, int testlink_time)
        }
 }
 
+void smc_llc_link_deleting(struct smc_link *link)
+{
+       link->state = SMC_LNK_DELETING;
+}
+
 /* called in tasklet context */
 void smc_llc_link_inactive(struct smc_link *link)
 {
index 65c8645e96a1438febacdcbe3470003d5b75b504..9e2ff088e30188e08c1658d9904838318b18be12 100644 (file)
@@ -36,14 +36,15 @@ enum smc_llc_msg_type {
 };
 
 /* transmit */
-int smc_llc_send_confirm_link(struct smc_link *lnk, u8 mac[], union ib_gid *gid,
+int smc_llc_send_confirm_link(struct smc_link *lnk,
                              enum smc_llc_reqresp reqresp);
-int smc_llc_send_add_link(struct smc_link *link, u8 mac[], union ib_gid *gid,
+int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[],
                          enum smc_llc_reqresp reqresp);
 int smc_llc_send_delete_link(struct smc_link *link,
-                            enum smc_llc_reqresp reqresp);
+                            enum smc_llc_reqresp reqresp, bool orderly);
 int smc_llc_link_init(struct smc_link *link);
 void smc_llc_link_active(struct smc_link *link, int testlink_time);
+void smc_llc_link_deleting(struct smc_link *link);
 void smc_llc_link_inactive(struct smc_link *link);
 void smc_llc_link_clear(struct smc_link *link);
 int smc_llc_do_confirm_rkey(struct smc_link *link,
index d7b88b2d1b224195b2d82523c047052c67f2e1eb..01c6ce042a1cdb338d81167b1a495693a2a22154 100644 (file)
 
 #include "smc_pnet.h"
 #include "smc_ib.h"
-
-#define SMC_MAX_PNET_ID_LEN    16      /* Max. length of PNET id */
+#include "smc_ism.h"
 
 static struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = {
        [SMC_PNETID_NAME] = {
                .type = NLA_NUL_STRING,
-               .len = SMC_MAX_PNET_ID_LEN - 1
+               .len = SMC_MAX_PNETID_LEN - 1
        },
        [SMC_PNETID_ETHNAME] = {
                .type = NLA_NUL_STRING,
@@ -65,7 +64,7 @@ static struct smc_pnettable {
  */
 struct smc_pnetentry {
        struct list_head list;
-       char pnet_name[SMC_MAX_PNET_ID_LEN + 1];
+       char pnet_name[SMC_MAX_PNETID_LEN + 1];
        struct net_device *ndev;
        struct smc_ib_device *smcibdev;
        u8 ib_port;
@@ -209,7 +208,7 @@ static bool smc_pnetid_valid(const char *pnet_name, char *pnetid)
                return false;
        while (--end >= bf && isspace(*end))
                ;
-       if (end - bf >= SMC_MAX_PNET_ID_LEN)
+       if (end - bf >= SMC_MAX_PNETID_LEN)
                return false;
        while (bf <= end) {
                if (!isalnum(*bf))
@@ -358,9 +357,6 @@ static int smc_pnet_add(struct sk_buff *skb, struct genl_info *info)
                kfree(pnetelem);
                return rc;
        }
-       rc = smc_ib_remember_port_attr(pnetelem->smcibdev, pnetelem->ib_port);
-       if (rc)
-               smc_pnet_remove_by_pnetid(pnetelem->pnet_name);
        return rc;
 }
 
@@ -485,10 +481,10 @@ static int smc_pnet_netdev_event(struct notifier_block *this,
        case NETDEV_REBOOT:
        case NETDEV_UNREGISTER:
                smc_pnet_remove_by_ndev(event_dev);
+               return NOTIFY_OK;
        default:
-               break;
+               return NOTIFY_DONE;
        }
-       return NOTIFY_DONE;
 }
 
 static struct notifier_block smc_netdev_notifier = {
@@ -515,28 +511,104 @@ void smc_pnet_exit(void)
        genl_unregister_family(&smc_pnet_nl_family);
 }
 
-/* PNET table analysis for a given sock:
- * determine ib_device and port belonging to used internal TCP socket
- * ethernet interface.
+/* Determine one base device for stacked net devices.
+ * If the lower device level contains more than one device
+ * (for instance with bonding slaves), just the first device
+ * is used to reach a base device.
  */
-void smc_pnet_find_roce_resource(struct sock *sk,
-                                struct smc_ib_device **smcibdev, u8 *ibport)
+static struct net_device *pnet_find_base_ndev(struct net_device *ndev)
 {
-       struct dst_entry *dst = sk_dst_get(sk);
-       struct smc_pnetentry *pnetelem;
+       int i, nest_lvl;
 
-       *smcibdev = NULL;
-       *ibport = 0;
+       rtnl_lock();
+       nest_lvl = dev_get_nest_level(ndev);
+       for (i = 0; i < nest_lvl; i++) {
+               struct list_head *lower = &ndev->adj_list.lower;
+
+               if (list_empty(lower))
+                       break;
+               lower = lower->next;
+               ndev = netdev_lower_get_next(ndev, &lower);
+       }
+       rtnl_unlock();
+       return ndev;
+}
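+
+/* Example (device names illustrative): for vlan0 stacked on bond0 with
+ * slaves eth0/eth1, the walk descends vlan0 -> bond0 -> eth0, and the
+ * pnetid is then read from eth0's parent device.
+ */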
+
+/* Determine the corresponding IB device port based on the hardware PNETID.
+ * Searching stops at the first active IB device port with matching pnetid
+ * for which a GID for the given vlan_id can be determined.
+ */
+static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev,
+                                        struct smc_ib_device **smcibdev,
+                                        u8 *ibport, unsigned short vlan_id,
+                                        u8 gid[])
+{
+       u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
+       struct smc_ib_device *ibdev;
+       int i;
+
+       ndev = pnet_find_base_ndev(ndev);
+       if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port,
+                                  ndev_pnetid))
+               return; /* pnetid could not be determined */
+
+       spin_lock(&smc_ib_devices.lock);
+       list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
+               for (i = 1; i <= SMC_MAX_PORTS; i++) {
+                       if (!rdma_is_port_valid(ibdev->ibdev, i))
+                               continue;
+                       if (!memcmp(ibdev->pnetid[i - 1], ndev_pnetid,
+                                   SMC_MAX_PNETID_LEN) &&
+                           smc_ib_port_active(ibdev, i) &&
+                           !smc_ib_determine_gid(ibdev, i, vlan_id, gid,
+                                                 NULL)) {
+                               *smcibdev = ibdev;
+                               *ibport = i;
+                               goto out;
+                       }
+               }
+       }
+out:
+       spin_unlock(&smc_ib_devices.lock);
+}
+
+static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev,
+                                       struct smcd_dev **smcismdev)
+{
+       u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
+       struct smcd_dev *ismdev;
+
+       ndev = pnet_find_base_ndev(ndev);
+       if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port,
+                                  ndev_pnetid))
+               return; /* pnetid could not be determined */
+
+       spin_lock(&smcd_dev_list.lock);
+       list_for_each_entry(ismdev, &smcd_dev_list.list, list) {
+               if (!memcmp(ismdev->pnetid, ndev_pnetid, SMC_MAX_PNETID_LEN)) {
+                       *smcismdev = ismdev;
+                       break;
+               }
+       }
+       spin_unlock(&smcd_dev_list.lock);
+}
+
+/* Lookup of coupled ib_device via SMC pnet table */
+static void smc_pnet_find_roce_by_table(struct net_device *netdev,
+                                       struct smc_ib_device **smcibdev,
+                                       u8 *ibport, unsigned short vlan_id,
+                                       u8 gid[])
+{
+       struct smc_pnetentry *pnetelem;
 
-       if (!dst)
-               return;
-       if (!dst->dev)
-               goto out_rel;
        read_lock(&smc_pnettable.lock);
        list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) {
-               if (dst->dev == pnetelem->ndev) {
+               if (netdev == pnetelem->ndev) {
                        if (smc_ib_port_active(pnetelem->smcibdev,
-                                              pnetelem->ib_port)) {
+                                              pnetelem->ib_port) &&
+                           !smc_ib_determine_gid(pnetelem->smcibdev,
+                                                 pnetelem->ib_port, vlan_id,
+                                                 gid, NULL)) {
                                *smcibdev = pnetelem->smcibdev;
                                *ibport = pnetelem->ib_port;
                        }
@@ -544,6 +616,55 @@ void smc_pnet_find_roce_resource(struct sock *sk,
                }
        }
        read_unlock(&smc_pnettable.lock);
+}
+
+/* PNET table analysis for a given sock:
+ * determine the ib_device and port belonging to the ethernet interface
+ * used by the internal TCP socket.
+ */
+void smc_pnet_find_roce_resource(struct sock *sk,
+                                struct smc_ib_device **smcibdev, u8 *ibport,
+                                unsigned short vlan_id, u8 gid[])
+{
+       struct dst_entry *dst = sk_dst_get(sk);
+
+       *smcibdev = NULL;
+       *ibport = 0;
+
+       if (!dst)
+               goto out;
+       if (!dst->dev)
+               goto out_rel;
+
+       /* if possible, lookup via hardware-defined pnetid */
+       smc_pnet_find_roce_by_pnetid(dst->dev, smcibdev, ibport, vlan_id, gid);
+       if (*smcibdev)
+               goto out_rel;
+
+       /* lookup via SMC PNET table */
+       smc_pnet_find_roce_by_table(dst->dev, smcibdev, ibport, vlan_id, gid);
+
+out_rel:
+       dst_release(dst);
+out:
+       return;
+}
+
+void smc_pnet_find_ism_resource(struct sock *sk, struct smcd_dev **smcismdev)
+{
+       struct dst_entry *dst = sk_dst_get(sk);
+
+       *smcismdev = NULL;
+       if (!dst)
+               goto out;
+       if (!dst->dev)
+               goto out_rel;
+
+       /* if possible, lookup via hardware-defined pnetid */
+       smc_pnet_find_ism_by_pnetid(dst->dev, smcismdev);
+
 out_rel:
        dst_release(dst);
+out:
+       return;
 }
index 5a29519db976f2b1b542e7aacacaed4f3f8ea812..8ff777636e325f126d668d38edb2f66109216333 100644 (file)
 #ifndef _SMC_PNET_H
 #define _SMC_PNET_H
 
+#if IS_ENABLED(CONFIG_HAVE_PNETID)
+#include <asm/pnet.h>
+#endif
+
 struct smc_ib_device;
+struct smcd_dev;
+
+static inline int smc_pnetid_by_dev_port(struct device *dev,
+                                        unsigned short port, u8 *pnetid)
+{
+#if IS_ENABLED(CONFIG_HAVE_PNETID)
+       return pnet_id_by_dev_port(dev, port, pnetid);
+#else
+       return -ENOENT;
+#endif
+}
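+
+/* On platforms with CONFIG_HAVE_PNETID (s390, which provides asm/pnet.h)
+ * this resolves the hardware-defined PNETID of a device/port; elsewhere
+ * it returns -ENOENT, so callers transparently fall back to the
+ * user-defined SMC pnet table.
+ */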
 
 int smc_pnet_init(void) __init;
 void smc_pnet_exit(void);
 int smc_pnet_remove_by_ibdev(struct smc_ib_device *ibdev);
 void smc_pnet_find_roce_resource(struct sock *sk,
-                                struct smc_ib_device **smcibdev, u8 *ibport);
+                                struct smc_ib_device **smcibdev, u8 *ibport,
+                                unsigned short vlan_id, u8 gid[]);
+void smc_pnet_find_ism_resource(struct sock *sk, struct smcd_dev **smcismdev);
 
 #endif
index 3d77b383cccd97f7770580f3512e642aeae24d6b..bbcf0fe4ae10f631a84a2379d1c075f087db7037 100644 (file)
@@ -82,8 +82,7 @@ static int smc_rx_update_consumer(struct smc_sock *smc,
                }
        }
 
-       smc_curs_write(&conn->local_tx_ctrl.cons, smc_curs_read(&cons, conn),
-                      conn);
+       smc_curs_copy(&conn->local_tx_ctrl.cons, &cons, conn);
 
        /* send consumer cursor update if required */
        /* similar to advertising new TCP rcv_wnd if required */
@@ -97,8 +96,7 @@ static void smc_rx_update_cons(struct smc_sock *smc, size_t len)
        struct smc_connection *conn = &smc->conn;
        union smc_host_cursor cons;
 
-       smc_curs_write(&cons, smc_curs_read(&conn->local_tx_ctrl.cons, conn),
-                      conn);
+       smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn);
        smc_rx_update_consumer(smc, cons, len);
 }
 
@@ -157,10 +155,8 @@ static int smc_rx_splice(struct pipe_inode_info *pipe, char *src, size_t len,
        struct splice_pipe_desc spd;
        struct partial_page partial;
        struct smc_spd_priv *priv;
-       struct page *page;
        int bytes;
 
-       page = virt_to_page(smc->conn.rmb_desc->cpu_addr);
        priv = kzalloc(sizeof(*priv), GFP_KERNEL);
        if (!priv)
                return -ENOMEM;
@@ -172,7 +168,7 @@ static int smc_rx_splice(struct pipe_inode_info *pipe, char *src, size_t len,
 
        spd.nr_pages_max = 1;
        spd.nr_pages = 1;
-       spd.pages = &page;
+       spd.pages = &smc->conn.rmb_desc->pages;
        spd.partial = &partial;
        spd.ops = &smc_pipe_ops;
        spd.spd_release = smc_rx_spd_release;
@@ -245,10 +241,7 @@ static int smc_rx_recv_urg(struct smc_sock *smc, struct msghdr *msg, int len,
                        if (!(flags & MSG_TRUNC))
                                rc = memcpy_to_msg(msg, &conn->urg_rx_byte, 1);
                        len = 1;
-                       smc_curs_write(&cons,
-                                      smc_curs_read(&conn->local_tx_ctrl.cons,
-                                                    conn),
-                                      conn);
+                       smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn);
                        if (smc_curs_diff(conn->rmb_desc->len, &cons,
                                          &conn->urg_curs) > 1)
                                conn->urg_rx_skip_pend = true;
@@ -305,7 +298,7 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg,
        target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
 
        /* we currently use 1 RMBE per RMB, so RMBE == RMB base addr */
-       rcvbuf_base = conn->rmb_desc->cpu_addr;
+       rcvbuf_base = conn->rx_off + conn->rmb_desc->cpu_addr;
 
        do { /* while (read_remaining) */
                if (read_done >= target || (pipe && read_done))
@@ -370,9 +363,7 @@ copy:
                        continue;
                }
 
-               smc_curs_write(&cons,
-                              smc_curs_read(&conn->local_tx_ctrl.cons, conn),
-                              conn);
+               smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn);
                /* subsequent splice() calls pick up where previous left */
                if (splbytes)
                        smc_curs_add(conn->rmb_desc->len, &cons, splbytes);
index f82886b7d1d8394adada4998159a708c3c897a82..2f5e324e54b97a0712bc7d07c5de9307901e3b08 100644 (file)
@@ -24,6 +24,7 @@
 #include "smc.h"
 #include "smc_wr.h"
 #include "smc_cdc.h"
+#include "smc_ism.h"
 #include "smc_tx.h"
 
 #define SMC_TX_WORK_DELAY      HZ
@@ -180,9 +181,7 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len)
                copylen = min_t(size_t, send_remaining, writespace);
                /* determine start of sndbuf */
                sndbuf_base = conn->sndbuf_desc->cpu_addr;
-               smc_curs_write(&prep,
-                              smc_curs_read(&conn->tx_curs_prep, conn),
-                              conn);
+               smc_curs_copy(&prep, &conn->tx_curs_prep, conn);
                tx_cnt_prep = prep.count;
                /* determine chunks where to write into sndbuf */
                /* either unwrapped case, or 1st chunk of wrapped case */
@@ -213,9 +212,7 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len)
                smc_sndbuf_sync_sg_for_device(conn);
                /* update cursors */
                smc_curs_add(conn->sndbuf_desc->len, &prep, copylen);
-               smc_curs_write(&conn->tx_curs_prep,
-                              smc_curs_read(&prep, conn),
-                              conn);
+               smc_curs_copy(&conn->tx_curs_prep, &prep, conn);
                /* increased in send tasklet smc_cdc_tx_handler() */
                smp_mb__before_atomic();
                atomic_sub(copylen, &conn->sndbuf_space);
@@ -250,6 +247,24 @@ out_err:
 
 /***************************** sndbuf consumer *******************************/
 
+/* sndbuf consumer: actual data transfer of one target chunk with ISM write */
+int smcd_tx_ism_write(struct smc_connection *conn, void *data, size_t len,
+                     u32 offset, int signal)
+{
+       struct smc_ism_position pos;
+       int rc;
+
+       memset(&pos, 0, sizeof(pos));
+       pos.token = conn->peer_token;
+       pos.index = conn->peer_rmbe_idx;
+       pos.offset = conn->tx_off + offset;
+       pos.signal = signal;
+       rc = smc_ism_write(conn->lgr->smcd, &pos, data, len);
+       if (rc)
+               conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
+       return rc;
+}
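+
+/* Note: a failed ISM write is treated as fatal for this connection; the
+ * peer_conn_abort flag is raised in the local tx control area so the
+ * abort condition is signalled with the next CDC message.
+ */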
+
 /* sndbuf consumer: actual data transfer of one target chunk with RDMA write */
 static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset,
                             int num_sges, struct ib_sge sges[])
@@ -297,26 +312,109 @@ static inline void smc_tx_advance_cursors(struct smc_connection *conn,
        smc_curs_add(conn->sndbuf_desc->len, sent, len);
 }
 
+/* SMC-R helper for smc_tx_rdma_writes() */
+static int smcr_tx_rdma_writes(struct smc_connection *conn, size_t len,
+                              size_t src_off, size_t src_len,
+                              size_t dst_off, size_t dst_len)
+{
+       dma_addr_t dma_addr =
+               sg_dma_address(conn->sndbuf_desc->sgt[SMC_SINGLE_LINK].sgl);
+       struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];
+       int src_len_sum = src_len, dst_len_sum = dst_len;
+       struct ib_sge sges[SMC_IB_MAX_SEND_SGE];
+       int sent_count = src_off;
+       int srcchunk, dstchunk;
+       int num_sges;
+       int rc;
+
+       for (dstchunk = 0; dstchunk < 2; dstchunk++) {
+               num_sges = 0;
+               for (srcchunk = 0; srcchunk < 2; srcchunk++) {
+                       sges[srcchunk].addr = dma_addr + src_off;
+                       sges[srcchunk].length = src_len;
+                       sges[srcchunk].lkey = link->roce_pd->local_dma_lkey;
+                       num_sges++;
+
+                       src_off += src_len;
+                       if (src_off >= conn->sndbuf_desc->len)
+                               src_off -= conn->sndbuf_desc->len;
+                                               /* modulo in send ring */
+                       if (src_len_sum == dst_len)
+                               break; /* either on 1st or 2nd iteration */
+                       /* prepare next (== 2nd) iteration */
+                       src_len = dst_len - src_len; /* remainder */
+                       src_len_sum += src_len;
+               }
+               rc = smc_tx_rdma_write(conn, dst_off, num_sges, sges);
+               if (rc)
+                       return rc;
+               if (dst_len_sum == len)
+                       break; /* either on 1st or 2nd iteration */
+               /* prepare next (== 2nd) iteration */
+               dst_off = 0; /* modulo offset in RMBE ring buffer */
+               dst_len = len - dst_len; /* remainder */
+               dst_len_sum += dst_len;
+               src_len = min_t(int, dst_len, conn->sndbuf_desc->len -
+                               sent_count);
+               src_len_sum = src_len;
+       }
+       return 0;
+}
+
+/* SMC-D helper for smc_tx_rdma_writes() */
+static int smcd_tx_rdma_writes(struct smc_connection *conn, size_t len,
+                              size_t src_off, size_t src_len,
+                              size_t dst_off, size_t dst_len)
+{
+       int src_len_sum = src_len, dst_len_sum = dst_len;
+       int srcchunk, dstchunk;
+       int rc;
+
+       for (dstchunk = 0; dstchunk < 2; dstchunk++) {
+               for (srcchunk = 0; srcchunk < 2; srcchunk++) {
+                       void *data = conn->sndbuf_desc->cpu_addr + src_off;
+
+                       rc = smcd_tx_ism_write(conn, data, src_len, dst_off +
+                                              sizeof(struct smcd_cdc_msg), 0);
+                       if (rc)
+                               return rc;
+                       dst_off += src_len;
+                       src_off += src_len;
+                       if (src_off >= conn->sndbuf_desc->len)
+                               src_off -= conn->sndbuf_desc->len;
+                                               /* modulo in send ring */
+                       if (src_len_sum == dst_len)
+                               break; /* either on 1st or 2nd iteration */
+                       /* prepare next (== 2nd) iteration */
+                       src_len = dst_len - src_len; /* remainder */
+                       src_len_sum += src_len;
+               }
+               if (dst_len_sum == len)
+                       break; /* either on 1st or 2nd iteration */
+               /* prepare next (== 2nd) iteration */
+               dst_off = 0; /* modulo offset in RMBE ring buffer */
+               dst_len = len - dst_len; /* remainder */
+               dst_len_sum += dst_len;
+               src_len = min_t(int, dst_len, conn->sndbuf_desc->len - src_off);
+               src_len_sum = src_len;
+       }
+       return 0;
+}
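+
+/* Worked example of the two-chunk scheme above (numbers illustrative):
+ * with a 16-byte sndbuf, sent.count == 12 and len == 8 the source wraps,
+ * so the first source chunk covers bytes 12..15 (src_len == 4) and the
+ * second chunk bytes 0..3; the destination side is split the same way
+ * against the peer RMBE, giving at most 2 x 2 chunk combinations.
+ */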
+
 /* sndbuf consumer: prepare all necessary (src&dst) chunks of data transmit;
  * usable snd_wnd as max transmit
  */
 static int smc_tx_rdma_writes(struct smc_connection *conn)
 {
-       size_t src_off, src_len, dst_off, dst_len; /* current chunk values */
-       size_t len, dst_len_sum, src_len_sum, dstchunk, srcchunk;
+       size_t len, src_len, dst_off, dst_len; /* current chunk values */
        union smc_host_cursor sent, prep, prod, cons;
-       struct ib_sge sges[SMC_IB_MAX_SEND_SGE];
-       struct smc_link_group *lgr = conn->lgr;
        struct smc_cdc_producer_flags *pflags;
        int to_send, rmbespace;
-       struct smc_link *link;
-       dma_addr_t dma_addr;
-       int num_sges;
        int rc;
 
        /* source: sndbuf */
-       smc_curs_write(&sent, smc_curs_read(&conn->tx_curs_sent, conn), conn);
-       smc_curs_write(&prep, smc_curs_read(&conn->tx_curs_prep, conn), conn);
+       smc_curs_copy(&sent, &conn->tx_curs_sent, conn);
+       smc_curs_copy(&prep, &conn->tx_curs_prep, conn);
        /* cf. wmem_alloc - (snd_max - snd_una) */
        to_send = smc_curs_diff(conn->sndbuf_desc->len, &sent, &prep);
        if (to_send <= 0)
@@ -327,12 +425,8 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)
        rmbespace = atomic_read(&conn->peer_rmbe_space);
        if (rmbespace <= 0)
                return 0;
-       smc_curs_write(&prod,
-                      smc_curs_read(&conn->local_tx_ctrl.prod, conn),
-                      conn);
-       smc_curs_write(&cons,
-                      smc_curs_read(&conn->local_rx_ctrl.cons, conn),
-                      conn);
+       smc_curs_copy(&prod, &conn->local_tx_ctrl.prod, conn);
+       smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);
 
        /* if usable snd_wnd closes ask peer to advertise once it opens again */
        pflags = &conn->local_tx_ctrl.prod_flags;
@@ -341,7 +435,6 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)
        len = min(to_send, rmbespace);
 
        /* initialize variables for first iteration of subsequent nested loop */
-       link = &lgr->lnk[SMC_SINGLE_LINK];
        dst_off = prod.count;
        if (prod.wrap == cons.wrap) {
                /* the filled destination area is unwrapped,
@@ -358,8 +451,6 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)
                 */
                dst_len = len;
        }
-       dst_len_sum = dst_len;
-       src_off = sent.count;
        /* dst_len determines the maximum src_len */
        if (sent.count + dst_len <= conn->sndbuf_desc->len) {
                /* unwrapped src case: single chunk of entire dst_len */
@@ -368,51 +459,23 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)
                /* wrapped src case: 2 chunks of sum dst_len; start with 1st: */
                src_len = conn->sndbuf_desc->len - sent.count;
        }
-       src_len_sum = src_len;
-       dma_addr = sg_dma_address(conn->sndbuf_desc->sgt[SMC_SINGLE_LINK].sgl);
-       for (dstchunk = 0; dstchunk < 2; dstchunk++) {
-               num_sges = 0;
-               for (srcchunk = 0; srcchunk < 2; srcchunk++) {
-                       sges[srcchunk].addr = dma_addr + src_off;
-                       sges[srcchunk].length = src_len;
-                       sges[srcchunk].lkey = link->roce_pd->local_dma_lkey;
-                       num_sges++;
-                       src_off += src_len;
-                       if (src_off >= conn->sndbuf_desc->len)
-                               src_off -= conn->sndbuf_desc->len;
-                                               /* modulo in send ring */
-                       if (src_len_sum == dst_len)
-                               break; /* either on 1st or 2nd iteration */
-                       /* prepare next (== 2nd) iteration */
-                       src_len = dst_len - src_len; /* remainder */
-                       src_len_sum += src_len;
-               }
-               rc = smc_tx_rdma_write(conn, dst_off, num_sges, sges);
-               if (rc)
-                       return rc;
-               if (dst_len_sum == len)
-                       break; /* either on 1st or 2nd iteration */
-               /* prepare next (== 2nd) iteration */
-               dst_off = 0; /* modulo offset in RMBE ring buffer */
-               dst_len = len - dst_len; /* remainder */
-               dst_len_sum += dst_len;
-               src_len = min_t(int,
-                               dst_len, conn->sndbuf_desc->len - sent.count);
-               src_len_sum = src_len;
-       }
+
+       if (conn->lgr->is_smcd)
+               rc = smcd_tx_rdma_writes(conn, len, sent.count, src_len,
+                                        dst_off, dst_len);
+       else
+               rc = smcr_tx_rdma_writes(conn, len, sent.count, src_len,
+                                        dst_off, dst_len);
+       if (rc)
+               return rc;
 
        if (conn->urg_tx_pend && len == to_send)
                pflags->urg_data_present = 1;
        smc_tx_advance_cursors(conn, &prod, &sent, len);
        /* update connection's cursors with advanced local cursors */
-       smc_curs_write(&conn->local_tx_ctrl.prod,
-                      smc_curs_read(&prod, conn),
-                      conn);
+       smc_curs_copy(&conn->local_tx_ctrl.prod, &prod, conn);
                                                        /* dst: peer RMBE */
-       smc_curs_write(&conn->tx_curs_sent,
-                      smc_curs_read(&sent, conn),
-                      conn);
-                                                       /* src: local sndbuf */
+       smc_curs_copy(&conn->tx_curs_sent, &sent, conn); /* src: local sndbuf */
 
        return 0;
 }
@@ -420,7 +483,7 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)
 /* Wakeup sndbuf consumers from any context (IRQ or process)
  * since there is more data to transmit; usable snd_wnd as max transmit
  */
-int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
+static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
 {
        struct smc_cdc_producer_flags *pflags;
        struct smc_cdc_tx_pend *pend;
@@ -467,6 +530,37 @@ out_unlock:
        return rc;
 }
 
+static int smcd_tx_sndbuf_nonempty(struct smc_connection *conn)
+{
+       struct smc_cdc_producer_flags *pflags = &conn->local_tx_ctrl.prod_flags;
+       int rc = 0;
+
+       spin_lock_bh(&conn->send_lock);
+       if (!pflags->urg_data_present)
+               rc = smc_tx_rdma_writes(conn);
+       if (!rc)
+               rc = smcd_cdc_msg_send(conn);
+
+       if (!rc && pflags->urg_data_present) {
+               pflags->urg_data_pending = 0;
+               pflags->urg_data_present = 0;
+       }
+       spin_unlock_bh(&conn->send_lock);
+       return rc;
+}
+
+int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
+{
+       int rc;
+
+       if (conn->lgr->is_smcd)
+               rc = smcd_tx_sndbuf_nonempty(conn);
+       else
+               rc = smcr_tx_sndbuf_nonempty(conn);
+
+       return rc;
+}
+
 /* Wakeup sndbuf consumers from process context
  * since there is more data to transmit
  */
@@ -499,17 +593,11 @@ void smc_tx_consumer_update(struct smc_connection *conn, bool force)
        int sender_free = conn->rmb_desc->len;
        int to_confirm;
 
-       smc_curs_write(&cons,
-                      smc_curs_read(&conn->local_tx_ctrl.cons, conn),
-                      conn);
-       smc_curs_write(&cfed,
-                      smc_curs_read(&conn->rx_curs_confirmed, conn),
-                      conn);
+       smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn);
+       smc_curs_copy(&cfed, &conn->rx_curs_confirmed, conn);
        to_confirm = smc_curs_diff(conn->rmb_desc->len, &cfed, &cons);
        if (to_confirm > conn->rmbe_update_limit) {
-               smc_curs_write(&prod,
-                              smc_curs_read(&conn->local_rx_ctrl.prod, conn),
-                              conn);
+               smc_curs_copy(&prod, &conn->local_rx_ctrl.prod, conn);
                sender_free = conn->rmb_desc->len -
                              smc_curs_diff(conn->rmb_desc->len, &prod, &cfed);
        }
@@ -525,9 +613,8 @@ void smc_tx_consumer_update(struct smc_connection *conn, bool force)
                                              SMC_TX_WORK_DELAY);
                        return;
                }
-               smc_curs_write(&conn->rx_curs_confirmed,
-                              smc_curs_read(&conn->local_tx_ctrl.cons, conn),
-                              conn);
+               smc_curs_copy(&conn->rx_curs_confirmed,
+                             &conn->local_tx_ctrl.cons, conn);
                conn->local_rx_ctrl.prod_flags.cons_curs_upd_req = 0;
        }
        if (conn->local_rx_ctrl.prod_flags.write_blocked &&
index 9d2238909fa08d72e63537607132dd3ac6a4e93f..07e6ad76224a0cd1b2fe5bb91d7acaa4eb977534 100644 (file)
@@ -22,8 +22,8 @@ static inline int smc_tx_prepared_sends(struct smc_connection *conn)
 {
        union smc_host_cursor sent, prep;
 
-       smc_curs_write(&sent, smc_curs_read(&conn->tx_curs_sent, conn), conn);
-       smc_curs_write(&prep, smc_curs_read(&conn->tx_curs_prep, conn), conn);
+       smc_curs_copy(&sent, &conn->tx_curs_sent, conn);
+       smc_curs_copy(&prep, &conn->tx_curs_prep, conn);
        return smc_curs_diff(conn->sndbuf_desc->len, &sent, &prep);
 }
 
@@ -33,5 +33,7 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len);
 int smc_tx_sndbuf_nonempty(struct smc_connection *conn);
 void smc_tx_sndbuf_nonfull(struct smc_sock *smc);
 void smc_tx_consumer_update(struct smc_connection *conn, bool force);
+int smcd_tx_ism_write(struct smc_connection *conn, void *data, size_t len,
+                     u32 offset, int signal);
 
 #endif /* SMC_TX_H */
index dbd2605d19627b0f91731767f3d0d8e0c166f454..f856b8402b3fe527c0dbf80225d501bcab9336ce 100644 (file)
@@ -92,8 +92,6 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc)
        if (!test_and_clear_bit(pnd_snd_idx, link->wr_tx_mask))
                return;
        if (wc->status) {
-               struct smc_link_group *lgr;
-
                for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) {
                        /* clear full struct smc_wr_tx_pend including .priv */
                        memset(&link->wr_tx_pends[i], 0,
@@ -103,9 +101,7 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc)
                        clear_bit(i, link->wr_tx_mask);
                }
                /* terminate connections of this link group abnormally */
-               lgr = container_of(link, struct smc_link_group,
-                                  lnk[SMC_SINGLE_LINK]);
-               smc_lgr_terminate(lgr);
+               smc_lgr_terminate(smc_get_lgr(link));
        }
        if (pnd_snd.handler)
                pnd_snd.handler(&pnd_snd.priv, link, wc->status);
@@ -186,18 +182,14 @@ int smc_wr_tx_get_free_slot(struct smc_link *link,
                if (rc)
                        return rc;
        } else {
-               struct smc_link_group *lgr;
-
-               lgr = container_of(link, struct smc_link_group,
-                                  lnk[SMC_SINGLE_LINK]);
                rc = wait_event_timeout(
                        link->wr_tx_wait,
-                       list_empty(&lgr->list) || /* lgr terminated */
+                       link->state == SMC_LNK_INACTIVE ||
                        (smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY),
                        SMC_WR_TX_WAIT_FREE_SLOT_TIME);
                if (!rc) {
                        /* timeout - terminate connections */
-                       smc_lgr_terminate(lgr);
+                       smc_lgr_terminate(smc_get_lgr(link));
                        return -EPIPE;
                }
                if (idx == link->wr_tx_cnt)
@@ -250,12 +242,8 @@ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv)
        rc = ib_post_send(link->roce_qp, &link->wr_tx_ibs[pend->idx],
                          &failed_wr);
        if (rc) {
-               struct smc_link_group *lgr =
-                       container_of(link, struct smc_link_group,
-                                    lnk[SMC_SINGLE_LINK]);
-
                smc_wr_tx_put_slot(link, priv);
-               smc_lgr_terminate(lgr);
+               smc_lgr_terminate(smc_get_lgr(link));
        }
        return rc;
 }
@@ -283,11 +271,7 @@ int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr)
                                              SMC_WR_REG_MR_WAIT_TIME);
        if (!rc) {
                /* timeout - terminate connections */
-               struct smc_link_group *lgr;
-
-               lgr = container_of(link, struct smc_link_group,
-                                  lnk[SMC_SINGLE_LINK]);
-               smc_lgr_terminate(lgr);
+               smc_lgr_terminate(smc_get_lgr(link));
                return -EPIPE;
        }
        if (rc == -ERESTARTSYS)
@@ -380,8 +364,6 @@ static inline void smc_wr_rx_process_cqes(struct ib_wc wc[], int num)
                        smc_wr_rx_demultiplex(&wc[i]);
                        smc_wr_rx_post(link); /* refill WR RX */
                } else {
-                       struct smc_link_group *lgr;
-
                        /* handle status errors */
                        switch (wc[i].status) {
                        case IB_WC_RETRY_EXC_ERR:
@@ -390,9 +372,7 @@ static inline void smc_wr_rx_process_cqes(struct ib_wc wc[], int num)
                                /* terminate connections of this link group
                                 * abnormally
                                 */
-                               lgr = container_of(link, struct smc_link_group,
-                                                  lnk[SMC_SINGLE_LINK]);
-                               smc_lgr_terminate(lgr);
+                               smc_lgr_terminate(smc_get_lgr(link));
                                break;
                        default:
                                smc_wr_rx_post(link); /* refill WR RX */
index 8c24d5dc4bc8f7f4faab62638e41f6d2ac66a842..b91949168a87fa7f7bd576355635340107b68a2e 100644 (file)
@@ -252,7 +252,7 @@ static struct inode *sock_alloc_inode(struct super_block *sb)
        init_waitqueue_head(&wq->wait);
        wq->fasync_list = NULL;
        wq->flags = 0;
-       RCU_INIT_POINTER(ei->socket.wq, wq);
+       ei->socket.wq = wq;
 
        ei->socket.state = SS_UNCONNECTED;
        ei->socket.flags = 0;
@@ -266,11 +266,9 @@ static struct inode *sock_alloc_inode(struct super_block *sb)
 static void sock_destroy_inode(struct inode *inode)
 {
        struct socket_alloc *ei;
-       struct socket_wq *wq;
 
        ei = container_of(inode, struct socket_alloc, vfs_inode);
-       wq = rcu_dereference_protected(ei->socket.wq, 1);
-       kfree_rcu(wq, rcu);
+       kfree_rcu(ei->socket.wq, rcu);
        kmem_cache_free(sock_inode_cachep, ei);
 }
 
@@ -604,7 +602,7 @@ static void __sock_release(struct socket *sock, struct inode *inode)
                module_put(owner);
        }
 
-       if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
+       if (sock->wq->fasync_list)
                pr_err("%s: fasync list not empty!\n", __func__);
 
        if (!sock->file) {
@@ -1131,12 +1129,21 @@ EXPORT_SYMBOL(sock_create_lite);
 static __poll_t sock_poll(struct file *file, poll_table *wait)
 {
        struct socket *sock = file->private_data;
-       __poll_t events = poll_requested_events(wait);
+       __poll_t events = poll_requested_events(wait), flag = 0;
 
-       sock_poll_busy_loop(sock, events);
        if (!sock->ops->poll)
                return 0;
-       return sock->ops->poll(file, sock, wait) | sock_poll_busy_flag(sock);
+
+       if (sk_can_busy_loop(sock->sk)) {
+               /* poll once if requested by the syscall */
+               if (events & POLL_BUSY_LOOP)
+                       sk_busy_loop(sock->sk, 1);
+
+               /* if this socket can poll_ll, tell the system call */
+               flag = POLL_BUSY_LOOP;
+       }
+
+       return sock->ops->poll(file, sock, wait) | flag;
 }
 
 static int sock_mmap(struct file *file, struct vm_area_struct *vma)
@@ -1173,7 +1180,7 @@ static int sock_fasync(int fd, struct file *filp, int on)
                return -EINVAL;
 
        lock_sock(sk);
-       wq = rcu_dereference_protected(sock->wq, lockdep_sock_is_held(sk));
+       wq = sock->wq;
        fasync_helper(fd, filp, on, &wq->fasync_list);
 
        if (!wq->fasync_list)
index 625acb27efcc272ccdc0f60d4d693d6761ed139b..da1a676860cad3c8a2a95acf11f0e908fe2bc255 100644 (file)
@@ -140,11 +140,13 @@ static int __strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
                        /* We are going to append to the frags_list of head.
                         * Need to unshare the frag_list.
                         */
-                       err = skb_unclone(head, GFP_ATOMIC);
-                       if (err) {
-                               STRP_STATS_INCR(strp->stats.mem_fail);
-                               desc->error = err;
-                               return 0;
+                       if (skb_has_frag_list(head)) {
+                               err = skb_unclone(head, GFP_ATOMIC);
+                               if (err) {
+                                       STRP_STATS_INCR(strp->stats.mem_fail);
+                                       desc->error = err;
+                                       return 0;
+                               }
                        }
 
                        if (unlikely(skb_shinfo(head)->frag_list)) {
@@ -201,14 +203,16 @@ static int __strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
                        memset(stm, 0, sizeof(*stm));
                        stm->strp.offset = orig_offset + eaten;
                } else {
-                       /* Unclone since we may be appending to an skb that we
+                       /* Unclone if we are appending to an skb that we
                         * already share a frag_list with.
                         */
-                       err = skb_unclone(skb, GFP_ATOMIC);
-                       if (err) {
-                               STRP_STATS_INCR(strp->stats.mem_fail);
-                               desc->error = err;
-                               break;
+                       if (skb_has_frag_list(skb)) {
+                               err = skb_unclone(skb, GFP_ATOMIC);
+                               if (err) {
+                                       STRP_STATS_INCR(strp->stats.mem_fail);
+                                       desc->error = err;
+                                       break;
+                               }
                        }
 
                        stm = _strp_msg(head);
@@ -404,8 +408,6 @@ EXPORT_SYMBOL_GPL(strp_data_ready);
 
 static void do_strp_work(struct strparser *strp)
 {
-       read_descriptor_t rd_desc;
-
        /* We need the read lock to synchronize with strp_data_ready. We
         * need the socket lock for calling strp_read_sock.
         */
@@ -417,8 +419,6 @@ static void do_strp_work(struct strparser *strp)
        if (strp->paused)
                goto out;
 
-       rd_desc.arg.data = strp;
-
        if (strp_read_sock(strp) == -ENOMEM)
                queue_work(strp_wq, &strp->work);
 
index be8f103d22fdb7e439bb3ae610aaa3726a4b8332..0fc397fae42bc3ae017c2260b746c32d0d0c952e 100644 (file)
@@ -517,7 +517,7 @@ gss_alloc_msg(struct gss_auth *gss_auth,
                err = gss_encode_v1_msg(gss_msg, service_name, gss_auth->target_name);
                if (err)
                        goto err_put_pipe_version;
-       };
+       }
        kref_get(&gss_auth->kref);
        return gss_msg;
 err_put_pipe_version:
index f3711176be4559e5707d4c75d130a7907e35afe2..9ee6cfea56dd015851302f1702f9e068803d01dd 100644 (file)
@@ -512,7 +512,7 @@ int tipc_bcast_init(struct net *net)
        struct tipc_bc_base *bb = NULL;
        struct tipc_link *l = NULL;
 
-       bb = kzalloc(sizeof(*bb), GFP_ATOMIC);
+       bb = kzalloc(sizeof(*bb), GFP_KERNEL);
        if (!bb)
                goto enomem;
        tn->bcbase = bb;
index 2dfb492a7c943b61f1b0c551faa6ba1230f3159f..418f03d0be90f076cf34b1ee96495ceb3a3d68de 100644 (file)
@@ -395,6 +395,7 @@ int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b,
                tipc_net_init(net, node_id, 0);
        }
        if (!tipc_own_id(net)) {
+               dev_put(dev);
                pr_warn("Failed to obtain node identity\n");
                return -EINVAL;
        }
@@ -610,6 +611,7 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt,
        case NETDEV_CHANGE:
                if (netif_carrier_ok(dev))
                        break;
+               /* else: fall through */
        case NETDEV_UP:
                test_and_set_bit_lock(0, &b->up);
                break;
index d7a7befeddd42c907bdf88a37e0b77515b3e8705..e82f13cb2dc5aececbd4ce2e6b9f59e1e2f003e8 100644 (file)
@@ -159,11 +159,6 @@ u32 tipc_group_exclude(struct tipc_group *grp)
        return 0;
 }
 
-int tipc_group_size(struct tipc_group *grp)
-{
-       return grp->member_cnt;
-}
-
 struct tipc_group *tipc_group_create(struct net *net, u32 portid,
                                     struct tipc_group_req *mreq,
                                     bool *group_is_open)
@@ -232,8 +227,8 @@ void tipc_group_delete(struct net *net, struct tipc_group *grp)
        kfree(grp);
 }
 
-struct tipc_member *tipc_group_find_member(struct tipc_group *grp,
-                                          u32 node, u32 port)
+static struct tipc_member *tipc_group_find_member(struct tipc_group *grp,
+                                                 u32 node, u32 port)
 {
        struct rb_node *n = grp->members.rb_node;
        u64 nkey, key = (u64)node << 32 | port;
@@ -918,3 +913,35 @@ void tipc_group_member_evt(struct tipc_group *grp,
        }
        *sk_rcvbuf = tipc_group_rcvbuf_limit(grp);
 }
+
+int tipc_group_fill_sock_diag(struct tipc_group *grp, struct sk_buff *skb)
+{
+       struct nlattr *group = nla_nest_start(skb, TIPC_NLA_SOCK_GROUP);
+
+       if (nla_put_u32(skb, TIPC_NLA_SOCK_GROUP_ID,
+                       grp->type) ||
+           nla_put_u32(skb, TIPC_NLA_SOCK_GROUP_INSTANCE,
+                       grp->instance) ||
+           nla_put_u32(skb, TIPC_NLA_SOCK_GROUP_BC_SEND_NEXT,
+                       grp->bc_snd_nxt))
+               goto group_msg_cancel;
+
+       if (grp->scope == TIPC_NODE_SCOPE)
+               if (nla_put_flag(skb, TIPC_NLA_SOCK_GROUP_NODE_SCOPE))
+                       goto group_msg_cancel;
+
+       if (grp->scope == TIPC_CLUSTER_SCOPE)
+               if (nla_put_flag(skb, TIPC_NLA_SOCK_GROUP_CLUSTER_SCOPE))
+                       goto group_msg_cancel;
+
+       if (*grp->open)
+               if (nla_put_flag(skb, TIPC_NLA_SOCK_GROUP_OPEN))
+                       goto group_msg_cancel;
+
+       nla_nest_end(skb, group);
+       return 0;
+
+group_msg_cancel:
+       nla_nest_cancel(skb, group);
+       return -1;
+}
index 5996af6e9f1ddb72b565ae9288e685c83d6a1c0d..76b4e5a7b39deb83520932a7af678e7b25558be4 100644 (file)
@@ -72,4 +72,5 @@ void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node,
                               u32 port, struct sk_buff_head *xmitq);
 u16 tipc_group_bc_snd_nxt(struct tipc_group *grp);
 void tipc_group_update_member(struct tipc_member *m, int len);
+int tipc_group_fill_sock_diag(struct tipc_group *grp, struct sk_buff *skb);
 #endif
index 695acb783969730e448579772dfa3a17b3e827d6..b1f0bee54eacc9eb1974169853abf1ace4df2733 100644 (file)
@@ -106,7 +106,8 @@ struct tipc_stats {
  * @backlogq: queue for messages waiting to be sent
  * @snt_nxt: next sequence number to use for outbound messages
  * @last_retransmitted: sequence number of most recently retransmitted message
- * @stale_count: # of identical retransmit requests made by peer
+ * @stale_cnt: counter for number of identical retransmit attempts
+ * @stale_limit: time when repeated identical retransmits must force link reset
  * @ackers: # of peers that needs to ack each packet before it can be released
  * @acked: # last packet acked by a certain peer. Used for broadcast.
  * @rcv_nxt: next sequence number to expect for inbound messages
@@ -127,14 +128,17 @@ struct tipc_link {
        struct net *net;
 
        /* Management and link supervision data */
-       u32 peer_session;
-       u32 session;
+       u16 peer_session;
+       u16 session;
+       u16 snd_nxt_state;
+       u16 rcv_nxt_state;
        u32 peer_bearer_id;
        u32 bearer_id;
        u32 tolerance;
        u32 abort_limit;
        u32 state;
        u16 peer_caps;
+       bool in_session;
        bool active;
        u32 silent_intv_cnt;
        char if_name[TIPC_MAX_IF_NAME];
@@ -161,7 +165,8 @@ struct tipc_link {
        u16 snd_nxt;
        u16 last_retransm;
        u16 window;
-       u32 stale_count;
+       u16 stale_cnt;
+       unsigned long stale_limit;
 
        /* Reception */
        u16 rcv_nxt;
@@ -212,11 +217,6 @@ enum {
  */
 #define TIPC_NACK_INTV (TIPC_MIN_LINK_WIN * 2)
 
-/* Wildcard value for link session numbers. When it is known that
- * peer endpoint is down, any session number must be accepted.
- */
-#define ANY_SESSION 0x10000
-
 /* Link FSM states:
  */
 enum {
@@ -297,11 +297,6 @@ static bool link_is_bc_rcvlink(struct tipc_link *l)
        return ((l->bc_rcvlink == l) && !link_is_bc_sndlink(l));
 }
 
-int tipc_link_is_active(struct tipc_link *l)
-{
-       return l->active;
-}
-
 void tipc_link_set_active(struct tipc_link *l, bool active)
 {
        l->active = active;
@@ -337,6 +332,11 @@ char tipc_link_plane(struct tipc_link *l)
        return l->net_plane;
 }
 
+void tipc_link_update_caps(struct tipc_link *l, u16 capabilities)
+{
+       l->peer_caps = capabilities;
+}
+
 void tipc_link_add_bc_peer(struct tipc_link *snd_l,
                           struct tipc_link *uc_l,
                           struct sk_buff_head *xmitq)
@@ -373,7 +373,7 @@ int tipc_link_bc_peers(struct tipc_link *l)
        return l->ackers;
 }
 
-u16 link_bc_rcv_gap(struct tipc_link *l)
+static u16 link_bc_rcv_gap(struct tipc_link *l)
 {
        struct sk_buff *skb = skb_peek(&l->deferdq);
        u16 gap = 0;
@@ -469,7 +469,7 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id,
        l->addr = peer;
        l->peer_caps = peer_caps;
        l->net = net;
-       l->peer_session = ANY_SESSION;
+       l->in_session = false;
        l->bearer_id = bearer_id;
        l->tolerance = tolerance;
        l->net_plane = net_plane;
@@ -820,7 +820,7 @@ static int link_schedule_user(struct tipc_link *l, struct tipc_msg *hdr)
  * Wake up a number of waiting users, as permitted by available space
  * in the send queue
  */
-void link_prepare_wakeup(struct tipc_link *l)
+static void link_prepare_wakeup(struct tipc_link *l)
 {
        struct sk_buff *skb, *tmp;
        int imp, i = 0;
@@ -838,7 +838,7 @@ void link_prepare_wakeup(struct tipc_link *l)
 
 void tipc_link_reset(struct tipc_link *l)
 {
-       l->peer_session = ANY_SESSION;
+       l->in_session = false;
        l->session++;
        l->mtu = l->advertised_mtu;
        __skb_queue_purge(&l->transmq);
@@ -857,10 +857,12 @@ void tipc_link_reset(struct tipc_link *l)
        l->rcv_unacked = 0;
        l->snd_nxt = 1;
        l->rcv_nxt = 1;
+       l->snd_nxt_state = 1;
+       l->rcv_nxt_state = 1;
        l->acked = 0;
        l->silent_intv_cnt = 0;
        l->rst_cnt = 0;
-       l->stale_count = 0;
+       l->stale_cnt = 0;
        l->bc_peer_is_up = false;
        memset(&l->mon_state, 0, sizeof(l->mon_state));
        tipc_link_reset_stats(l);
@@ -954,7 +956,8 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
        return rc;
 }
 
-void tipc_link_advance_backlog(struct tipc_link *l, struct sk_buff_head *xmitq)
+static void tipc_link_advance_backlog(struct tipc_link *l,
+                                     struct sk_buff_head *xmitq)
 {
        struct sk_buff *skb, *_skb;
        struct tipc_msg *hdr;
@@ -997,39 +1000,41 @@ static void link_retransmit_failure(struct tipc_link *l, struct sk_buff *skb)
                msg_seqno(hdr), msg_prevnode(hdr), msg_orignode(hdr));
 }
 
-int tipc_link_retrans(struct tipc_link *l, struct tipc_link *nacker,
-                     u16 from, u16 to, struct sk_buff_head *xmitq)
+/* tipc_link_retrans() - retransmit one or more packets
+ * @l: the link to transmit on
+ * @r: the receiving link ordering the retransmit. Same as l if unicast
+ * @from: retransmit from (inclusive) this sequence number
+ * @to: retransmit to (inclusive) this sequence number
+ * @xmitq: queue for accumulating the retransmitted packets
+ */
+static int tipc_link_retrans(struct tipc_link *l, struct tipc_link *r,
+                            u16 from, u16 to, struct sk_buff_head *xmitq)
 {
        struct sk_buff *_skb, *skb = skb_peek(&l->transmq);
-       struct tipc_msg *hdr;
-       u16 ack = l->rcv_nxt - 1;
        u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1;
+       u16 ack = l->rcv_nxt - 1;
+       struct tipc_msg *hdr;
 
        if (!skb)
                return 0;
 
        /* Detect repeated retransmit failures on same packet */
-       if (nacker->last_retransm != buf_seqno(skb)) {
-               nacker->last_retransm = buf_seqno(skb);
-               nacker->stale_count = 1;
-       } else if (++nacker->stale_count > 100) {
+       if (r->last_retransm != buf_seqno(skb)) {
+               r->last_retransm = buf_seqno(skb);
+               r->stale_limit = jiffies + msecs_to_jiffies(l->tolerance);
+       } else if (++r->stale_cnt > 99 && time_after(jiffies, r->stale_limit)) {
                link_retransmit_failure(l, skb);
-               nacker->stale_count = 0;
                if (link_is_bc_sndlink(l))
                        return TIPC_LINK_DOWN_EVT;
                return tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
        }
 
-       /* Move forward to where retransmission should start */
        skb_queue_walk(&l->transmq, skb) {
-               if (!less(buf_seqno(skb), from))
-                       break;
-       }
-
-       skb_queue_walk_from(&l->transmq, skb) {
-               if (more(buf_seqno(skb), to))
-                       break;
                hdr = buf_msg(skb);
+               if (less(msg_seqno(hdr), from))
+                       continue;
+               if (more(msg_seqno(hdr), to))
+                       break;
                _skb = __pskb_copy(skb, MIN_H_SIZE, GFP_ATOMIC);
                if (!_skb)
                        return 0;
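
The retransmit-failure criterion above changes from a bare counter (more than 100 identical requests) to a counter combined with a deadline derived from the link tolerance. A sketch of the combined rule with illustrative names, using millisecond timestamps in place of jiffies; in the kernel the counter is cleared when acks release packets (see the tipc_link_rcv hunk below), here it is reset on a new sequence number for brevity:

	#include <stdbool.h>
	#include <stdint.h>

	struct retrans_state {
		uint16_t last_seqno;     /* last packet a retransmit was asked for */
		uint16_t stale_cnt;      /* identical requests seen so far */
		uint64_t stale_limit;    /* deadline (ms) before failure may fire */
	};

	/* Declare link failure only when the peer keeps asking for the same
	 * packet (~100 times) AND the link tolerance has elapsed since the
	 * first identical request. */
	static bool retrans_failed(struct retrans_state *r, uint16_t seqno,
				   uint64_t now_ms, uint64_t tolerance_ms)
	{
		if (r->last_seqno != seqno) {
			r->last_seqno = seqno;
			r->stale_cnt = 0;
			r->stale_limit = now_ms + tolerance_ms;
			return false;
		}
		return ++r->stale_cnt > 99 && now_ms > r->stale_limit;
	}
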
@@ -1063,6 +1068,7 @@ static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb,
                        skb_queue_tail(mc_inputq, skb);
                        return true;
                }
+               /* else: fall through */
        case CONN_MANAGER:
                skb_queue_tail(inputq, skb);
                return true;
@@ -1271,6 +1277,7 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb,
 
                /* Forward queues and wake up waiting users */
                if (likely(tipc_link_release_pkts(l, msg_ack(hdr)))) {
+                       l->stale_cnt = 0;
                        tipc_link_advance_backlog(l, xmitq);
                        if (unlikely(!skb_queue_empty(&l->wakeupq)))
                                link_prepare_wakeup(l);
@@ -1347,6 +1354,8 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
        msg_set_seqno(hdr, l->snd_nxt + U16_MAX / 2);
 
        if (mtyp == STATE_MSG) {
+               if (l->peer_caps & TIPC_LINK_PROTO_SEQNO)
+                       msg_set_seqno(hdr, l->snd_nxt_state++);
                msg_set_seq_gap(hdr, rcvgap);
                msg_set_bc_gap(hdr, link_bc_rcv_gap(bcl));
                msg_set_probe(hdr, probe);
@@ -1438,6 +1447,44 @@ tnl:
        }
 }
 
+/* tipc_link_validate_msg(): validate message against current link state
+ * Returns true if message should be accepted, otherwise false
+ */
+bool tipc_link_validate_msg(struct tipc_link *l, struct tipc_msg *hdr)
+{
+       u16 curr_session = l->peer_session;
+       u16 session = msg_session(hdr);
+       int mtyp = msg_type(hdr);
+
+       if (msg_user(hdr) != LINK_PROTOCOL)
+               return true;
+
+       switch (mtyp) {
+       case RESET_MSG:
+               if (!l->in_session)
+                       return true;
+               /* Accept only RESET with new session number */
+               return more(session, curr_session);
+       case ACTIVATE_MSG:
+               if (!l->in_session)
+                       return true;
+               /* Accept only ACTIVATE with new or current session number */
+               return !less(session, curr_session);
+       case STATE_MSG:
+               /* Accept only STATE with current session number */
+               if (!l->in_session)
+                       return false;
+               if (session != curr_session)
+                       return false;
+               if (!(l->peer_caps & TIPC_LINK_PROTO_SEQNO))
+                       return true;
+               /* Accept only STATE with new sequence number */
+               return !less(msg_seqno(hdr), l->rcv_nxt_state);
+       default:
+               return false;
+       }
+}
+
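
tipc_link_validate_msg() leans entirely on wraparound-safe 16-bit comparisons (the less()/more() helpers) for both session numbers and the new STATE sequence numbers. A self-contained sketch of that serial-number arithmetic in the RFC 1982 style; seq_less() is an illustrative stand-in, not TIPC's exact helper:

	#include <assert.h>
	#include <stdbool.h>
	#include <stdint.h>

	/* a precedes b on the 16-bit circle iff the forward distance from a
	 * to b is nonzero and less than 2^15 */
	static bool seq_less(uint16_t a, uint16_t b)
	{
		return a != b && (uint16_t)(b - a) < 0x8000;
	}

	int main(void)
	{
		assert(seq_less(1, 2));          /* ordinary case */
		assert(seq_less(0xfffe, 3));     /* still "less" across the wrap */
		assert(!seq_less(3, 0xfffe));    /* ...and not the other way */
		/* e.g. a RESET is accepted only if its session number is
		 * strictly "more" than the current one */
		return 0;
	}
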
 /* tipc_link_proto_rcv(): receive link level protocol message :
  * Note that network plane id propagates through the network, and may
  * change at any time. The node with lowest numerical id determines
@@ -1471,17 +1518,12 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
        hdr = buf_msg(skb);
        data = msg_data(hdr);
 
+       if (!tipc_link_validate_msg(l, hdr))
+               goto exit;
+
        switch (mtyp) {
        case RESET_MSG:
-
-               /* Ignore duplicate RESET with old session number */
-               if ((less_eq(msg_session(hdr), l->peer_session)) &&
-                   (l->peer_session != ANY_SESSION))
-                       break;
-               /* fall thru' */
-
        case ACTIVATE_MSG:
-
                /* Complete own link name with peer's interface name */
                if_name =  strrchr(l->name, ':') + 1;
                if (sizeof(l->name) - (if_name - l->name) <= TIPC_MAX_IF_NAME)
@@ -1509,12 +1551,14 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
                        rc = TIPC_LINK_UP_EVT;
 
                l->peer_session = msg_session(hdr);
+               l->in_session = true;
                l->peer_bearer_id = msg_bearer_id(hdr);
                if (l->mtu > msg_max_pkt(hdr))
                        l->mtu = msg_max_pkt(hdr);
                break;
 
        case STATE_MSG:
+               l->rcv_nxt_state = msg_seqno(hdr) + 1;
 
                /* Update own tolerance if peer indicates a non-zero value */
                if (in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL))
index ec59348a81e8b7a5311bb9ff25ab778ea91b0354..7bc494a33fdf1c3cdf8feb04b44db7e6e04a349c 100644 (file)
@@ -110,6 +110,8 @@ char *tipc_link_name(struct tipc_link *l);
 char tipc_link_plane(struct tipc_link *l);
 int tipc_link_prio(struct tipc_link *l);
 int tipc_link_window(struct tipc_link *l);
+void tipc_link_update_caps(struct tipc_link *l, u16 capabilities);
+bool tipc_link_validate_msg(struct tipc_link *l, struct tipc_msg *hdr);
 unsigned long tipc_link_tolerance(struct tipc_link *l);
 void tipc_link_set_tolerance(struct tipc_link *l, u32 tol,
                             struct sk_buff_head *xmitq);
index 5453e564da8276fc2e1b7510c2dee8589f1b1f89..67f69389ec179bff3518b35c9d546685d9d2bc41 100644 (file)
@@ -684,7 +684,8 @@ int tipc_nl_monitor_get_threshold(struct net *net)
        return tn->mon_threshold;
 }
 
-int __tipc_nl_add_monitor_peer(struct tipc_peer *peer, struct tipc_nl_msg *msg)
+static int __tipc_nl_add_monitor_peer(struct tipc_peer *peer,
+                                     struct tipc_nl_msg *msg)
 {
        struct tipc_mon_domain *dom = peer->domain;
        struct nlattr *attrs;
index b6c45dccba3d2e7a3301167a90eecbf36d8ac07b..b61891054709597279d6204885a069b848dc869a 100644 (file)
@@ -416,26 +416,31 @@ bool tipc_msg_bundle(struct sk_buff *skb, struct tipc_msg *msg, u32 mtu)
  */
 bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos)
 {
-       struct tipc_msg *msg;
-       int imsz, offset;
+       struct tipc_msg *hdr, *ihdr;
+       int imsz;
 
        *iskb = NULL;
        if (unlikely(skb_linearize(skb)))
                goto none;
 
-       msg = buf_msg(skb);
-       offset = msg_hdr_sz(msg) + *pos;
-       if (unlikely(offset > (msg_size(msg) - MIN_H_SIZE)))
+       hdr = buf_msg(skb);
+       if (unlikely(*pos > (msg_data_sz(hdr) - MIN_H_SIZE)))
                goto none;
 
-       *iskb = skb_clone(skb, GFP_ATOMIC);
-       if (unlikely(!*iskb))
+       ihdr = (struct tipc_msg *)(msg_data(hdr) + *pos);
+       imsz = msg_size(ihdr);
+
+       if ((*pos + imsz) > msg_data_sz(hdr))
                goto none;
-       skb_pull(*iskb, offset);
-       imsz = msg_size(buf_msg(*iskb));
-       skb_trim(*iskb, imsz);
+
+       *iskb = tipc_buf_acquire(imsz, GFP_ATOMIC);
+       if (!*iskb)
+               goto none;
+
+       skb_copy_to_linear_data(*iskb, ihdr, imsz);
        if (unlikely(!tipc_msg_validate(iskb)))
                goto none;
+
        *pos += align(imsz);
        return true;
 none:
@@ -531,12 +536,6 @@ bool tipc_msg_reverse(u32 own_node,  struct sk_buff **skb, int err)
                msg_set_hdr_sz(hdr, BASIC_H_SIZE);
        }
 
-       if (skb_cloned(_skb) &&
-           pskb_expand_head(_skb, BUF_HEADROOM, BUF_TAILROOM, GFP_ATOMIC))
-               goto exit;
-
-       /* reassign after skb header modifications */
-       hdr = buf_msg(_skb);
        /* Now reverse the concerned fields */
        msg_set_errcode(hdr, err);
        msg_set_non_seq(hdr, 0);
@@ -595,10 +594,6 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err)
        if (!skb_cloned(skb))
                return true;
 
-       /* Unclone buffer in case it was bundled */
-       if (pskb_expand_head(skb, BUF_HEADROOM, BUF_TAILROOM, GFP_ATOMIC))
-               return false;
-
        return true;
 }
 
index bebe88cae07bef565d191aea1bb08acd1f3c1cbd..88f027b502f6f5ed955b03bd645321ea8e79eec3 100644 (file)
@@ -735,7 +735,7 @@ int tipc_nametbl_init(struct net *net)
        struct name_table *nt;
        int i;
 
-       nt = kzalloc(sizeof(*nt), GFP_ATOMIC);
+       nt = kzalloc(sizeof(*nt), GFP_KERNEL);
        if (!nt)
                return -ENOMEM;
 
index 0453bd451ce80c1935bb6588facc0f2c23ae8644..68014f1b69765269236ac0a6d839ca754d1235da 100644 (file)
@@ -45,6 +45,7 @@
 #include "netlink.h"
 
 #define INVALID_NODE_SIG       0x10000
+#define NODE_CLEANUP_AFTER     300000
 
 /* Flags used to take different actions according to flag type
  * TIPC_NOTIFY_NODE_DOWN: notify node is down
@@ -96,6 +97,7 @@ struct tipc_bclink_entry {
  * @link_id: local and remote bearer ids of changing link, if any
  * @publ_list: list of publications
  * @rcu: rcu struct for tipc_node
+ * @delete_at: indicates the time for deleting a down node
  */
 struct tipc_node {
        u32 addr;
@@ -121,6 +123,7 @@ struct tipc_node {
        unsigned long keepalive_intv;
        struct timer_list timer;
        struct rcu_head rcu;
+       unsigned long delete_at;
 };
 
 /* Node FSM states and events:
@@ -160,6 +163,7 @@ static struct tipc_node *tipc_node_find(struct net *net, u32 addr);
 static struct tipc_node *tipc_node_find_by_id(struct net *net, u8 *id);
 static void tipc_node_put(struct tipc_node *node);
 static bool node_is_up(struct tipc_node *n);
+static void tipc_node_delete_from_list(struct tipc_node *node);
 
 struct tipc_sock_conn {
        u32 port;
@@ -359,13 +363,24 @@ static struct tipc_node *tipc_node_create(struct net *net, u32 addr,
 {
        struct tipc_net *tn = net_generic(net, tipc_net_id);
        struct tipc_node *n, *temp_node;
+       struct tipc_link *l;
+       int bearer_id;
        int i;
 
        spin_lock_bh(&tn->node_list_lock);
        n = tipc_node_find(net, addr);
        if (n) {
+               if (n->capabilities == capabilities)
+                       goto exit;
                /* Same node may come back with new capabilities */
+               write_lock_bh(&n->lock);
                n->capabilities = capabilities;
+               for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) {
+                       l = n->links[bearer_id].link;
+                       if (l)
+                               tipc_link_update_caps(l, capabilities);
+               }
+               write_unlock_bh(&n->lock);
                goto exit;
        }
        n = kzalloc(sizeof(*n), GFP_ATOMIC);
@@ -390,6 +405,7 @@ static struct tipc_node *tipc_node_create(struct net *net, u32 addr,
        for (i = 0; i < MAX_BEARERS; i++)
                spin_lock_init(&n->links[i].lock);
        n->state = SELF_DOWN_PEER_LEAVING;
+       n->delete_at = jiffies + msecs_to_jiffies(NODE_CLEANUP_AFTER);
        n->signature = INVALID_NODE_SIG;
        n->active_links[0] = INVALID_BEARER_ID;
        n->active_links[1] = INVALID_BEARER_ID;
@@ -433,11 +449,16 @@ static void tipc_node_calculate_timer(struct tipc_node *n, struct tipc_link *l)
        tipc_link_set_abort_limit(l, tol / n->keepalive_intv);
 }
 
-static void tipc_node_delete(struct tipc_node *node)
+static void tipc_node_delete_from_list(struct tipc_node *node)
 {
        list_del_rcu(&node->list);
        hlist_del_rcu(&node->hash);
        tipc_node_put(node);
+}
+
+static void tipc_node_delete(struct tipc_node *node)
+{
+       tipc_node_delete_from_list(node);
 
        del_timer_sync(&node->timer);
        tipc_node_put(node);
@@ -544,6 +565,42 @@ void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port)
        tipc_node_put(node);
 }
 
+static void tipc_node_clear_links(struct tipc_node *node)
+{
+       int i;
+
+       for (i = 0; i < MAX_BEARERS; i++) {
+               struct tipc_link_entry *le = &node->links[i];
+
+               if (le->link) {
+                       kfree(le->link);
+                       le->link = NULL;
+                       node->link_cnt--;
+               }
+       }
+}
+
+/* tipc_node_cleanup - delete nodes that do not
+ * have active links for NODE_CLEANUP_AFTER time
+ */
+static int tipc_node_cleanup(struct tipc_node *peer)
+{
+       struct tipc_net *tn = tipc_net(peer->net);
+       bool deleted = false;
+
+       spin_lock_bh(&tn->node_list_lock);
+       tipc_node_write_lock(peer);
+
+       if (!node_is_up(peer) && time_after(jiffies, peer->delete_at)) {
+               tipc_node_clear_links(peer);
+               tipc_node_delete_from_list(peer);
+               deleted = true;
+       }
+       tipc_node_write_unlock(peer);
+       spin_unlock_bh(&tn->node_list_lock);
+       return deleted;
+}
+
 /* tipc_node_timeout - handle expiration of node timer
  */
 static void tipc_node_timeout(struct timer_list *t)
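
tipc_node_cleanup() above implements a lazy reaper: node_lost_contact() arms a delete_at deadline NODE_CLEANUP_AFTER (300 s) in the future, and the periodic node timer reaps the node only if it is still down past that deadline. A compact model of the rule, with illustrative types and seconds standing in for jiffies:

	#include <stdbool.h>
	#include <time.h>

	#define NODE_CLEANUP_AFTER_SEC 300  /* mirrors NODE_CLEANUP_AFTER = 300000 ms */

	struct node {
		bool up;
		time_t delete_at;        /* armed when contact is lost */
	};

	static void lost_contact(struct node *n, time_t now)
	{
		n->up = false;
		n->delete_at = now + NODE_CLEANUP_AFTER_SEC;
	}

	/* Timer path: reap only nodes that stayed down past their deadline */
	static bool node_cleanup(struct node *n, time_t now)
	{
		return !n->up && now > n->delete_at;
	}
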
@@ -551,21 +608,29 @@ static void tipc_node_timeout(struct timer_list *t)
        struct tipc_node *n = from_timer(n, t, timer);
        struct tipc_link_entry *le;
        struct sk_buff_head xmitq;
+       int remains = n->link_cnt;
        int bearer_id;
        int rc = 0;
 
+       if (!node_is_up(n) && tipc_node_cleanup(n)) {
+               /* Remove the timer's reference on the node */
+               tipc_node_put(n);
+               return;
+       }
+
        __skb_queue_head_init(&xmitq);
 
-       for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) {
+       for (bearer_id = 0; remains && (bearer_id < MAX_BEARERS); bearer_id++) {
                tipc_node_read_lock(n);
                le = &n->links[bearer_id];
-               spin_lock_bh(&le->lock);
                if (le->link) {
+                       spin_lock_bh(&le->lock);
                        /* Link tolerance may change asynchronously: */
                        tipc_node_calculate_timer(n, le->link);
                        rc = tipc_link_timeout(le->link, &xmitq);
+                       spin_unlock_bh(&le->lock);
+                       remains--;
                }
-               spin_unlock_bh(&le->lock);
                tipc_node_read_unlock(n);
                tipc_bearer_xmit(n->net, bearer_id, &xmitq, &le->maddr);
                if (rc & TIPC_LINK_DOWN_EVT)
@@ -1174,6 +1239,7 @@ static void node_lost_contact(struct tipc_node *n,
        uint i;
 
        pr_debug("Lost contact with %x\n", n->addr);
+       n->delete_at = jiffies + msecs_to_jiffies(NODE_CLEANUP_AFTER);
 
        /* Clean up broadcast state */
        tipc_bcast_remove_peer(n->net, n->bc_entry.link);
@@ -1481,7 +1547,7 @@ static void tipc_node_bc_rcv(struct net *net, struct sk_buff *skb, int bearer_id
  * tipc_node_check_state - check and if necessary update node state
  * @skb: TIPC packet
  * @bearer_id: identity of bearer delivering the packet
- * Returns true if state is ok, otherwise consumes buffer and returns false
+ * Returns true if state and msg are ok, otherwise false
  */
 static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb,
                                  int bearer_id, struct sk_buff_head *xmitq)
@@ -1515,6 +1581,9 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb,
                }
        }
 
+       if (!tipc_link_validate_msg(l, hdr))
+               return false;
+
        /* Check and update node accessibility if applicable */
        if (state == SELF_UP_PEER_COMING) {
                if (!tipc_link_is_up(l))
@@ -1743,7 +1812,6 @@ int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info)
        struct tipc_node *peer;
        u32 addr;
        int err;
-       int i;
 
        /* We identify the peer by its net */
        if (!info->attrs[TIPC_NLA_NET])
@@ -1778,15 +1846,7 @@ int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info)
                goto err_out;
        }
 
-       for (i = 0; i < MAX_BEARERS; i++) {
-               struct tipc_link_entry *le = &peer->links[i];
-
-               if (le->link) {
-                       kfree(le->link);
-                       le->link = NULL;
-                       peer->link_cnt--;
-               }
-       }
+       tipc_node_clear_links(peer);
        tipc_node_write_unlock(peer);
        tipc_node_delete(peer);
 
index 846c8f240872f25c93af27edcb32692916223b4a..48b3298a248d493e083cb4e39adeebe3ab3f38a5 100644 (file)
@@ -49,14 +49,16 @@ enum {
        TIPC_BCAST_STATE_NACK = (1 << 2),
        TIPC_BLOCK_FLOWCTL    = (1 << 3),
        TIPC_BCAST_RCAST      = (1 << 4),
-       TIPC_NODE_ID128       = (1 << 5)
+       TIPC_NODE_ID128       = (1 << 5),
+       TIPC_LINK_PROTO_SEQNO = (1 << 6)
 };
 
-#define TIPC_NODE_CAPABILITIES (TIPC_BCAST_SYNCH | \
-                               TIPC_BCAST_STATE_NACK | \
-                               TIPC_BCAST_RCAST | \
-                               TIPC_BLOCK_FLOWCTL | \
-                               TIPC_NODE_ID128)
+#define TIPC_NODE_CAPABILITIES (TIPC_BCAST_SYNCH       |  \
+                               TIPC_BCAST_STATE_NACK  |  \
+                               TIPC_BCAST_RCAST       |  \
+                               TIPC_BLOCK_FLOWCTL     |  \
+                               TIPC_NODE_ID128        |  \
+                               TIPC_LINK_PROTO_SEQNO)
 #define INVALID_BEARER_ID -1
 
 void tipc_node_stop(struct net *net);
index 930852c54d7a6e97207c61a7c942e487781457e7..c1e93c9515bca5abaae647f18402a8bc25bfa2b4 100644 (file)
@@ -411,7 +411,6 @@ static int tipc_sk_sock_err(struct socket *sock, long *timeout)
 static int tipc_sk_create(struct net *net, struct socket *sock,
                          int protocol, int kern)
 {
-       struct tipc_net *tn;
        const struct proto_ops *ops;
        struct sock *sk;
        struct tipc_sock *tsk;
@@ -446,7 +445,6 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
        INIT_LIST_HEAD(&tsk->publications);
        INIT_LIST_HEAD(&tsk->cong_links);
        msg = &tsk->phdr;
-       tn = net_generic(sock_net(sk), tipc_net_id);
 
        /* Finish initializing socket data structures */
        sock->ops = ops;
@@ -716,7 +714,7 @@ static __poll_t tipc_poll(struct file *file, struct socket *sock,
        struct tipc_sock *tsk = tipc_sk(sk);
        __poll_t revents = 0;
 
-       sock_poll_wait(file, sk_sleep(sk), wait);
+       sock_poll_wait(file, wait);
 
        if (sk->sk_shutdown & RCV_SHUTDOWN)
                revents |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
@@ -1117,7 +1115,7 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
        u32 self = tipc_own_addr(net);
        u32 type, lower, upper, scope;
        struct sk_buff *skb, *_skb;
-       u32 portid, oport, onode;
+       u32 portid, onode;
        struct sk_buff_head tmpq;
        struct list_head dports;
        struct tipc_msg *hdr;
@@ -1133,7 +1131,6 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
                user = msg_user(hdr);
                mtyp = msg_type(hdr);
                hlen = skb_headroom(skb) + msg_hdr_sz(hdr);
-               oport = msg_origport(hdr);
                onode = msg_orignode(hdr);
                type = msg_nametype(hdr);
 
@@ -3320,6 +3317,11 @@ int tipc_sk_fill_sock_diag(struct sk_buff *skb, struct netlink_callback *cb,
                goto stat_msg_cancel;
 
        nla_nest_end(skb, stat);
+
+       if (tsk->group)
+               if (tipc_group_fill_sock_diag(tsk->group, skb))
+                       goto stat_msg_cancel;
+
        nla_nest_end(skb, attrs);
 
        return 0;
index a7a8f8e20ff3051b92b74e12fb3d6024676475fd..292742e50bfa4b3a540cbaa3eb2b07400e7141be 100644 (file)
@@ -52,9 +52,12 @@ static DEFINE_SPINLOCK(tls_device_lock);
 
 static void tls_device_free_ctx(struct tls_context *ctx)
 {
-       struct tls_offload_context *offload_ctx = tls_offload_ctx(ctx);
+       if (ctx->tx_conf == TLS_HW)
+               kfree(tls_offload_ctx_tx(ctx));
+
+       if (ctx->rx_conf == TLS_HW)
+               kfree(tls_offload_ctx_rx(ctx));
 
-       kfree(offload_ctx);
        kfree(ctx);
 }
 
@@ -71,10 +74,11 @@ static void tls_device_gc_task(struct work_struct *work)
        list_for_each_entry_safe(ctx, tmp, &gc_list, list) {
                struct net_device *netdev = ctx->netdev;
 
-               if (netdev) {
+               if (netdev && ctx->tx_conf == TLS_HW) {
                        netdev->tlsdev_ops->tls_dev_del(netdev, ctx,
                                                        TLS_OFFLOAD_CTX_DIR_TX);
                        dev_put(netdev);
+                       ctx->netdev = NULL;
                }
 
                list_del(&ctx->list);
@@ -82,6 +86,22 @@ static void tls_device_gc_task(struct work_struct *work)
        }
 }
 
+static void tls_device_attach(struct tls_context *ctx, struct sock *sk,
+                             struct net_device *netdev)
+{
+       if (sk->sk_destruct != tls_device_sk_destruct) {
+               refcount_set(&ctx->refcount, 1);
+               dev_hold(netdev);
+               ctx->netdev = netdev;
+               spin_lock_irq(&tls_device_lock);
+               list_add_tail(&ctx->list, &tls_device_list);
+               spin_unlock_irq(&tls_device_lock);
+
+               ctx->sk_destruct = sk->sk_destruct;
+               sk->sk_destruct = tls_device_sk_destruct;
+       }
+}
+
 static void tls_device_queue_ctx_destruction(struct tls_context *ctx)
 {
        unsigned long flags;
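
tls_device_attach() above is idempotent by construction: it keys off whether sk->sk_destruct already points at tls_device_sk_destruct, and it saves the previous destructor so the TLS one can chain to it on teardown (the tls_ctx->sk_destruct(sk) call in the destructor hunk below). A minimal sketch of that save-and-chain pattern, with a global context standing in for the per-socket tls_context:

	#include <stddef.h>

	struct sock;

	struct ctx {
		void (*saved_destruct)(struct sock *);
	};

	static struct ctx tls_ctx;   /* stand-in for the per-socket context */

	struct sock {
		void (*sk_destruct)(struct sock *);
	};

	static void tls_destruct(struct sock *sk)
	{
		/* ... release offload records, crypto state ... */
		if (tls_ctx.saved_destruct)
			tls_ctx.saved_destruct(sk);  /* chain to the original */
	}

	static void attach_once(struct sock *sk)
	{
		if (sk->sk_destruct == tls_destruct)
			return;                      /* already attached */
		tls_ctx.saved_destruct = sk->sk_destruct;
		sk->sk_destruct = tls_destruct;
	}
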
@@ -125,7 +145,7 @@ static void destroy_record(struct tls_record_info *record)
        kfree(record);
 }
 
-static void delete_all_records(struct tls_offload_context *offload_ctx)
+static void delete_all_records(struct tls_offload_context_tx *offload_ctx)
 {
        struct tls_record_info *info, *temp;
 
@@ -141,14 +161,14 @@ static void tls_icsk_clean_acked(struct sock *sk, u32 acked_seq)
 {
        struct tls_context *tls_ctx = tls_get_ctx(sk);
        struct tls_record_info *info, *temp;
-       struct tls_offload_context *ctx;
+       struct tls_offload_context_tx *ctx;
        u64 deleted_records = 0;
        unsigned long flags;
 
        if (!tls_ctx)
                return;
 
-       ctx = tls_offload_ctx(tls_ctx);
+       ctx = tls_offload_ctx_tx(tls_ctx);
 
        spin_lock_irqsave(&ctx->lock, flags);
        info = ctx->retransmit_hint;
@@ -179,15 +199,17 @@ static void tls_icsk_clean_acked(struct sock *sk, u32 acked_seq)
 void tls_device_sk_destruct(struct sock *sk)
 {
        struct tls_context *tls_ctx = tls_get_ctx(sk);
-       struct tls_offload_context *ctx = tls_offload_ctx(tls_ctx);
+       struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx);
 
-       if (ctx->open_record)
-               destroy_record(ctx->open_record);
+       tls_ctx->sk_destruct(sk);
 
-       delete_all_records(ctx);
-       crypto_free_aead(ctx->aead_send);
-       ctx->sk_destruct(sk);
-       clean_acked_data_disable(inet_csk(sk));
+       if (tls_ctx->tx_conf == TLS_HW) {
+               if (ctx->open_record)
+                       destroy_record(ctx->open_record);
+               delete_all_records(ctx);
+               crypto_free_aead(ctx->aead_send);
+               clean_acked_data_disable(inet_csk(sk));
+       }
 
        if (refcount_dec_and_test(&tls_ctx->refcount))
                tls_device_queue_ctx_destruction(tls_ctx);
@@ -219,7 +241,7 @@ static void tls_append_frag(struct tls_record_info *record,
 
 static int tls_push_record(struct sock *sk,
                           struct tls_context *ctx,
-                          struct tls_offload_context *offload_ctx,
+                          struct tls_offload_context_tx *offload_ctx,
                           struct tls_record_info *record,
                           struct page_frag *pfrag,
                           int flags,
@@ -264,7 +286,7 @@ static int tls_push_record(struct sock *sk,
        return tls_push_sg(sk, ctx, offload_ctx->sg_tx_data, 0, flags);
 }
 
-static int tls_create_new_record(struct tls_offload_context *offload_ctx,
+static int tls_create_new_record(struct tls_offload_context_tx *offload_ctx,
                                 struct page_frag *pfrag,
                                 size_t prepend_size)
 {
@@ -290,7 +312,7 @@ static int tls_create_new_record(struct tls_offload_context *offload_ctx,
 }
 
 static int tls_do_allocation(struct sock *sk,
-                            struct tls_offload_context *offload_ctx,
+                            struct tls_offload_context_tx *offload_ctx,
                             struct page_frag *pfrag,
                             size_t prepend_size)
 {
@@ -324,7 +346,7 @@ static int tls_push_data(struct sock *sk,
                         unsigned char record_type)
 {
        struct tls_context *tls_ctx = tls_get_ctx(sk);
-       struct tls_offload_context *ctx = tls_offload_ctx(tls_ctx);
+       struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx);
        int tls_push_record_flags = flags | MSG_SENDPAGE_NOTLAST;
        int more = flags & (MSG_SENDPAGE_NOTLAST | MSG_MORE);
        struct tls_record_info *record = ctx->open_record;
@@ -477,7 +499,7 @@ out:
        return rc;
 }
 
-struct tls_record_info *tls_get_record(struct tls_offload_context *context,
+struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context,
                                       u32 seq, u64 *p_record_sn)
 {
        u64 record_sn = context->hint_record_sn;
@@ -520,11 +542,123 @@ static int tls_device_push_pending_record(struct sock *sk, int flags)
        return tls_push_data(sk, &msg_iter, 0, flags, TLS_RECORD_TYPE_DATA);
 }
 
+void handle_device_resync(struct sock *sk, u32 seq, u64 rcd_sn)
+{
+       struct tls_context *tls_ctx = tls_get_ctx(sk);
+       struct net_device *netdev = tls_ctx->netdev;
+       struct tls_offload_context_rx *rx_ctx;
+       u32 is_req_pending;
+       s64 resync_req;
+       u32 req_seq;
+
+       if (tls_ctx->rx_conf != TLS_HW)
+               return;
+
+       rx_ctx = tls_offload_ctx_rx(tls_ctx);
+       resync_req = atomic64_read(&rx_ctx->resync_req);
+       req_seq = ntohl(resync_req >> 32) - ((u32)TLS_HEADER_SIZE - 1);
+       is_req_pending = resync_req;
+
+       if (unlikely(is_req_pending) && req_seq == seq &&
+           atomic64_try_cmpxchg(&rx_ctx->resync_req, &resync_req, 0))
+               netdev->tlsdev_ops->tls_dev_resync_rx(netdev, sk,
+                                                     seq + TLS_HEADER_SIZE - 1,
+                                                     rcd_sn);
+}
+
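
handle_device_resync() consumes a device resync request exactly once: the driver publishes the wanted TCP sequence number in the upper 32 bits of resync_req, any nonzero value marks the request pending, and the socket claims it with a compare-and-swap to zero so concurrent paths cannot both act on it. A userspace analogue with C11 atomics; the explicit low-bit pending flag is an illustrative packing convention, not the driver ABI:

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdint.h>

	static _Atomic uint64_t resync_req;  /* seq << 32 | pending flag */

	/* Driver side: publish a resync request for TCP sequence 'seq' */
	static void post_resync(uint32_t seq)
	{
		atomic_store(&resync_req, ((uint64_t)seq << 32) | 1);
	}

	/* Socket side: claim the request iff we are at the requested seq */
	static bool try_consume_resync(uint32_t seq)
	{
		uint64_t req = atomic_load(&resync_req);

		if (!(req & 1))
			return false;            /* nothing pending */
		if ((uint32_t)(req >> 32) != seq)
			return false;            /* not at that record yet */
		/* cmpxchg to 0 guarantees at most one winner */
		return atomic_compare_exchange_strong(&resync_req, &req, 0);
	}
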
+static int tls_device_reencrypt(struct sock *sk, struct sk_buff *skb)
+{
+       struct strp_msg *rxm = strp_msg(skb);
+       int err = 0, offset = rxm->offset, copy, nsg;
+       struct sk_buff *skb_iter, *unused;
+       struct scatterlist sg[1];
+       char *orig_buf, *buf;
+
+       orig_buf = kmalloc(rxm->full_len + TLS_HEADER_SIZE +
+                          TLS_CIPHER_AES_GCM_128_IV_SIZE, sk->sk_allocation);
+       if (!orig_buf)
+               return -ENOMEM;
+       buf = orig_buf;
+
+       nsg = skb_cow_data(skb, 0, &unused);
+       if (unlikely(nsg < 0)) {
+               err = nsg;
+               goto free_buf;
+       }
+
+       sg_init_table(sg, 1);
+       sg_set_buf(&sg[0], buf,
+                  rxm->full_len + TLS_HEADER_SIZE +
+                  TLS_CIPHER_AES_GCM_128_IV_SIZE);
+       skb_copy_bits(skb, offset, buf,
+                     TLS_HEADER_SIZE + TLS_CIPHER_AES_GCM_128_IV_SIZE);
+
+       /* We are interested only in the decrypted data not the auth */
+       err = decrypt_skb(sk, skb, sg);
+       if (err != -EBADMSG)
+               goto free_buf;
+       else
+               err = 0;
+
+       copy = min_t(int, skb_pagelen(skb) - offset,
+                    rxm->full_len - TLS_CIPHER_AES_GCM_128_TAG_SIZE);
+
+       if (skb->decrypted)
+               skb_store_bits(skb, offset, buf, copy);
+
+       offset += copy;
+       buf += copy;
+
+       skb_walk_frags(skb, skb_iter) {
+               copy = min_t(int, skb_iter->len,
+                            rxm->full_len - offset + rxm->offset -
+                            TLS_CIPHER_AES_GCM_128_TAG_SIZE);
+
+               if (skb_iter->decrypted)
+                       skb_store_bits(skb_iter, offset, buf, copy);
+
+               offset += copy;
+               buf += copy;
+       }
+
+free_buf:
+       kfree(orig_buf);
+       return err;
+}
+
+int tls_device_decrypted(struct sock *sk, struct sk_buff *skb)
+{
+       struct tls_context *tls_ctx = tls_get_ctx(sk);
+       struct tls_offload_context_rx *ctx = tls_offload_ctx_rx(tls_ctx);
+       int is_decrypted = skb->decrypted;
+       int is_encrypted = !is_decrypted;
+       struct sk_buff *skb_iter;
+
+       /* Skip if it is already decrypted */
+       if (ctx->sw.decrypted)
+               return 0;
+
+       /* Check if all the data is decrypted already */
+       skb_walk_frags(skb, skb_iter) {
+               is_decrypted &= skb_iter->decrypted;
+               is_encrypted &= !skb_iter->decrypted;
+       }
+
+       ctx->sw.decrypted |= is_decrypted;
+
+       /* Return immediately if the record is either entirely plaintext or
+        * entirely ciphertext. Otherwise handle re-encryption of the
+        * partially decrypted record.
+        */
+       return (is_encrypted || is_decrypted) ? 0 :
+               tls_device_reencrypt(sk, skb);
+}
+
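
tls_device_decrypted() ANDs the per-fragment decrypted bits together to classify a record as all-plaintext, all-ciphertext, or mixed; only the mixed case pays for the software re-decrypt in tls_device_reencrypt(). A sketch of the classification with illustrative names:

	#include <stdbool.h>
	#include <stddef.h>

	enum record_state { REC_PLAINTEXT, REC_CIPHERTEXT, REC_MIXED };

	static enum record_state classify(const bool *frag_decrypted, size_t nfrags)
	{
		bool all_dec = true, all_enc = true;
		size_t i;

		for (i = 0; i < nfrags; i++) {
			all_dec &= frag_decrypted[i];   /* every fragment decrypted? */
			all_enc &= !frag_decrypted[i];  /* every fragment encrypted? */
		}
		if (all_dec)
			return REC_PLAINTEXT;  /* device already did all the work */
		if (all_enc)
			return REC_CIPHERTEXT; /* normal software decrypt path */
		return REC_MIXED;              /* only this case needs the rework */
	}
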
 int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
 {
        u16 nonce_size, tag_size, iv_size, rec_seq_size;
        struct tls_record_info *start_marker_record;
-       struct tls_offload_context *offload_ctx;
+       struct tls_offload_context_tx *offload_ctx;
        struct tls_crypto_info *crypto_info;
        struct net_device *netdev;
        char *iv, *rec_seq;
@@ -546,7 +680,7 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
                goto out;
        }
 
-       offload_ctx = kzalloc(TLS_OFFLOAD_CONTEXT_SIZE, GFP_KERNEL);
+       offload_ctx = kzalloc(TLS_OFFLOAD_CONTEXT_SIZE_TX, GFP_KERNEL);
        if (!offload_ctx) {
                rc = -ENOMEM;
                goto free_marker_record;
@@ -582,12 +716,11 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
        memcpy(ctx->tx.iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv, iv_size);
 
        ctx->tx.rec_seq_size = rec_seq_size;
-       ctx->tx.rec_seq = kmalloc(rec_seq_size, GFP_KERNEL);
+       ctx->tx.rec_seq = kmemdup(rec_seq, rec_seq_size, GFP_KERNEL);
        if (!ctx->tx.rec_seq) {
                rc = -ENOMEM;
                goto free_iv;
        }
-       memcpy(ctx->tx.rec_seq, rec_seq, rec_seq_size);
 
        rc = tls_sw_fallback_init(sk, offload_ctx, crypto_info);
        if (rc)
@@ -609,7 +742,6 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
 
        clean_acked_data_enable(inet_csk(sk), &tls_icsk_clean_acked);
        ctx->push_pending_record = tls_device_push_pending_record;
-       offload_ctx->sk_destruct = sk->sk_destruct;
 
        /* TLS offload is greatly simplified if we don't send
         * SKBs where only part of the payload needs to be encrypted.
@@ -619,8 +751,6 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
        if (skb)
                TCP_SKB_CB(skb)->eor = 1;
 
-       refcount_set(&ctx->refcount, 1);
-
        /* We support starting offload on multiple sockets
         * concurrently, so we only need a read lock here.
         * This lock must precede get_netdev_for_sock to prevent races between
@@ -655,19 +785,14 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
        if (rc)
                goto release_netdev;
 
-       ctx->netdev = netdev;
-
-       spin_lock_irq(&tls_device_lock);
-       list_add_tail(&ctx->list, &tls_device_list);
-       spin_unlock_irq(&tls_device_lock);
+       tls_device_attach(ctx, sk, netdev);
 
-       sk->sk_validate_xmit_skb = tls_validate_xmit_skb;
        /* following this assignment tls_is_sk_tx_device_offloaded
         * will return true and the context might be accessed
         * by the netdev's xmit function.
         */
-       smp_store_release(&sk->sk_destruct,
-                         &tls_device_sk_destruct);
+       smp_store_release(&sk->sk_validate_xmit_skb, tls_validate_xmit_skb);
+       dev_put(netdev);
        up_read(&device_offload_lock);
        goto out;
 
@@ -690,6 +815,105 @@ out:
        return rc;
 }
 
+int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx)
+{
+       struct tls_offload_context_rx *context;
+       struct net_device *netdev;
+       int rc = 0;
+
+       /* We support starting offload on multiple sockets
+        * concurrently, so we only need a read lock here.
+        * This lock must precede get_netdev_for_sock to prevent races between
+        * NETDEV_DOWN and setsockopt.
+        */
+       down_read(&device_offload_lock);
+       netdev = get_netdev_for_sock(sk);
+       if (!netdev) {
+               pr_err_ratelimited("%s: netdev not found\n", __func__);
+               rc = -EINVAL;
+               goto release_lock;
+       }
+
+       if (!(netdev->features & NETIF_F_HW_TLS_RX)) {
+               pr_err_ratelimited("%s: netdev %s with no TLS offload\n",
+                                  __func__, netdev->name);
+               rc = -ENOTSUPP;
+               goto release_netdev;
+       }
+
+       /* Avoid offloading if the device is down
+        * We don't want to offload new flows after
+        * the NETDEV_DOWN event
+        */
+       if (!(netdev->flags & IFF_UP)) {
+               rc = -EINVAL;
+               goto release_netdev;
+       }
+
+       context = kzalloc(TLS_OFFLOAD_CONTEXT_SIZE_RX, GFP_KERNEL);
+       if (!context) {
+               rc = -ENOMEM;
+               goto release_netdev;
+       }
+
+       ctx->priv_ctx_rx = context;
+       rc = tls_set_sw_offload(sk, ctx, 0);
+       if (rc)
+               goto release_ctx;
+
+       rc = netdev->tlsdev_ops->tls_dev_add(netdev, sk, TLS_OFFLOAD_CTX_DIR_RX,
+                                            &ctx->crypto_recv,
+                                            tcp_sk(sk)->copied_seq);
+       if (rc) {
+               pr_err_ratelimited("%s: The netdev has refused to offload this socket\n",
+                                  __func__);
+               goto free_sw_resources;
+       }
+
+       tls_device_attach(ctx, sk, netdev);
+       goto release_netdev;
+
+free_sw_resources:
+       tls_sw_free_resources_rx(sk);
+release_ctx:
+       ctx->priv_ctx_rx = NULL;
+release_netdev:
+       dev_put(netdev);
+release_lock:
+       up_read(&device_offload_lock);
+       return rc;
+}
+
+void tls_device_offload_cleanup_rx(struct sock *sk)
+{
+       struct tls_context *tls_ctx = tls_get_ctx(sk);
+       struct net_device *netdev;
+
+       down_read(&device_offload_lock);
+       netdev = tls_ctx->netdev;
+       if (!netdev)
+               goto out;
+
+       if (!(netdev->features & NETIF_F_HW_TLS_RX)) {
+               pr_err_ratelimited("%s: device is missing NETIF_F_HW_TLS_RX cap\n",
+                                  __func__);
+               goto out;
+       }
+
+       netdev->tlsdev_ops->tls_dev_del(netdev, tls_ctx,
+                                       TLS_OFFLOAD_CTX_DIR_RX);
+
+       if (tls_ctx->tx_conf != TLS_HW) {
+               dev_put(netdev);
+               tls_ctx->netdev = NULL;
+       }
+out:
+       up_read(&device_offload_lock);
+       kfree(tls_ctx->rx.rec_seq);
+       kfree(tls_ctx->rx.iv);
+       tls_sw_release_resources_rx(sk);
+}
+
 static int tls_device_down(struct net_device *netdev)
 {
        struct tls_context *ctx, *tmp;
@@ -710,8 +934,12 @@ static int tls_device_down(struct net_device *netdev)
        spin_unlock_irqrestore(&tls_device_lock, flags);
 
        list_for_each_entry_safe(ctx, tmp, &list, list) {
-               netdev->tlsdev_ops->tls_dev_del(netdev, ctx,
-                                               TLS_OFFLOAD_CTX_DIR_TX);
+               if (ctx->tx_conf == TLS_HW)
+                       netdev->tlsdev_ops->tls_dev_del(netdev, ctx,
+                                                       TLS_OFFLOAD_CTX_DIR_TX);
+               if (ctx->rx_conf == TLS_HW)
+                       netdev->tlsdev_ops->tls_dev_del(netdev, ctx,
+                                                       TLS_OFFLOAD_CTX_DIR_RX);
                ctx->netdev = NULL;
                dev_put(netdev);
                list_del_init(&ctx->list);
@@ -732,12 +960,16 @@ static int tls_dev_event(struct notifier_block *this, unsigned long event,
 {
        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 
-       if (!(dev->features & NETIF_F_HW_TLS_TX))
+       if (!(dev->features & (NETIF_F_HW_TLS_RX | NETIF_F_HW_TLS_TX)))
                return NOTIFY_DONE;
 
        switch (event) {
        case NETDEV_REGISTER:
        case NETDEV_FEAT_CHANGE:
+               if ((dev->features & NETIF_F_HW_TLS_RX) &&
+                   !dev->tlsdev_ops->tls_dev_resync_rx)
+                       return NOTIFY_BAD;
+
                if  (dev->tlsdev_ops &&
                     dev->tlsdev_ops->tls_dev_add &&
                     dev->tlsdev_ops->tls_dev_del)
index 748914abdb604e6434db7d8326e8a8480567e1da..e3313c45663f6debd5bd8bfa78b290f39acd8a7d 100644 (file)
@@ -214,7 +214,7 @@ static void complete_skb(struct sk_buff *nskb, struct sk_buff *skb, int headln)
 
 static int fill_sg_in(struct scatterlist *sg_in,
                      struct sk_buff *skb,
-                     struct tls_offload_context *ctx,
+                     struct tls_offload_context_tx *ctx,
                      u64 *rcd_sn,
                      s32 *sync_size,
                      int *resync_sgs)
@@ -299,7 +299,7 @@ static struct sk_buff *tls_enc_skb(struct tls_context *tls_ctx,
                                   s32 sync_size, u64 rcd_sn)
 {
        int tcp_payload_offset = skb_transport_offset(skb) + tcp_hdrlen(skb);
-       struct tls_offload_context *ctx = tls_offload_ctx(tls_ctx);
+       struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx);
        int payload_len = skb->len - tcp_payload_offset;
        void *buf, *iv, *aad, *dummy_buf;
        struct aead_request *aead_req;
@@ -361,7 +361,7 @@ static struct sk_buff *tls_sw_fallback(struct sock *sk, struct sk_buff *skb)
 {
        int tcp_payload_offset = skb_transport_offset(skb) + tcp_hdrlen(skb);
        struct tls_context *tls_ctx = tls_get_ctx(sk);
-       struct tls_offload_context *ctx = tls_offload_ctx(tls_ctx);
+       struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx);
        int payload_len = skb->len - tcp_payload_offset;
        struct scatterlist *sg_in, sg_out[3];
        struct sk_buff *nskb = NULL;
@@ -413,9 +413,10 @@ struct sk_buff *tls_validate_xmit_skb(struct sock *sk,
 
        return tls_sw_fallback(sk, skb);
 }
+EXPORT_SYMBOL_GPL(tls_validate_xmit_skb);
 
 int tls_sw_fallback_init(struct sock *sk,
-                        struct tls_offload_context *offload_ctx,
+                        struct tls_offload_context_tx *offload_ctx,
                         struct tls_crypto_info *crypto_info)
 {
        const u8 *key;
index 301f224304698950544088c16518ea2e14ff41a6..b09867c8b8179f06634d3e614c90d2f4b56cf75e 100644 (file)
@@ -51,15 +51,6 @@ enum {
        TLSV6,
        TLS_NUM_PROTS,
 };
-enum {
-       TLS_BASE,
-       TLS_SW,
-#ifdef CONFIG_TLS_DEVICE
-       TLS_HW,
-#endif
-       TLS_HW_RECORD,
-       TLS_NUM_CONFIG,
-};
 
 static struct proto *saved_tcpv6_prot;
 static DEFINE_MUTEX(tcpv6_prot_mutex);
@@ -290,7 +281,10 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
        }
 
 #ifdef CONFIG_TLS_DEVICE
-       if (ctx->tx_conf != TLS_HW) {
+       if (ctx->rx_conf == TLS_HW)
+               tls_device_offload_cleanup_rx(sk);
+
+       if (ctx->tx_conf != TLS_HW && ctx->rx_conf != TLS_HW) {
 #else
        {
 #endif
@@ -470,8 +464,16 @@ static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval,
                        conf = TLS_SW;
                }
        } else {
-               rc = tls_set_sw_offload(sk, ctx, 0);
-               conf = TLS_SW;
+#ifdef CONFIG_TLS_DEVICE
+               rc = tls_set_device_offload_rx(sk, ctx);
+               conf = TLS_HW;
+               if (rc) {
+#else
+               {
+#endif
+                       rc = tls_set_sw_offload(sk, ctx, 0);
+                       conf = TLS_SW;
+               }
        }
 
        if (rc)
@@ -629,6 +631,12 @@ static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG],
        prot[TLS_HW][TLS_SW] = prot[TLS_BASE][TLS_SW];
        prot[TLS_HW][TLS_SW].sendmsg            = tls_device_sendmsg;
        prot[TLS_HW][TLS_SW].sendpage           = tls_device_sendpage;
+
+       prot[TLS_BASE][TLS_HW] = prot[TLS_BASE][TLS_SW];
+
+       prot[TLS_SW][TLS_HW] = prot[TLS_SW][TLS_SW];
+
+       prot[TLS_HW][TLS_HW] = prot[TLS_HW][TLS_SW];
 #endif
 
        prot[TLS_HW_RECORD][TLS_HW_RECORD] = *base;
index 1f3d9789af30fb88cf9e7550b40dcda1e897e262..ff3a6904a722fdc7fd8727e21f4aff7028ef9244 100644 (file)
@@ -53,18 +53,14 @@ static int tls_do_decryption(struct sock *sk,
 {
        struct tls_context *tls_ctx = tls_get_ctx(sk);
        struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
-       struct strp_msg *rxm = strp_msg(skb);
        struct aead_request *aead_req;
 
        int ret;
-       unsigned int req_size = sizeof(struct aead_request) +
-               crypto_aead_reqsize(ctx->aead_recv);
 
-       aead_req = kzalloc(req_size, flags);
+       aead_req = aead_request_alloc(ctx->aead_recv, flags);
        if (!aead_req)
                return -ENOMEM;
 
-       aead_request_set_tfm(aead_req, ctx->aead_recv);
        aead_request_set_ad(aead_req, TLS_AAD_SPACE_SIZE);
        aead_request_set_crypt(aead_req, sgin, sgout,
                               data_len + tls_ctx->rx.tag_size,
@@ -74,19 +70,7 @@ static int tls_do_decryption(struct sock *sk,
 
        ret = crypto_wait_req(crypto_aead_decrypt(aead_req), &ctx->async_wait);
 
-       if (ret < 0)
-               goto out;
-
-       rxm->offset += tls_ctx->rx.prepend_size;
-       rxm->full_len -= tls_ctx->rx.overhead_size;
-       tls_advance_record_sn(sk, &tls_ctx->rx);
-
-       ctx->decrypted = true;
-
-       ctx->saved_data_ready(sk);
-
-out:
-       kfree(aead_req);
+       aead_request_free(aead_req);
        return ret;
 }
 
@@ -224,8 +208,7 @@ static int tls_push_record(struct sock *sk, int flags,
        struct aead_request *req;
        int rc;
 
-       req = kzalloc(sizeof(struct aead_request) +
-                     crypto_aead_reqsize(ctx->aead_send), sk->sk_allocation);
+       req = aead_request_alloc(ctx->aead_send, sk->sk_allocation);
        if (!req)
                return -ENOMEM;
 
@@ -267,7 +250,7 @@ static int tls_push_record(struct sock *sk, int flags,
 
        tls_advance_record_sn(sk, &tls_ctx->tx);
 out_req:
-       kfree(req);
+       aead_request_free(req);
        return rc;
 }
 
@@ -329,6 +312,8 @@ static int zerocopy_from_iter(struct sock *sk, struct iov_iter *from,
        }
 
 out:
+       if (rc)
+               iov_iter_revert(from, size - *size_used);
        *size_used = size;
        *pages_used = num_elem;
 
@@ -377,6 +362,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
        int record_room;
        bool full_record;
        int orig_size;
+       bool is_kvec = msg->msg_iter.type & ITER_KVEC;
 
        if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL))
                return -ENOTSUPP;
@@ -425,8 +411,7 @@ alloc_encrypted:
                        try_to_copy -= required_size - ctx->sg_encrypted_size;
                        full_record = true;
                }
-
-               if (full_record || eor) {
+               if (!is_kvec && (full_record || eor)) {
                        ret = zerocopy_from_iter(sk, &msg->msg_iter,
                                try_to_copy, &ctx->sg_plaintext_num_elem,
                                &ctx->sg_plaintext_size,
@@ -438,15 +423,11 @@ alloc_encrypted:
 
                        copied += try_to_copy;
                        ret = tls_push_record(sk, msg->msg_flags, record_type);
-                       if (!ret)
-                               continue;
-                       if (ret < 0)
+                       if (ret)
                                goto send_end;
+                       continue;
 
-                       copied -= try_to_copy;
 fallback_to_reg_send:
-                       iov_iter_revert(&msg->msg_iter,
-                                       ctx->sg_plaintext_size - orig_size);
                        trim_sg(sk, ctx->sg_plaintext_data,
                                &ctx->sg_plaintext_num_elem,
                                &ctx->sg_plaintext_size,
@@ -673,8 +654,38 @@ static struct sk_buff *tls_wait_data(struct sock *sk, int flags,
        return skb;
 }
 
-static int decrypt_skb(struct sock *sk, struct sk_buff *skb,
-                      struct scatterlist *sgout)
+static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb,
+                             struct scatterlist *sgout, bool *zc)
+{
+       struct tls_context *tls_ctx = tls_get_ctx(sk);
+       struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
+       struct strp_msg *rxm = strp_msg(skb);
+       int err = 0;
+
+#ifdef CONFIG_TLS_DEVICE
+       err = tls_device_decrypted(sk, skb);
+       if (err < 0)
+               return err;
+#endif
+       if (!ctx->decrypted) {
+               err = decrypt_skb(sk, skb, sgout);
+               if (err < 0)
+                       return err;
+       } else {
+               *zc = false;
+       }
+
+       rxm->offset += tls_ctx->rx.prepend_size;
+       rxm->full_len -= tls_ctx->rx.overhead_size;
+       tls_advance_record_sn(sk, &tls_ctx->rx);
+       ctx->decrypted = true;
+       ctx->saved_data_ready(sk);
+
+       return err;
+}
+
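
The split above keeps decrypt_skb() as a pure decrypt primitive (it is also called from the device code) while decrypt_skb_update() centralizes the post-decrypt bookkeeping that used to live in tls_do_decryption(): skip the decrypt if the device already did it, advance the record past the TLS prepend, and bump the context state once. A sketch of the wrapper shape with illustrative types:

	#include <stdbool.h>

	struct record { int offset, full_len; };
	struct rx_ctx { bool decrypted; int prepend_size, overhead_size; };

	/* Pure primitive: decrypt in place, touch no stream state. */
	static int do_decrypt(struct record *rec) { (void)rec; return 0; }

	/* Wrapper: decrypt only if still needed, then advance state once. */
	static int decrypt_update(struct rx_ctx *ctx, struct record *rec, bool *zc)
	{
		if (!ctx->decrypted) {
			int err = do_decrypt(rec);

			if (err < 0)
				return err;
		} else {
			*zc = false;          /* data already placed; no zerocopy */
		}
		rec->offset += ctx->prepend_size;    /* skip the TLS header */
		rec->full_len -= ctx->overhead_size; /* drop header + tag */
		ctx->decrypted = true;
		return 0;
	}
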
+int decrypt_skb(struct sock *sk, struct sk_buff *skb,
+               struct scatterlist *sgout)
 {
        struct tls_context *tls_ctx = tls_get_ctx(sk);
        struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
@@ -764,6 +775,7 @@ int tls_sw_recvmsg(struct sock *sk,
        bool cmsg = false;
        int target, err = 0;
        long timeo;
+       bool is_kvec = msg->msg_iter.type & ITER_KVEC;
 
        flags |= nonblock;
 
@@ -807,7 +819,7 @@ int tls_sw_recvmsg(struct sock *sk,
                        page_count = iov_iter_npages(&msg->msg_iter,
                                                     MAX_SKB_FRAGS);
                        to_copy = rxm->full_len - tls_ctx->rx.overhead_size;
-                       if (to_copy <= len && page_count < MAX_SKB_FRAGS &&
+                       if (!is_kvec && to_copy <= len && page_count < MAX_SKB_FRAGS &&
                            likely(!(flags & MSG_PEEK)))  {
                                struct scatterlist sgin[MAX_SKB_FRAGS + 1];
                                int pages = 0;
@@ -824,7 +836,7 @@ int tls_sw_recvmsg(struct sock *sk,
                                if (err < 0)
                                        goto fallback_to_reg_recv;
 
-                               err = decrypt_skb(sk, skb, sgin);
+                               err = decrypt_skb_update(sk, skb, sgin, &zc);
                                for (; pages > 0; pages--)
                                        put_page(sg_page(&sgin[pages]));
                                if (err < 0) {
@@ -833,7 +845,7 @@ int tls_sw_recvmsg(struct sock *sk,
                                }
                        } else {
 fallback_to_reg_recv:
-                               err = decrypt_skb(sk, skb, NULL);
+                               err = decrypt_skb_update(sk, skb, NULL, &zc);
                                if (err < 0) {
                                        tls_err_abort(sk, EBADMSG);
                                        goto recv_end;
@@ -888,6 +900,7 @@ ssize_t tls_sw_splice_read(struct socket *sock,  loff_t *ppos,
        int err = 0;
        long timeo;
        int chunk;
+       bool zc;
 
        lock_sock(sk);
 
@@ -904,7 +917,7 @@ ssize_t tls_sw_splice_read(struct socket *sock,  loff_t *ppos,
        }
 
        if (!ctx->decrypted) {
-               err = decrypt_skb(sk, skb, NULL);
+               err = decrypt_skb_update(sk, skb, NULL, &zc);
 
                if (err < 0) {
                        tls_err_abort(sk, EBADMSG);
@@ -950,7 +963,7 @@ static int tls_read_size(struct strparser *strp, struct sk_buff *skb)
 {
        struct tls_context *tls_ctx = tls_get_ctx(strp->sk);
        struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
-       char header[tls_ctx->rx.prepend_size];
+       char header[TLS_HEADER_SIZE + MAX_IV_SIZE];
        struct strp_msg *rxm = strp_msg(skb);
        size_t cipher_overhead;
        size_t data_len = 0;
@@ -960,6 +973,12 @@ static int tls_read_size(struct strparser *strp, struct sk_buff *skb)
        if (rxm->offset + tls_ctx->rx.prepend_size > skb->len)
                return 0;
 
+       /* Sanity-check size of on-stack buffer. */
+       if (WARN_ON(tls_ctx->rx.prepend_size > sizeof(header))) {
+               ret = -EINVAL;
+               goto read_failure;
+       }
+
        /* Linearize header to local buffer */
        ret = skb_copy_bits(skb, rxm->offset, header, tls_ctx->rx.prepend_size);
 
@@ -987,6 +1006,10 @@ static int tls_read_size(struct strparser *strp, struct sk_buff *skb)
                goto read_failure;
        }
 
+#ifdef CONFIG_TLS_DEVICE
+       handle_device_resync(strp->sk, TCP_SKB_CB(skb)->seq + rxm->offset,
+                            *(u64 *)tls_ctx->rx.rec_seq);
+#endif
        return data_len + TLS_HEADER_SIZE;
 
 read_failure:
@@ -999,16 +1022,13 @@ static void tls_queue(struct strparser *strp, struct sk_buff *skb)
 {
        struct tls_context *tls_ctx = tls_get_ctx(strp->sk);
        struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
-       struct strp_msg *rxm;
-
-       rxm = strp_msg(skb);
 
        ctx->decrypted = false;
 
        ctx->recv_pkt = skb;
        strp_pause(strp);
 
-       strp->sk->sk_state_change(strp->sk);
+       ctx->saved_data_ready(strp->sk);
 }
 
 static void tls_data_ready(struct sock *sk)
@@ -1024,23 +1044,20 @@ void tls_sw_free_resources_tx(struct sock *sk)
        struct tls_context *tls_ctx = tls_get_ctx(sk);
        struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
 
-       if (ctx->aead_send)
-               crypto_free_aead(ctx->aead_send);
+       crypto_free_aead(ctx->aead_send);
        tls_free_both_sg(sk);
 
        kfree(ctx);
 }
 
-void tls_sw_free_resources_rx(struct sock *sk)
+void tls_sw_release_resources_rx(struct sock *sk)
 {
        struct tls_context *tls_ctx = tls_get_ctx(sk);
        struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
 
        if (ctx->aead_recv) {
-               if (ctx->recv_pkt) {
-                       kfree_skb(ctx->recv_pkt);
-                       ctx->recv_pkt = NULL;
-               }
+               kfree_skb(ctx->recv_pkt);
+               ctx->recv_pkt = NULL;
                crypto_free_aead(ctx->aead_recv);
                strp_stop(&ctx->strp);
                write_lock_bh(&sk->sk_callback_lock);
@@ -1050,6 +1067,14 @@ void tls_sw_free_resources_rx(struct sock *sk)
                strp_done(&ctx->strp);
                lock_sock(sk);
        }
+}
+
+void tls_sw_free_resources_rx(struct sock *sk)
+{
+       struct tls_context *tls_ctx = tls_get_ctx(sk);
+       struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
+
+       tls_sw_release_resources_rx(sk);
 
        kfree(ctx);
 }
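
Splitting the teardown into tls_sw_release_resources_rx() and tls_sw_free_resources_rx() lets a caller quiesce the strparser and crypto state while the context allocation stays valid, which a device-offload teardown path needs if it still dereferences the context. A hedged sketch of the two calling conventions this enables:

        /* offload-style teardown (sketch): release now, free later */
        tls_sw_release_resources_rx(sk);
        /* ... device-specific cleanup that may still read the context ... */

        /* pure software teardown: release and free in one call */
        tls_sw_free_resources_rx(sk);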
@@ -1074,28 +1099,38 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
        }
 
        if (tx) {
-               sw_ctx_tx = kzalloc(sizeof(*sw_ctx_tx), GFP_KERNEL);
-               if (!sw_ctx_tx) {
-                       rc = -ENOMEM;
-                       goto out;
+               if (!ctx->priv_ctx_tx) {
+                       sw_ctx_tx = kzalloc(sizeof(*sw_ctx_tx), GFP_KERNEL);
+                       if (!sw_ctx_tx) {
+                               rc = -ENOMEM;
+                               goto out;
+                       }
+                       ctx->priv_ctx_tx = sw_ctx_tx;
+               } else {
+                       sw_ctx_tx =
+                               (struct tls_sw_context_tx *)ctx->priv_ctx_tx;
                }
-               crypto_init_wait(&sw_ctx_tx->async_wait);
-               ctx->priv_ctx_tx = sw_ctx_tx;
        } else {
-               sw_ctx_rx = kzalloc(sizeof(*sw_ctx_rx), GFP_KERNEL);
-               if (!sw_ctx_rx) {
-                       rc = -ENOMEM;
-                       goto out;
+               if (!ctx->priv_ctx_rx) {
+                       sw_ctx_rx = kzalloc(sizeof(*sw_ctx_rx), GFP_KERNEL);
+                       if (!sw_ctx_rx) {
+                               rc = -ENOMEM;
+                               goto out;
+                       }
+                       ctx->priv_ctx_rx = sw_ctx_rx;
+               } else {
+                       sw_ctx_rx =
+                               (struct tls_sw_context_rx *)ctx->priv_ctx_rx;
                }
-               crypto_init_wait(&sw_ctx_rx->async_wait);
-               ctx->priv_ctx_rx = sw_ctx_rx;
        }
 
        if (tx) {
+               crypto_init_wait(&sw_ctx_tx->async_wait);
                crypto_info = &ctx->crypto_send;
                cctx = &ctx->tx;
                aead = &sw_ctx_tx->aead_send;
        } else {
+               crypto_init_wait(&sw_ctx_rx->async_wait);
                crypto_info = &ctx->crypto_recv;
                cctx = &ctx->rx;
                aead = &sw_ctx_rx->aead_recv;
@@ -1120,7 +1155,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
        }
 
        /* Sanity-check the IV size for stack allocations. */
-       if (iv_size > MAX_IV_SIZE) {
+       if (iv_size > MAX_IV_SIZE || nonce_size > MAX_IV_SIZE) {
                rc = -EINVAL;
                goto free_priv;
        }
@@ -1138,12 +1173,11 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
        memcpy(cctx->iv, gcm_128_info->salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE);
        memcpy(cctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv, iv_size);
        cctx->rec_seq_size = rec_seq_size;
-       cctx->rec_seq = kmalloc(rec_seq_size, GFP_KERNEL);
+       cctx->rec_seq = kmemdup(rec_seq, rec_seq_size, GFP_KERNEL);
        if (!cctx->rec_seq) {
                rc = -ENOMEM;
                goto free_iv;
        }
-       memcpy(cctx->rec_seq, rec_seq, rec_seq_size);
 
        if (sw_ctx_tx) {
                sg_init_table(sw_ctx_tx->sg_encrypted_data,
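
The kmemdup() conversion above is the standard two-for-one cleanup; for a hypothetical source buffer src of len bytes the equivalence is:

        /* before: allocate, then copy */
        dst = kmalloc(len, GFP_KERNEL);
        if (!dst)
                return -ENOMEM;
        memcpy(dst, src, len);

        /* after: one call, same allocation semantics */
        dst = kmemdup(src, len, GFP_KERNEL);
        if (!dst)
                return -ENOMEM;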
index e5473c03d667ad51308c3e8b705f3b1187f619e8..d1edfa3cad61eaa63c2edbd4cbe70013aa298b18 100644 (file)
@@ -430,7 +430,12 @@ static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
 
        connected = unix_dgram_peer_wake_connect(sk, other);
 
-       if (unix_recvq_full(other))
+       /* If other is SOCK_DEAD, we want to make sure we signal
+        * POLLOUT, such that a subsequent write() can get a
+        * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
+        * to other and it's full, we will hang waiting for POLLOUT.
+        */
+       if (unix_recvq_full(other) && !sock_flag(other, SOCK_DEAD))
                return 1;
 
        if (connected)
@@ -2635,7 +2640,7 @@ static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wa
        struct sock *sk = sock->sk;
        __poll_t mask;
 
-       sock_poll_wait(file, sk_sleep(sk), wait);
+       sock_poll_wait(file, wait);
        mask = 0;
 
        /* exceptional events? */
@@ -2672,7 +2677,7 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
        unsigned int writable;
        __poll_t mask;
 
-       sock_poll_wait(file, sk_sleep(sk), wait);
+       sock_poll_wait(file, wait);
        mask = 0;
 
        /* exceptional events? */
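
Both poll handlers above pick up an interface change: sock_poll_wait() no longer takes an explicit wait-queue argument and instead derives the queue from the file's socket, removing a class of mismatched-queue bugs. A minimal sketch of a handler on the new signature (demo_poll and its readiness test are illustrative only):

        static __poll_t demo_poll(struct file *file, struct socket *sock,
                                  poll_table *wait)
        {
                struct sock *sk = sock->sk;
                __poll_t mask = 0;

                sock_poll_wait(file, wait);     /* queue derived from the socket */

                if (!skb_queue_empty(&sk->sk_receive_queue))
                        mask |= EPOLLIN | EPOLLRDNORM;
                return mask;
        }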
index eb2db0d3b8805ec0f8db12cf5ac8d410c8b9eb8b..c2a71ae487accee28becf9e3cb3fc514989a382e 100644 (file)
@@ -11,5 +11,3 @@ wimax-y :=            \
        stack.o
 
 wimax-$(CONFIG_DEBUG_FS) += debugfs.o
-
-
index 6c9bedb7431e5f58d02b471c27a32981bffbccb1..24514840746e675e82a38cdd76d85511b5675811 100644 (file)
@@ -76,5 +76,3 @@ void wimax_debugfs_rm(struct wimax_dev *wimax_dev)
 {
        debugfs_remove_recursive(wimax_dev->debugfs_dentry);
 }
-
-
index 54aa146930bd819a45ce3520484326553d37ad16..101b2fa3f32ecedac615e9e10376e386d19971f8 100644 (file)
@@ -404,4 +404,3 @@ error_no_wimax_dev:
        d_fnend(3, NULL, "(skb %p info %p) = %d\n", skb, info, result);
        return result;
 }
-
index 5db731512014b3bddaf6f6553a646b7f8204a562..a6307813b6d5adddc58f3a2683a6a9c6f94ced2d 100644 (file)
@@ -486,7 +486,8 @@ int wimax_dev_add(struct wimax_dev *wimax_dev, struct net_device *net_dev)
        d_fnstart(3, dev, "(wimax_dev %p net_dev %p)\n", wimax_dev, net_dev);
 
        /* Do the RFKILL setup before locking, as RFKILL will call
-        * into our functions. */
+        * into our functions.
+        */
        wimax_dev->net_dev = net_dev;
        result = wimax_rfkill_add(wimax_dev);
        if (result < 0)
@@ -629,4 +630,3 @@ module_exit(wimax_subsys_exit);
 MODULE_AUTHOR("Intel Corporation <linux-wimax@intel.com>");
 MODULE_DESCRIPTION("Linux WiMAX stack");
 MODULE_LICENSE("GPL");
-
index 48e8097339ab44cca29bc9bbc938b58ea3a43333..a88551f3bc43f201cfd09ec7dc93ed2a42e93cf8 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Copyright 2006-2010         Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
- * Copyright 2015      Intel Deutschland GmbH
+ * Copyright 2015-2017 Intel Deutschland GmbH
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -744,6 +744,8 @@ int wiphy_register(struct wiphy *wiphy)
 
        /* sanity check supported bands/channels */
        for (band = 0; band < NUM_NL80211_BANDS; band++) {
+               u16 types = 0;
+
                sband = wiphy->bands[band];
                if (!sband)
                        continue;
@@ -788,6 +790,23 @@ int wiphy_register(struct wiphy *wiphy)
                        sband->channels[i].band = band;
                }
 
+               for (i = 0; i < sband->n_iftype_data; i++) {
+                       const struct ieee80211_sband_iftype_data *iftd;
+
+                       iftd = &sband->iftype_data[i];
+
+                       if (WARN_ON(!iftd->types_mask))
+                               return -EINVAL;
+                       if (WARN_ON(types & iftd->types_mask))
+                               return -EINVAL;
+
+                       /* at least one piece of information must be present */
+                       if (WARN_ON(!iftd->he_cap.has_he))
+                               return -EINVAL;
+
+                       types |= iftd->types_mask;
+               }
+
                have_band = true;
        }
 
index 63eb1b5fdd04561169618ca30110aea5225d92cc..7f52ef56932035fe0d0a503128a9e24f88c0dd22 100644 (file)
@@ -76,7 +76,7 @@ struct cfg80211_registered_device {
        struct cfg80211_scan_request *scan_req; /* protected by RTNL */
        struct sk_buff *scan_msg;
        struct list_head sched_scan_req_list;
-       unsigned long suspend_at;
+       time64_t suspend_at;
        struct work_struct scan_done_wk;
 
        struct genl_info *cur_cmd_info;
index ba0a1f398ce580a978a9d2e4ccd366cb5b644738..e6bce1f130c99da0e55f2be7e11d1521e6cc2b63 100644 (file)
@@ -65,9 +65,9 @@ struct lib80211_tkip_data {
        int key_idx;
 
        struct crypto_skcipher *rx_tfm_arc4;
-       struct crypto_ahash *rx_tfm_michael;
+       struct crypto_shash *rx_tfm_michael;
        struct crypto_skcipher *tx_tfm_arc4;
-       struct crypto_ahash *tx_tfm_michael;
+       struct crypto_shash *tx_tfm_michael;
 
        /* scratch buffers for virt_to_page() (crypto API) */
        u8 rx_hdr[16], tx_hdr[16];
@@ -106,8 +106,7 @@ static void *lib80211_tkip_init(int key_idx)
                goto fail;
        }
 
-       priv->tx_tfm_michael = crypto_alloc_ahash("michael_mic", 0,
-                                                 CRYPTO_ALG_ASYNC);
+       priv->tx_tfm_michael = crypto_alloc_shash("michael_mic", 0, 0);
        if (IS_ERR(priv->tx_tfm_michael)) {
                priv->tx_tfm_michael = NULL;
                goto fail;
@@ -120,8 +119,7 @@ static void *lib80211_tkip_init(int key_idx)
                goto fail;
        }
 
-       priv->rx_tfm_michael = crypto_alloc_ahash("michael_mic", 0,
-                                                 CRYPTO_ALG_ASYNC);
+       priv->rx_tfm_michael = crypto_alloc_shash("michael_mic", 0, 0);
        if (IS_ERR(priv->rx_tfm_michael)) {
                priv->rx_tfm_michael = NULL;
                goto fail;
@@ -131,9 +129,9 @@ static void *lib80211_tkip_init(int key_idx)
 
       fail:
        if (priv) {
-               crypto_free_ahash(priv->tx_tfm_michael);
+               crypto_free_shash(priv->tx_tfm_michael);
                crypto_free_skcipher(priv->tx_tfm_arc4);
-               crypto_free_ahash(priv->rx_tfm_michael);
+               crypto_free_shash(priv->rx_tfm_michael);
                crypto_free_skcipher(priv->rx_tfm_arc4);
                kfree(priv);
        }
@@ -145,9 +143,9 @@ static void lib80211_tkip_deinit(void *priv)
 {
        struct lib80211_tkip_data *_priv = priv;
        if (_priv) {
-               crypto_free_ahash(_priv->tx_tfm_michael);
+               crypto_free_shash(_priv->tx_tfm_michael);
                crypto_free_skcipher(_priv->tx_tfm_arc4);
-               crypto_free_ahash(_priv->rx_tfm_michael);
+               crypto_free_shash(_priv->rx_tfm_michael);
                crypto_free_skcipher(_priv->rx_tfm_arc4);
        }
        kfree(priv);
@@ -510,29 +508,36 @@ static int lib80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
        return keyidx;
 }
 
-static int michael_mic(struct crypto_ahash *tfm_michael, u8 * key, u8 * hdr,
-                      u8 * data, size_t data_len, u8 * mic)
+static int michael_mic(struct crypto_shash *tfm_michael, u8 *key, u8 *hdr,
+                      u8 *data, size_t data_len, u8 *mic)
 {
-       AHASH_REQUEST_ON_STACK(req, tfm_michael);
-       struct scatterlist sg[2];
+       SHASH_DESC_ON_STACK(desc, tfm_michael);
        int err;
 
        if (tfm_michael == NULL) {
                pr_warn("%s(): tfm_michael == NULL\n", __func__);
                return -1;
        }
-       sg_init_table(sg, 2);
-       sg_set_buf(&sg[0], hdr, 16);
-       sg_set_buf(&sg[1], data, data_len);
 
-       if (crypto_ahash_setkey(tfm_michael, key, 8))
+       desc->tfm = tfm_michael;
+       desc->flags = 0;
+
+       if (crypto_shash_setkey(tfm_michael, key, 8))
                return -1;
 
-       ahash_request_set_tfm(req, tfm_michael);
-       ahash_request_set_callback(req, 0, NULL, NULL);
-       ahash_request_set_crypt(req, sg, mic, data_len + 16);
-       err = crypto_ahash_digest(req);
-       ahash_request_zero(req);
+       err = crypto_shash_init(desc);
+       if (err)
+               goto out;
+       err = crypto_shash_update(desc, hdr, 16);
+       if (err)
+               goto out;
+       err = crypto_shash_update(desc, data, data_len);
+       if (err)
+               goto out;
+       err = crypto_shash_final(desc, mic);
+
+out:
+       shash_desc_zero(desc);
        return err;
 }
 
@@ -654,9 +659,9 @@ static int lib80211_tkip_set_key(void *key, int len, u8 * seq, void *priv)
 {
        struct lib80211_tkip_data *tkey = priv;
        int keyidx;
-       struct crypto_ahash *tfm = tkey->tx_tfm_michael;
+       struct crypto_shash *tfm = tkey->tx_tfm_michael;
        struct crypto_skcipher *tfm2 = tkey->tx_tfm_arc4;
-       struct crypto_ahash *tfm3 = tkey->rx_tfm_michael;
+       struct crypto_shash *tfm3 = tkey->rx_tfm_michael;
        struct crypto_skcipher *tfm4 = tkey->rx_tfm_arc4;
 
        keyidx = tkey->key_idx;
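
The ahash-to-shash conversion above drops the scatterlist plumbing in favour of direct buffer walks, which fits a small two-part MIC. When the input is one contiguous buffer, the same synchronous API collapses further into a single call; a hedged sketch with error checks elided and key, data, data_len and mic as in the code above:

        struct crypto_shash *tfm = crypto_alloc_shash("michael_mic", 0, 0);
        SHASH_DESC_ON_STACK(desc, tfm);

        desc->tfm = tfm;
        desc->flags = 0;
        crypto_shash_setkey(tfm, key, 8);
        crypto_shash_digest(desc, data, data_len, mic); /* init+update+final */
        shash_desc_zero(desc);
        crypto_free_shash(tfm);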
index 80bc986c79e5aea8d50121be481833738a1d50b7..5fb9b7dd98318b6e9d4842474361104855b42983 100644 (file)
@@ -428,6 +428,8 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
        [NL80211_ATTR_TXQ_LIMIT] = { .type = NLA_U32 },
        [NL80211_ATTR_TXQ_MEMORY_LIMIT] = { .type = NLA_U32 },
        [NL80211_ATTR_TXQ_QUANTUM] = { .type = NLA_U32 },
+       [NL80211_ATTR_HE_CAPABILITY] = { .type = NLA_BINARY,
+                                        .len = NL80211_HE_MAX_CAPABILITY_LEN },
 };
 
 /* policy for the key attributes */
@@ -1324,6 +1326,34 @@ static int nl80211_send_coalesce(struct sk_buff *msg,
        return 0;
 }
 
+static int
+nl80211_send_iftype_data(struct sk_buff *msg,
+                        const struct ieee80211_sband_iftype_data *iftdata)
+{
+       const struct ieee80211_sta_he_cap *he_cap = &iftdata->he_cap;
+
+       if (nl80211_put_iftypes(msg, NL80211_BAND_IFTYPE_ATTR_IFTYPES,
+                               iftdata->types_mask))
+               return -ENOBUFS;
+
+       if (he_cap->has_he) {
+               if (nla_put(msg, NL80211_BAND_IFTYPE_ATTR_HE_CAP_MAC,
+                           sizeof(he_cap->he_cap_elem.mac_cap_info),
+                           he_cap->he_cap_elem.mac_cap_info) ||
+                   nla_put(msg, NL80211_BAND_IFTYPE_ATTR_HE_CAP_PHY,
+                           sizeof(he_cap->he_cap_elem.phy_cap_info),
+                           he_cap->he_cap_elem.phy_cap_info) ||
+                   nla_put(msg, NL80211_BAND_IFTYPE_ATTR_HE_CAP_MCS_SET,
+                           sizeof(he_cap->he_mcs_nss_supp),
+                           &he_cap->he_mcs_nss_supp) ||
+                   nla_put(msg, NL80211_BAND_IFTYPE_ATTR_HE_CAP_PPE,
+                           sizeof(he_cap->ppe_thres), he_cap->ppe_thres))
+                       return -ENOBUFS;
+       }
+
+       return 0;
+}
+
 static int nl80211_send_band_rateinfo(struct sk_buff *msg,
                                      struct ieee80211_supported_band *sband)
 {
@@ -1353,6 +1383,32 @@ static int nl80211_send_band_rateinfo(struct sk_buff *msg,
                         sband->vht_cap.cap)))
                return -ENOBUFS;
 
+       if (sband->n_iftype_data) {
+               struct nlattr *nl_iftype_data =
+                       nla_nest_start(msg, NL80211_BAND_ATTR_IFTYPE_DATA);
+               int err;
+
+               if (!nl_iftype_data)
+                       return -ENOBUFS;
+
+               for (i = 0; i < sband->n_iftype_data; i++) {
+                       struct nlattr *iftdata;
+
+                       iftdata = nla_nest_start(msg, i + 1);
+                       if (!iftdata)
+                               return -ENOBUFS;
+
+                       err = nl80211_send_iftype_data(msg,
+                                                      &sband->iftype_data[i]);
+                       if (err)
+                               return err;
+
+                       nla_nest_end(msg, iftdata);
+               }
+
+               nla_nest_end(msg, nl_iftype_data);
+       }
+
        /* add bitrates */
        nl_rates = nla_nest_start(msg, NL80211_BAND_ATTR_RATES);
        if (!nl_rates)
@@ -2757,7 +2813,8 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
            nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, wdev_address(wdev)) ||
            nla_put_u32(msg, NL80211_ATTR_GENERATION,
                        rdev->devlist_generation ^
-                       (cfg80211_rdev_list_generation << 2)))
+                       (cfg80211_rdev_list_generation << 2)) ||
+           nla_put_u8(msg, NL80211_ATTR_4ADDR, wdev->use_4addr))
                goto nla_put_failure;
 
        if (rdev->ops->get_channel) {
@@ -4472,6 +4529,9 @@ static bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info,
        case RATE_INFO_BW_160:
                rate_flg = NL80211_RATE_INFO_160_MHZ_WIDTH;
                break;
+       case RATE_INFO_BW_HE_RU:
+               rate_flg = 0;
+               WARN_ON(!(info->flags & RATE_INFO_FLAGS_HE_MCS));
        }
 
        if (rate_flg && nla_put_flag(msg, rate_flg))
@@ -4491,6 +4551,19 @@ static bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info,
                if (info->flags & RATE_INFO_FLAGS_SHORT_GI &&
                    nla_put_flag(msg, NL80211_RATE_INFO_SHORT_GI))
                        return false;
+       } else if (info->flags & RATE_INFO_FLAGS_HE_MCS) {
+               if (nla_put_u8(msg, NL80211_RATE_INFO_HE_MCS, info->mcs))
+                       return false;
+               if (nla_put_u8(msg, NL80211_RATE_INFO_HE_NSS, info->nss))
+                       return false;
+               if (nla_put_u8(msg, NL80211_RATE_INFO_HE_GI, info->he_gi))
+                       return false;
+               if (nla_put_u8(msg, NL80211_RATE_INFO_HE_DCM, info->he_dcm))
+                       return false;
+               if (info->bw == RATE_INFO_BW_HE_RU &&
+                   nla_put_u8(msg, NL80211_RATE_INFO_HE_RU_ALLOC,
+                              info->he_ru_alloc))
+                       return false;
        }
 
        nla_nest_end(msg, rate);
@@ -4547,13 +4620,13 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid,
 
 #define PUT_SINFO(attr, memb, type) do {                               \
        BUILD_BUG_ON(sizeof(type) == sizeof(u64));                      \
-       if (sinfo->filled & (1ULL << NL80211_STA_INFO_ ## attr) &&      \
+       if (sinfo->filled & BIT_ULL(NL80211_STA_INFO_ ## attr) &&       \
            nla_put_ ## type(msg, NL80211_STA_INFO_ ## attr,            \
                             sinfo->memb))                              \
                goto nla_put_failure;                                   \
        } while (0)
 #define PUT_SINFO_U64(attr, memb) do {                                 \
-       if (sinfo->filled & (1ULL << NL80211_STA_INFO_ ## attr) &&      \
+       if (sinfo->filled & BIT_ULL(NL80211_STA_INFO_ ## attr) &&       \
            nla_put_u64_64bit(msg, NL80211_STA_INFO_ ## attr,           \
                              sinfo->memb, NL80211_STA_INFO_PAD))       \
                goto nla_put_failure;                                   \
@@ -4562,14 +4635,14 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid,
        PUT_SINFO(CONNECTED_TIME, connected_time, u32);
        PUT_SINFO(INACTIVE_TIME, inactive_time, u32);
 
-       if (sinfo->filled & (BIT(NL80211_STA_INFO_RX_BYTES) |
-                            BIT(NL80211_STA_INFO_RX_BYTES64)) &&
+       if (sinfo->filled & (BIT_ULL(NL80211_STA_INFO_RX_BYTES) |
+                            BIT_ULL(NL80211_STA_INFO_RX_BYTES64)) &&
            nla_put_u32(msg, NL80211_STA_INFO_RX_BYTES,
                        (u32)sinfo->rx_bytes))
                goto nla_put_failure;
 
-       if (sinfo->filled & (BIT(NL80211_STA_INFO_TX_BYTES) |
-                            BIT(NL80211_STA_INFO_TX_BYTES64)) &&
+       if (sinfo->filled & (BIT_ULL(NL80211_STA_INFO_TX_BYTES) |
+                            BIT_ULL(NL80211_STA_INFO_TX_BYTES64)) &&
            nla_put_u32(msg, NL80211_STA_INFO_TX_BYTES,
                        (u32)sinfo->tx_bytes))
                goto nla_put_failure;
@@ -4589,24 +4662,24 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid,
        default:
                break;
        }
-       if (sinfo->filled & BIT(NL80211_STA_INFO_CHAIN_SIGNAL)) {
+       if (sinfo->filled & BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL)) {
                if (!nl80211_put_signal(msg, sinfo->chains,
                                        sinfo->chain_signal,
                                        NL80211_STA_INFO_CHAIN_SIGNAL))
                        goto nla_put_failure;
        }
-       if (sinfo->filled & BIT(NL80211_STA_INFO_CHAIN_SIGNAL_AVG)) {
+       if (sinfo->filled & BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL_AVG)) {
                if (!nl80211_put_signal(msg, sinfo->chains,
                                        sinfo->chain_signal_avg,
                                        NL80211_STA_INFO_CHAIN_SIGNAL_AVG))
                        goto nla_put_failure;
        }
-       if (sinfo->filled & BIT(NL80211_STA_INFO_TX_BITRATE)) {
+       if (sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_BITRATE)) {
                if (!nl80211_put_sta_rate(msg, &sinfo->txrate,
                                          NL80211_STA_INFO_TX_BITRATE))
                        goto nla_put_failure;
        }
-       if (sinfo->filled & BIT(NL80211_STA_INFO_RX_BITRATE)) {
+       if (sinfo->filled & BIT_ULL(NL80211_STA_INFO_RX_BITRATE)) {
                if (!nl80211_put_sta_rate(msg, &sinfo->rxrate,
                                          NL80211_STA_INFO_RX_BITRATE))
                        goto nla_put_failure;
@@ -4622,7 +4695,7 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid,
        PUT_SINFO(PEER_PM, peer_pm, u32);
        PUT_SINFO(NONPEER_PM, nonpeer_pm, u32);
 
-       if (sinfo->filled & BIT(NL80211_STA_INFO_BSS_PARAM)) {
+       if (sinfo->filled & BIT_ULL(NL80211_STA_INFO_BSS_PARAM)) {
                bss_param = nla_nest_start(msg, NL80211_STA_INFO_BSS_PARAM);
                if (!bss_param)
                        goto nla_put_failure;
@@ -4641,7 +4714,7 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid,
 
                nla_nest_end(msg, bss_param);
        }
-       if ((sinfo->filled & BIT(NL80211_STA_INFO_STA_FLAGS)) &&
+       if ((sinfo->filled & BIT_ULL(NL80211_STA_INFO_STA_FLAGS)) &&
            nla_put(msg, NL80211_STA_INFO_STA_FLAGS,
                    sizeof(struct nl80211_sta_flag_update),
                    &sinfo->sta_flags))
@@ -4887,7 +4960,8 @@ int cfg80211_check_station_change(struct wiphy *wiphy,
                        return -EINVAL;
                if (params->supported_rates)
                        return -EINVAL;
-               if (params->ext_capab || params->ht_capa || params->vht_capa)
+               if (params->ext_capab || params->ht_capa || params->vht_capa ||
+                   params->he_capa)
                        return -EINVAL;
        }
 
@@ -5093,6 +5167,15 @@ static int nl80211_set_station_tdls(struct genl_info *info,
        if (info->attrs[NL80211_ATTR_VHT_CAPABILITY])
                params->vht_capa =
                        nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]);
+       if (info->attrs[NL80211_ATTR_HE_CAPABILITY]) {
+               params->he_capa =
+                       nla_data(info->attrs[NL80211_ATTR_HE_CAPABILITY]);
+               params->he_capa_len =
+                       nla_len(info->attrs[NL80211_ATTR_HE_CAPABILITY]);
+
+               if (params->he_capa_len < NL80211_HE_MIN_CAPABILITY_LEN)
+                       return -EINVAL;
+       }
 
        err = nl80211_parse_sta_channel_info(info, params);
        if (err)
@@ -5320,6 +5403,17 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
                params.vht_capa =
                        nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]);
 
+       if (info->attrs[NL80211_ATTR_HE_CAPABILITY]) {
+               params.he_capa =
+                       nla_data(info->attrs[NL80211_ATTR_HE_CAPABILITY]);
+               params.he_capa_len =
+                       nla_len(info->attrs[NL80211_ATTR_HE_CAPABILITY]);
+
+               /* max len is validated in nla policy */
+               if (params.he_capa_len < NL80211_HE_MIN_CAPABILITY_LEN)
+                       return -EINVAL;
+       }
+
        if (info->attrs[NL80211_ATTR_OPMODE_NOTIF]) {
                params.opmode_notif_used = true;
                params.opmode_notif =
@@ -5352,6 +5446,10 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
        if (!(params.sta_flags_set & BIT(NL80211_STA_FLAG_WME))) {
                params.ht_capa = NULL;
                params.vht_capa = NULL;
+
+               /* HE requires WME */
+               if (params.he_capa_len)
+                       return -EINVAL;
        }
 
        /* When you run into this, adjust the code below for the new flag */
@@ -6849,6 +6947,16 @@ static bool cfg80211_off_channel_oper_allowed(struct wireless_dev *wdev)
        return regulatory_pre_cac_allowed(wdev->wiphy);
 }
 
+static bool nl80211_check_scan_feat(struct wiphy *wiphy, u32 flags, u32 flag,
+                                   enum nl80211_ext_feature_index feat)
+{
+       if (!(flags & flag))
+               return true;
+       if (wiphy_ext_feature_isset(wiphy, feat))
+               return true;
+       return false;
+}
+
 static int
 nl80211_check_scan_flags(struct wiphy *wiphy, struct wireless_dev *wdev,
                         void *request, struct nlattr **attrs,
@@ -6883,15 +6991,33 @@ nl80211_check_scan_flags(struct wiphy *wiphy, struct wireless_dev *wdev,
 
        if (((*flags & NL80211_SCAN_FLAG_LOW_PRIORITY) &&
             !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) ||
-           ((*flags & NL80211_SCAN_FLAG_LOW_SPAN) &&
-            !wiphy_ext_feature_isset(wiphy,
-                                     NL80211_EXT_FEATURE_LOW_SPAN_SCAN)) ||
-           ((*flags & NL80211_SCAN_FLAG_LOW_POWER) &&
-            !wiphy_ext_feature_isset(wiphy,
-                                     NL80211_EXT_FEATURE_LOW_POWER_SCAN)) ||
-           ((*flags & NL80211_SCAN_FLAG_HIGH_ACCURACY) &&
-            !wiphy_ext_feature_isset(wiphy,
-                                     NL80211_EXT_FEATURE_HIGH_ACCURACY_SCAN)))
+           !nl80211_check_scan_feat(wiphy, *flags,
+                                    NL80211_SCAN_FLAG_LOW_SPAN,
+                                    NL80211_EXT_FEATURE_LOW_SPAN_SCAN) ||
+           !nl80211_check_scan_feat(wiphy, *flags,
+                                    NL80211_SCAN_FLAG_LOW_POWER,
+                                    NL80211_EXT_FEATURE_LOW_POWER_SCAN) ||
+           !nl80211_check_scan_feat(wiphy, *flags,
+                                    NL80211_SCAN_FLAG_HIGH_ACCURACY,
+                                    NL80211_EXT_FEATURE_HIGH_ACCURACY_SCAN) ||
+           !nl80211_check_scan_feat(wiphy, *flags,
+                                    NL80211_SCAN_FLAG_FILS_MAX_CHANNEL_TIME,
+                                    NL80211_EXT_FEATURE_FILS_MAX_CHANNEL_TIME) ||
+           !nl80211_check_scan_feat(wiphy, *flags,
+                                    NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP,
+                                    NL80211_EXT_FEATURE_ACCEPT_BCAST_PROBE_RESP) ||
+           !nl80211_check_scan_feat(wiphy, *flags,
+                                    NL80211_SCAN_FLAG_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION,
+                                    NL80211_EXT_FEATURE_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION) ||
+           !nl80211_check_scan_feat(wiphy, *flags,
+                                    NL80211_SCAN_FLAG_OCE_PROBE_REQ_HIGH_TX_RATE,
+                                    NL80211_EXT_FEATURE_OCE_PROBE_REQ_HIGH_TX_RATE) ||
+           !nl80211_check_scan_feat(wiphy, *flags,
+                                    NL80211_SCAN_FLAG_RANDOM_SN,
+                                    NL80211_EXT_FEATURE_SCAN_RANDOM_SN) ||
+           !nl80211_check_scan_feat(wiphy, *flags,
+                                    NL80211_SCAN_FLAG_MIN_PREQ_CONTENT,
+                                    NL80211_EXT_FEATURE_SCAN_MIN_PREQ_CONTENT))
                return -EOPNOTSUPP;
 
        if (*flags & NL80211_SCAN_FLAG_RANDOM_ADDR) {
@@ -6906,26 +7032,6 @@ nl80211_check_scan_flags(struct wiphy *wiphy, struct wireless_dev *wdev,
                        return err;
        }
 
-       if ((*flags & NL80211_SCAN_FLAG_FILS_MAX_CHANNEL_TIME) &&
-           !wiphy_ext_feature_isset(wiphy,
-                                    NL80211_EXT_FEATURE_FILS_MAX_CHANNEL_TIME))
-               return -EOPNOTSUPP;
-
-       if ((*flags & NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP) &&
-          !wiphy_ext_feature_isset(wiphy,
-                                   NL80211_EXT_FEATURE_ACCEPT_BCAST_PROBE_RESP))
-               return -EOPNOTSUPP;
-
-       if ((*flags & NL80211_SCAN_FLAG_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION) &&
-           !wiphy_ext_feature_isset(wiphy,
-                                    NL80211_EXT_FEATURE_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION))
-               return -EOPNOTSUPP;
-
-       if ((*flags & NL80211_SCAN_FLAG_OCE_PROBE_REQ_HIGH_TX_RATE) &&
-           !wiphy_ext_feature_isset(wiphy,
-                                    NL80211_EXT_FEATURE_OCE_PROBE_REQ_HIGH_TX_RATE))
-               return -EOPNOTSUPP;
-
        return 0;
 }
 
@@ -10148,7 +10254,7 @@ static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev,
                if (err)
                        return err;
 
-               if (sinfo.filled & BIT(NL80211_STA_INFO_BEACON_SIGNAL_AVG))
+               if (sinfo.filled & BIT_ULL(NL80211_STA_INFO_BEACON_SIGNAL_AVG))
                        wdev->cqm_config->last_rssi_event_value =
                                (s8) sinfo.rx_beacon_signal_avg;
        }
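
The BIT() to BIT_ULL() sweep above is a portability fix, not a cosmetic one: sinfo->filled is a u64 bitmap, and BIT(n) expands to 1UL << n, which is undefined for n >= 32 on 32-bit builds. In short:

        u64 filled = 0;

        filled |= BIT(40);      /* 1UL << 40: undefined on 32-bit targets */
        filled |= BIT_ULL(40);  /* 1ULL << 40: well-defined everywhere */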
index 570a2b67ca1036796cc5021a0f0ce546811a4e6f..6ab32f6a19616e0825691ddfa684105b00f00c2c 100644 (file)
@@ -102,7 +102,7 @@ static int wiphy_suspend(struct device *dev)
        struct cfg80211_registered_device *rdev = dev_to_rdev(dev);
        int ret = 0;
 
-       rdev->suspend_at = get_seconds();
+       rdev->suspend_at = ktime_get_boottime_seconds();
 
        rtnl_lock();
        if (rdev->wiphy.registered) {
@@ -130,7 +130,7 @@ static int wiphy_resume(struct device *dev)
        int ret = 0;
 
        /* Age scan results with time spent in suspend */
-       cfg80211_bss_age(rdev, get_seconds() - rdev->suspend_at);
+       cfg80211_bss_age(rdev, ktime_get_boottime_seconds() - rdev->suspend_at);
 
        rtnl_lock();
        if (rdev->wiphy.registered && rdev->ops->resume)
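
The clock swap above is the substance of the hunk: get_seconds() follows the wall clock, so a clock step while suspended would corrupt the aging calculation, and its return value overflows in 2038 on 32-bit; ktime_get_boottime_seconds() returns a time64_t, never jumps backwards, and keeps counting across suspend, which is exactly what aging scan results by time spent suspended requires. A minimal sketch:

        time64_t before = ktime_get_boottime_seconds();
        /* ... system suspends and resumes here ... */
        time64_t slept = ktime_get_boottime_seconds() - before; /* includes suspend */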
index 3c654cd7ba562ad874c7176960c688b53fb80f61..e0825a019e9fb255adc2f4f749b08e241b2c2dde 100644 (file)
@@ -4,6 +4,7 @@
  *
  * Copyright 2007-2009 Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
+ * Copyright 2017      Intel Deutschland GmbH
  */
 #include <linux/export.h>
 #include <linux/bitops.h>
@@ -1142,6 +1143,85 @@ static u32 cfg80211_calculate_bitrate_vht(struct rate_info *rate)
        return 0;
 }
 
+static u32 cfg80211_calculate_bitrate_he(struct rate_info *rate)
+{
+#define SCALE 2048
+       u16 mcs_divisors[12] = {
+               34133, /* 16.666666... */
+               17067, /*  8.333333... */
+               11378, /*  5.555555... */
+                8533, /*  4.166666... */
+                5689, /*  2.777777... */
+                4267, /*  2.083333... */
+                3923, /*  1.851851... */
+                3413, /*  1.666666... */
+                2844, /*  1.388888... */
+                2560, /*  1.250000... */
+                2276, /*  1.111111... */
+                2048, /*  1.000000... */
+       };
+       u32 rates_160M[3] = { 960777777, 907400000, 816666666 };
+       u32 rates_969[3] =  { 480388888, 453700000, 408333333 };
+       u32 rates_484[3] =  { 229411111, 216666666, 195000000 };
+       u32 rates_242[3] =  { 114711111, 108333333,  97500000 };
+       u32 rates_106[3] =  {  40000000,  37777777,  34000000 };
+       u32 rates_52[3]  =  {  18820000,  17777777,  16000000 };
+       u32 rates_26[3]  =  {   9411111,   8888888,   8000000 };
+       u64 tmp;
+       u32 result;
+
+       if (WARN_ON_ONCE(rate->mcs > 11))
+               return 0;
+
+       if (WARN_ON_ONCE(rate->he_gi > NL80211_RATE_INFO_HE_GI_3_2))
+               return 0;
+       if (WARN_ON_ONCE(rate->he_ru_alloc >
+                        NL80211_RATE_INFO_HE_RU_ALLOC_2x996))
+               return 0;
+       if (WARN_ON_ONCE(rate->nss < 1 || rate->nss > 8))
+               return 0;
+
+       if (rate->bw == RATE_INFO_BW_160)
+               result = rates_160M[rate->he_gi];
+       else if (rate->bw == RATE_INFO_BW_80 ||
+                (rate->bw == RATE_INFO_BW_HE_RU &&
+                 rate->he_ru_alloc == NL80211_RATE_INFO_HE_RU_ALLOC_996))
+               result = rates_969[rate->he_gi];
+       else if (rate->bw == RATE_INFO_BW_40 ||
+                (rate->bw == RATE_INFO_BW_HE_RU &&
+                 rate->he_ru_alloc == NL80211_RATE_INFO_HE_RU_ALLOC_484))
+               result = rates_484[rate->he_gi];
+       else if (rate->bw == RATE_INFO_BW_20 ||
+                (rate->bw == RATE_INFO_BW_HE_RU &&
+                 rate->he_ru_alloc == NL80211_RATE_INFO_HE_RU_ALLOC_242))
+               result = rates_242[rate->he_gi];
+       else if (rate->bw == RATE_INFO_BW_HE_RU &&
+                rate->he_ru_alloc == NL80211_RATE_INFO_HE_RU_ALLOC_106)
+               result = rates_106[rate->he_gi];
+       else if (rate->bw == RATE_INFO_BW_HE_RU &&
+                rate->he_ru_alloc == NL80211_RATE_INFO_HE_RU_ALLOC_52)
+               result = rates_52[rate->he_gi];
+       else if (rate->bw == RATE_INFO_BW_HE_RU &&
+                rate->he_ru_alloc == NL80211_RATE_INFO_HE_RU_ALLOC_26)
+               result = rates_26[rate->he_gi];
+       else if (WARN(1, "invalid HE MCS: bw:%d, ru:%d\n",
+                     rate->bw, rate->he_ru_alloc))
+               return 0;
+
+       /* now scale to the appropriate MCS */
+       tmp = result;
+       tmp *= SCALE;
+       do_div(tmp, mcs_divisors[rate->mcs]);
+       result = tmp;
+
+       /* and take NSS, DCM into account */
+       result = (result * rate->nss) / 8;
+       if (rate->he_dcm)
+               result /= 2;
+
+       return result;
+}
+
 u32 cfg80211_calculate_bitrate(struct rate_info *rate)
 {
        if (rate->flags & RATE_INFO_FLAGS_MCS)
@@ -1150,6 +1230,8 @@ u32 cfg80211_calculate_bitrate(struct rate_info *rate)
                return cfg80211_calculate_bitrate_60g(rate);
        if (rate->flags & RATE_INFO_FLAGS_VHT_MCS)
                return cfg80211_calculate_bitrate_vht(rate);
+       if (rate->flags & RATE_INFO_FLAGS_HE_MCS)
+               return cfg80211_calculate_bitrate_he(rate);
 
        return rate->legacy;
 }
@@ -1791,8 +1873,9 @@ bool cfg80211_does_bw_fit_range(const struct ieee80211_freq_range *freq_range,
 
 int cfg80211_sinfo_alloc_tid_stats(struct station_info *sinfo, gfp_t gfp)
 {
-       sinfo->pertid = kcalloc(sizeof(*(sinfo->pertid)),
-                               IEEE80211_NUM_TIDS + 1, gfp);
+       sinfo->pertid = kcalloc(IEEE80211_NUM_TIDS + 1,
+                               sizeof(*(sinfo->pertid)),
+                               gfp);
        if (!sinfo->pertid)
                return -ENOMEM;
 
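The kcalloc() reorder above is behaviour-neutral (the overflow check multiplies the same two operands either way) but restores the documented kcalloc(n, size, flags) convention, which keeps static checkers quiet and readers oriented:

        /* element count first, element size second */
        arr = kcalloc(nr_items, sizeof(*arr), GFP_KERNEL);
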
index 05186a47878fe93b87807130b00d978d3a6d9bc5..167f7025ac98288acbd57cd4627b1eb7fb2f6520 100644 (file)
@@ -1278,7 +1278,7 @@ static int cfg80211_wext_giwrate(struct net_device *dev,
        if (err)
                return err;
 
-       if (!(sinfo.filled & BIT(NL80211_STA_INFO_TX_BITRATE)))
+       if (!(sinfo.filled & BIT_ULL(NL80211_STA_INFO_TX_BITRATE)))
                return -EOPNOTSUPP;
 
        rate->value = 100000 * cfg80211_calculate_bitrate(&sinfo.txrate);
@@ -1320,7 +1320,7 @@ static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev)
 
        switch (rdev->wiphy.signal_type) {
        case CFG80211_SIGNAL_TYPE_MBM:
-               if (sinfo.filled & BIT(NL80211_STA_INFO_SIGNAL)) {
+               if (sinfo.filled & BIT_ULL(NL80211_STA_INFO_SIGNAL)) {
                        int sig = sinfo.signal;
                        wstats.qual.updated |= IW_QUAL_LEVEL_UPDATED;
                        wstats.qual.updated |= IW_QUAL_QUAL_UPDATED;
@@ -1334,7 +1334,7 @@ static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev)
                        break;
                }
        case CFG80211_SIGNAL_TYPE_UNSPEC:
-               if (sinfo.filled & BIT(NL80211_STA_INFO_SIGNAL)) {
+               if (sinfo.filled & BIT_ULL(NL80211_STA_INFO_SIGNAL)) {
                        wstats.qual.updated |= IW_QUAL_LEVEL_UPDATED;
                        wstats.qual.updated |= IW_QUAL_QUAL_UPDATED;
                        wstats.qual.level = sinfo.signal;
@@ -1347,9 +1347,9 @@ static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev)
        }
 
        wstats.qual.updated |= IW_QUAL_NOISE_INVALID;
-       if (sinfo.filled & BIT(NL80211_STA_INFO_RX_DROP_MISC))
+       if (sinfo.filled & BIT_ULL(NL80211_STA_INFO_RX_DROP_MISC))
                wstats.discard.misc = sinfo.rx_dropped_misc;
-       if (sinfo.filled & BIT(NL80211_STA_INFO_TX_FAILED))
+       if (sinfo.filled & BIT_ULL(NL80211_STA_INFO_TX_FAILED))
                wstats.discard.retries = sinfo.tx_failed;
 
        return &wstats;
index e2fa133f9fba225656fa62f4e9fb61e745a4d46a..59fcb41fc5e6f4cc3eff162a0af0f8b9dbbd4b19 100644 (file)
@@ -31,5 +31,3 @@ config X25
 
          To compile this driver as a module, choose M here: the module
          will be called x25. If unsure, say N.
-
-
index 9c214ec681ac88cb48a5eac7bc43a5edef4d0a2a..743103786652d71824a7f43d64799826f361d8c6 100644 (file)
@@ -381,4 +381,3 @@ void x25_check_rbuf(struct sock *sk)
                x25_stop_timer(sk);
        }
 }
-
index f47abb46c5874c8b949b8d3cb3d278c479775e06..911ca6d3cb5a6cd7d056a04cf3df57d5833bafeb 100644 (file)
@@ -11,6 +11,8 @@
 #include <linux/slab.h>
 #include <linux/bpf.h>
 #include <linux/mm.h>
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
 
 #include "xdp_umem.h"
 #include "xsk_queue.h"
@@ -40,6 +42,21 @@ void xdp_del_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
        }
 }
 
+int xdp_umem_query(struct net_device *dev, u16 queue_id)
+{
+       struct netdev_bpf bpf;
+
+       ASSERT_RTNL();
+
+       memset(&bpf, 0, sizeof(bpf));
+       bpf.command = XDP_QUERY_XSK_UMEM;
+       bpf.xsk.queue_id = queue_id;
+
+       if (!dev->netdev_ops->ndo_bpf)
+               return 0;
+       return dev->netdev_ops->ndo_bpf(dev, &bpf) ?: !!bpf.xsk.umem;
+}
+
 int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
                        u32 queue_id, u16 flags)
 {
@@ -56,41 +73,36 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
        if (force_copy)
                return 0;
 
-       dev_hold(dev);
-
-       if (dev->netdev_ops->ndo_bpf && dev->netdev_ops->ndo_xsk_async_xmit) {
-               bpf.command = XDP_QUERY_XSK_UMEM;
+       if (!dev->netdev_ops->ndo_bpf || !dev->netdev_ops->ndo_xsk_async_xmit)
+               return force_zc ? -ENOTSUPP : 0; /* fail or fallback */
 
-               rtnl_lock();
-               err = dev->netdev_ops->ndo_bpf(dev, &bpf);
-               rtnl_unlock();
+       bpf.command = XDP_QUERY_XSK_UMEM;
 
-               if (err) {
-                       dev_put(dev);
-                       return force_zc ? -ENOTSUPP : 0;
-               }
-
-               bpf.command = XDP_SETUP_XSK_UMEM;
-               bpf.xsk.umem = umem;
-               bpf.xsk.queue_id = queue_id;
+       rtnl_lock();
+       err = xdp_umem_query(dev, queue_id);
+       if (err) {
+               err = err < 0 ? -ENOTSUPP : -EBUSY;
+               goto err_rtnl_unlock;
+       }
 
-               rtnl_lock();
-               err = dev->netdev_ops->ndo_bpf(dev, &bpf);
-               rtnl_unlock();
+       bpf.command = XDP_SETUP_XSK_UMEM;
+       bpf.xsk.umem = umem;
+       bpf.xsk.queue_id = queue_id;
 
-               if (err) {
-                       dev_put(dev);
-                       return force_zc ? err : 0; /* fail or fallback */
-               }
+       err = dev->netdev_ops->ndo_bpf(dev, &bpf);
+       if (err)
+               goto err_rtnl_unlock;
+       rtnl_unlock();
 
-               umem->dev = dev;
-               umem->queue_id = queue_id;
-               umem->zc = true;
-               return 0;
-       }
+       dev_hold(dev);
+       umem->dev = dev;
+       umem->queue_id = queue_id;
+       umem->zc = true;
+       return 0;
 
-       dev_put(dev);
-       return force_zc ? -ENOTSUPP : 0; /* fail or fallback */
+err_rtnl_unlock:
+       rtnl_unlock();
+       return force_zc ? err : 0; /* fail or fallback */
 }
 
 static void xdp_umem_clear_dev(struct xdp_umem *umem)
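
One idiom in the refactor above deserves a gloss: xdp_umem_query() returns ndo_bpf(dev, &bpf) ?: !!bpf.xsk.umem, using the GNU ?: extension that yields the left operand whenever it is non-zero. Expanded, the logic reads:

        err = dev->netdev_ops->ndo_bpf(dev, &bpf);
        if (err)
                return err;             /* driver error (negative errno) wins */
        return !!bpf.xsk.umem;          /* else: 1 if a umem is already bound */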
index 286ed25c1a698ae9bb2b89b110d0469475a1e2de..4a9ee2d83158ba87a4da985af1020faae8c440b7 100644 (file)
@@ -25,6 +25,14 @@ config XFRM_USER
 
          If unsure, say Y.
 
+config XFRM_INTERFACE
+       tristate "Transformation virtual interface"
+       depends on XFRM && IPV6
+       ---help---
+         This provides a virtual interface to route IPsec traffic.
+
+         If unsure, say N.
+
 config XFRM_SUB_POLICY
        bool "Transformation sub policy support"
        depends on XFRM
@@ -87,4 +95,3 @@ config NET_KEY_MIGRATE
          <draft-sugimoto-mip6-pfkey-migrate>.
 
          If unsure, say N.
-
index 0bd2465a8c5a8e095d87642f9d71a19fbd6572e1..fbc4552d17b85646d0ac934f1054604e53bce75a 100644 (file)
@@ -10,3 +10,4 @@ obj-$(CONFIG_XFRM_STATISTICS) += xfrm_proc.o
 obj-$(CONFIG_XFRM_ALGO) += xfrm_algo.o
 obj-$(CONFIG_XFRM_USER) += xfrm_user.o
 obj-$(CONFIG_XFRM_IPCOMP) += xfrm_ipcomp.o
+obj-$(CONFIG_XFRM_INTERFACE) += xfrm_interface.o
index 175941e15a6edc3e9a2f884b9c5459484f30d6f1..5611b75210208cd3594922ede88f7c13c393cf3f 100644 (file)
@@ -56,7 +56,7 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur
        if (skb_is_gso(skb)) {
                struct net_device *dev = skb->dev;
 
-               if (unlikely(!x->xso.offload_handle || (x->xso.dev != dev))) {
+               if (unlikely(x->xso.dev != dev)) {
                        struct sk_buff *segs;
 
                        /* Packet got rerouted, fixup features and segment it. */
@@ -162,7 +162,8 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
                }
 
                dst = __xfrm_dst_lookup(net, 0, 0, saddr, daddr,
-                                       x->props.family, x->props.output_mark);
+                                       x->props.family,
+                                       xfrm_smark_get(0, x));
                if (IS_ERR(dst))
                        return 0;
 
@@ -210,8 +211,8 @@ bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x)
        if (!x->type_offload || x->encap)
                return false;
 
-       if ((!dev || (x->xso.offload_handle && (dev == xfrm_dst_path(dst)->dev))) &&
-            (!xdst->child->xfrm && x->type->get_mtu)) {
+       if ((!dev || (dev == xfrm_dst_path(dst)->dev)) &&
+           (!xdst->child->xfrm && x->type->get_mtu)) {
                mtu = x->type->get_mtu(x, xdst->child_mtu_cached);
 
                if (skb->len <= mtu)
@@ -306,12 +307,6 @@ static int xfrm_dev_register(struct net_device *dev)
        return xfrm_api_check(dev);
 }
 
-static int xfrm_dev_unregister(struct net_device *dev)
-{
-       xfrm_policy_cache_flush();
-       return NOTIFY_DONE;
-}
-
 static int xfrm_dev_feat_change(struct net_device *dev)
 {
        return xfrm_api_check(dev);
@@ -322,7 +317,6 @@ static int xfrm_dev_down(struct net_device *dev)
        if (dev->features & NETIF_F_HW_ESP)
                xfrm_dev_state_flush(dev_net(dev), dev, true);
 
-       xfrm_policy_cache_flush();
        return NOTIFY_DONE;
 }
 
@@ -334,9 +328,6 @@ static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void
        case NETDEV_REGISTER:
                return xfrm_dev_register(dev);
 
-       case NETDEV_UNREGISTER:
-               return xfrm_dev_unregister(dev);
-
        case NETDEV_FEAT_CHANGE:
                return xfrm_dev_feat_change(dev);
 
index 352abca2605f6efbb05682bc0ca5f1de1f4402a4..b89c9c7f8c5c12a13772ae4d838ce9f121bd51f5 100644 (file)
@@ -320,6 +320,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 
        seq = 0;
        if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) {
+               secpath_reset(skb);
                XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
                goto drop;
        }
@@ -328,17 +329,21 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
                                   XFRM_SPI_SKB_CB(skb)->daddroff);
        do {
                if (skb->sp->len == XFRM_MAX_DEPTH) {
+                       secpath_reset(skb);
                        XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
                        goto drop;
                }
 
                x = xfrm_state_lookup(net, mark, daddr, spi, nexthdr, family);
                if (x == NULL) {
+                       secpath_reset(skb);
                        XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES);
                        xfrm_audit_state_notfound(skb, family, spi, seq);
                        goto drop;
                }
 
+               skb->mark = xfrm_smark_get(skb->mark, x);
+
                skb->sp->xvec[skb->sp->len++] = x;
 
 lock:
diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c
new file mode 100644 (file)
index 0000000..31acc6f
--- /dev/null
@@ -0,0 +1,975 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *     XFRM virtual interface
+ *
+ *     Copyright (C) 2018 secunet Security Networks AG
+ *
+ *     Author:
+ *     Steffen Klassert <steffen.klassert@secunet.com>
+ */
+
+#include <linux/module.h>
+#include <linux/capability.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/sockios.h>
+#include <linux/icmp.h>
+#include <linux/if.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/net.h>
+#include <linux/in6.h>
+#include <linux/netdevice.h>
+#include <linux/if_link.h>
+#include <linux/if_arp.h>
+#include <linux/icmpv6.h>
+#include <linux/init.h>
+#include <linux/route.h>
+#include <linux/rtnetlink.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/slab.h>
+#include <linux/hash.h>
+
+#include <linux/uaccess.h>
+#include <linux/atomic.h>
+
+#include <net/icmp.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <net/addrconf.h>
+#include <net/xfrm.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+#include <linux/etherdevice.h>
+
+static int xfrmi_dev_init(struct net_device *dev);
+static void xfrmi_dev_setup(struct net_device *dev);
+static struct rtnl_link_ops xfrmi_link_ops __read_mostly;
+static unsigned int xfrmi_net_id __read_mostly;
+
+struct xfrmi_net {
+       /* lists for storing interfaces in use */
+       struct xfrm_if __rcu *xfrmi[1];
+};
+
+#define for_each_xfrmi_rcu(start, xi) \
+       for (xi = rcu_dereference(start); xi; xi = rcu_dereference(xi->next))
+
+static struct xfrm_if *xfrmi_lookup(struct net *net, struct xfrm_state *x)
+{
+       struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
+       struct xfrm_if *xi;
+
+       for_each_xfrmi_rcu(xfrmn->xfrmi[0], xi) {
+               if (x->if_id == xi->p.if_id &&
+                   (xi->dev->flags & IFF_UP))
+                       return xi;
+       }
+
+       return NULL;
+}
+
+static struct xfrm_if *xfrmi_decode_session(struct sk_buff *skb)
+{
+       struct xfrmi_net *xfrmn;
+       int ifindex;
+       struct xfrm_if *xi;
+
+       if (!skb->dev)
+               return NULL;
+
+       xfrmn = net_generic(dev_net(skb->dev), xfrmi_net_id);
+       ifindex = skb->dev->ifindex;
+
+       for_each_xfrmi_rcu(xfrmn->xfrmi[0], xi) {
+               if (ifindex == xi->dev->ifindex &&
+                   (xi->dev->flags & IFF_UP))
+                       return xi;
+       }
+
+       return NULL;
+}
+
+static void xfrmi_link(struct xfrmi_net *xfrmn, struct xfrm_if *xi)
+{
+       struct xfrm_if __rcu **xip = &xfrmn->xfrmi[0];
+
+       rcu_assign_pointer(xi->next, rtnl_dereference(*xip));
+       rcu_assign_pointer(*xip, xi);
+}
+
+static void xfrmi_unlink(struct xfrmi_net *xfrmn, struct xfrm_if *xi)
+{
+       struct xfrm_if __rcu **xip;
+       struct xfrm_if *iter;
+
+       for (xip = &xfrmn->xfrmi[0];
+            (iter = rtnl_dereference(*xip)) != NULL;
+            xip = &iter->next) {
+               if (xi == iter) {
+                       rcu_assign_pointer(*xip, xi->next);
+                       break;
+               }
+       }
+}
+
+static void xfrmi_dev_free(struct net_device *dev)
+{
+       free_percpu(dev->tstats);
+}
+
+static int xfrmi_create2(struct net_device *dev)
+{
+       struct xfrm_if *xi = netdev_priv(dev);
+       struct net *net = dev_net(dev);
+       struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
+       int err;
+
+       dev->rtnl_link_ops = &xfrmi_link_ops;
+       err = register_netdevice(dev);
+       if (err < 0)
+               goto out;
+
+       strcpy(xi->p.name, dev->name);
+
+       dev_hold(dev);
+       xfrmi_link(xfrmn, xi);
+
+       return 0;
+
+out:
+       return err;
+}
+
+static struct xfrm_if *xfrmi_create(struct net *net, struct xfrm_if_parms *p)
+{
+       struct net_device *dev;
+       struct xfrm_if *xi;
+       char name[IFNAMSIZ];
+       int err;
+
+       if (p->name[0]) {
+               strlcpy(name, p->name, IFNAMSIZ);
+       } else {
+               err = -EINVAL;
+               goto failed;
+       }
+
+       dev = alloc_netdev(sizeof(*xi), name, NET_NAME_UNKNOWN, xfrmi_dev_setup);
+       if (!dev) {
+               err = -EAGAIN;
+               goto failed;
+       }
+
+       dev_net_set(dev, net);
+
+       xi = netdev_priv(dev);
+       xi->p = *p;
+       xi->net = net;
+       xi->dev = dev;
+       xi->phydev = dev_get_by_index(net, p->link);
+       if (!xi->phydev) {
+               err = -ENODEV;
+               goto failed_free;
+       }
+
+       err = xfrmi_create2(dev);
+       if (err < 0)
+               goto failed_dev_put;
+
+       return xi;
+
+failed_dev_put:
+       dev_put(xi->phydev);
+failed_free:
+       free_netdev(dev);
+failed:
+       return ERR_PTR(err);
+}
+
+static struct xfrm_if *xfrmi_locate(struct net *net, struct xfrm_if_parms *p,
+                                  int create)
+{
+       struct xfrm_if __rcu **xip;
+       struct xfrm_if *xi;
+       struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
+
+       for (xip = &xfrmn->xfrmi[0];
+            (xi = rtnl_dereference(*xip)) != NULL;
+            xip = &xi->next) {
+               if (xi->p.if_id == p->if_id) {
+                       if (create)
+                               return ERR_PTR(-EEXIST);
+
+                       return xi;
+               }
+       }
+       if (!create)
+               return ERR_PTR(-ENODEV);
+       return xfrmi_create(net, p);
+}
+
+static void xfrmi_dev_uninit(struct net_device *dev)
+{
+       struct xfrm_if *xi = netdev_priv(dev);
+       struct xfrmi_net *xfrmn = net_generic(xi->net, xfrmi_net_id);
+
+       xfrmi_unlink(xfrmn, xi);
+       dev_put(xi->phydev);
+       dev_put(dev);
+}
+
+static void xfrmi_scrub_packet(struct sk_buff *skb, bool xnet)
+{
+       skb->tstamp = 0;
+       skb->pkt_type = PACKET_HOST;
+       skb->skb_iif = 0;
+       skb->ignore_df = 0;
+       skb_dst_drop(skb);
+       nf_reset(skb);
+       nf_reset_trace(skb);
+
+       if (!xnet)
+               return;
+
+       ipvs_reset(skb);
+       secpath_reset(skb);
+       skb_orphan(skb);
+       skb->mark = 0;
+}
+
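+/* Receive callback: attribute the decapsulated skb to the interface
+ * matching the xfrm state, run policy checks on a netns crossing and
+ * account rx statistics.
+ */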
+static int xfrmi_rcv_cb(struct sk_buff *skb, int err)
+{
+       struct pcpu_sw_netstats *tstats;
+       struct xfrm_mode *inner_mode;
+       struct net_device *dev;
+       struct xfrm_state *x;
+       struct xfrm_if *xi;
+       bool xnet;
+
+       if (err && !skb->sp)
+               return 0;
+
+       x = xfrm_input_state(skb);
+
+       xi = xfrmi_lookup(xs_net(x), x);
+       if (!xi)
+               return 1;
+
+       dev = xi->dev;
+       skb->dev = dev;
+
+       if (err) {
+               dev->stats.rx_errors++;
+               dev->stats.rx_dropped++;
+
+               return 0;
+       }
+
+       xnet = !net_eq(xi->net, dev_net(skb->dev));
+
+       if (xnet) {
+               inner_mode = x->inner_mode;
+
+               if (x->sel.family == AF_UNSPEC) {
+                       inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
+                       if (inner_mode == NULL) {
+                               XFRM_INC_STATS(dev_net(skb->dev),
+                                              LINUX_MIB_XFRMINSTATEMODEERROR);
+                               return -EINVAL;
+                       }
+               }
+
+               if (!xfrm_policy_check(NULL, XFRM_POLICY_IN, skb,
+                                      inner_mode->afinfo->family))
+                       return -EPERM;
+       }
+
+       xfrmi_scrub_packet(skb, xnet);
+
+       tstats = this_cpu_ptr(dev->tstats);
+
+       u64_stats_update_begin(&tstats->syncp);
+       tstats->rx_packets++;
+       tstats->rx_bytes += skb->len;
+       u64_stats_update_end(&tstats->syncp);
+
+       return 0;
+}
+
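+/* Transmit helper: resolve an xfrm dst bound to our if_id, enforce PMTU,
+ * then push the skb out through the tunnel dst.
+ */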
+static int
+xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
+{
+       struct xfrm_if *xi = netdev_priv(dev);
+       struct net_device_stats *stats = &xi->dev->stats;
+       struct dst_entry *dst = skb_dst(skb);
+       unsigned int length = skb->len;
+       struct net_device *tdev;
+       struct xfrm_state *x;
+       int err = -1;
+       int mtu;
+
+       if (!dst)
+               goto tx_err_link_failure;
+
+       dst_hold(dst);
+       dst = xfrm_lookup_with_ifid(xi->net, dst, fl, NULL, 0, xi->p.if_id);
+       if (IS_ERR(dst)) {
+               err = PTR_ERR(dst);
+               dst = NULL;
+               goto tx_err_link_failure;
+       }
+
+       x = dst->xfrm;
+       if (!x)
+               goto tx_err_link_failure;
+
+       if (x->if_id != xi->p.if_id)
+               goto tx_err_link_failure;
+
+       tdev = dst->dev;
+
+       if (tdev == dev) {
+               stats->collisions++;
+               net_warn_ratelimited("%s: Local routing loop detected!\n",
+                                    xi->p.name);
+               goto tx_err_dst_release;
+       }
+
+       mtu = dst_mtu(dst);
+       if (!skb->ignore_df && skb->len > mtu) {
+               skb_dst_update_pmtu(skb, mtu);
+
+               if (skb->protocol == htons(ETH_P_IPV6)) {
+                       if (mtu < IPV6_MIN_MTU)
+                               mtu = IPV6_MIN_MTU;
+
+                       icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+               } else {
+                       icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+                                 htonl(mtu));
+               }
+
+               dst_release(dst);
+               return -EMSGSIZE;
+       }
+
+       xfrmi_scrub_packet(skb, !net_eq(xi->net, dev_net(dev)));
+       skb_dst_set(skb, dst);
+       skb->dev = tdev;
+
+       err = dst_output(xi->net, skb->sk, skb);
+       if (net_xmit_eval(err) == 0) {
+               struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats);
+
+               u64_stats_update_begin(&tstats->syncp);
+               tstats->tx_bytes += length;
+               tstats->tx_packets++;
+               u64_stats_update_end(&tstats->syncp);
+       } else {
+               stats->tx_errors++;
+               stats->tx_aborted_errors++;
+       }
+
+       return 0;
+tx_err_link_failure:
+       stats->tx_carrier_errors++;
+       dst_link_failure(skb);
+tx_err_dst_release:
+       dst_release(dst);
+       return err;
+}
+
+static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+       struct xfrm_if *xi = netdev_priv(dev);
+       struct net_device_stats *stats = &xi->dev->stats;
+       struct flowi fl;
+       int ret;
+
+       memset(&fl, 0, sizeof(fl));
+
+       switch (skb->protocol) {
+       case htons(ETH_P_IPV6):
+               xfrm_decode_session(skb, &fl, AF_INET6);
+               memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
+               break;
+       case htons(ETH_P_IP):
+               xfrm_decode_session(skb, &fl, AF_INET);
+               memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
+               break;
+       default:
+               goto tx_err;
+       }
+
+       fl.flowi_oif = xi->phydev->ifindex;
+
+       ret = xfrmi_xmit2(skb, dev, &fl);
+       if (ret < 0)
+               goto tx_err;
+
+       return NETDEV_TX_OK;
+
+tx_err:
+       stats->tx_errors++;
+       stats->tx_dropped++;
+       kfree_skb(skb);
+       return NETDEV_TX_OK;
+}
+
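+/* IPv4 ICMP error handler: forward PMTU and redirect events to states
+ * owned by an xfrm interface.
+ */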
+static int xfrmi4_err(struct sk_buff *skb, u32 info)
+{
+       const struct iphdr *iph = (const struct iphdr *)skb->data;
+       struct net *net = dev_net(skb->dev);
+       int protocol = iph->protocol;
+       struct ip_comp_hdr *ipch;
+       struct ip_esp_hdr *esph;
+       struct ip_auth_hdr *ah;
+       struct xfrm_state *x;
+       struct xfrm_if *xi;
+       __be32 spi;
+
+       switch (protocol) {
+       case IPPROTO_ESP:
+               esph = (struct ip_esp_hdr *)(skb->data+(iph->ihl<<2));
+               spi = esph->spi;
+               break;
+       case IPPROTO_AH:
+               ah = (struct ip_auth_hdr *)(skb->data+(iph->ihl<<2));
+               spi = ah->spi;
+               break;
+       case IPPROTO_COMP:
+               ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2));
+               spi = htonl(ntohs(ipch->cpi));
+               break;
+       default:
+               return 0;
+       }
+
+       switch (icmp_hdr(skb)->type) {
+       case ICMP_DEST_UNREACH:
+               if (icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
+                       return 0;
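+               /* fall through */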
+       case ICMP_REDIRECT:
+               break;
+       default:
+               return 0;
+       }
+
+       x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
+                             spi, protocol, AF_INET);
+       if (!x)
+               return 0;
+
+       xi = xfrmi_lookup(net, x);
+       if (!xi) {
+               xfrm_state_put(x);
+               return -1;
+       }
+
+       if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
+               ipv4_update_pmtu(skb, net, info, 0, 0, protocol, 0);
+       else
+               ipv4_redirect(skb, net, 0, 0, protocol, 0);
+       xfrm_state_put(x);
+
+       return 0;
+}
+
+static int xfrmi6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+                   u8 type, u8 code, int offset, __be32 info)
+{
+       const struct ipv6hdr *iph = (const struct ipv6hdr *)skb->data;
+       struct net *net = dev_net(skb->dev);
+       int protocol = iph->nexthdr;
+       struct ip_comp_hdr *ipch;
+       struct ip_esp_hdr *esph;
+       struct ip_auth_hdr *ah;
+       struct xfrm_state *x;
+       struct xfrm_if *xi;
+       __be32 spi;
+
+       switch (protocol) {
+       case IPPROTO_ESP:
+               esph = (struct ip_esp_hdr *)(skb->data + offset);
+               spi = esph->spi;
+               break;
+       case IPPROTO_AH:
+               ah = (struct ip_auth_hdr *)(skb->data + offset);
+               spi = ah->spi;
+               break;
+       case IPPROTO_COMP:
+               ipch = (struct ip_comp_hdr *)(skb->data + offset);
+               spi = htonl(ntohs(ipch->cpi));
+               break;
+       default:
+               return 0;
+       }
+
+       if (type != ICMPV6_PKT_TOOBIG &&
+           type != NDISC_REDIRECT)
+               return 0;
+
+       x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
+                             spi, protocol, AF_INET6);
+       if (!x)
+               return 0;
+
+       xi = xfrmi_lookup(net, x);
+       if (!xi) {
+               xfrm_state_put(x);
+               return -1;
+       }
+
+       if (type == NDISC_REDIRECT)
+               ip6_redirect(skb, net, skb->dev->ifindex, 0,
+                            sock_net_uid(net, NULL));
+       else
+               ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
+       xfrm_state_put(x);
+
+       return 0;
+}
+
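+/* Apply new parameters; the lower device (link) cannot be changed. */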
+static int xfrmi_change(struct xfrm_if *xi, const struct xfrm_if_parms *p)
+{
+       if (xi->p.link != p->link)
+               return -EINVAL;
+
+       xi->p.if_id = p->if_id;
+
+       return 0;
+}
+
+static int xfrmi_update(struct xfrm_if *xi, struct xfrm_if_parms *p)
+{
+       struct net *net = dev_net(xi->dev);
+       struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
+       int err;
+
+       xfrmi_unlink(xfrmn, xi);
+       synchronize_net();
+       err = xfrmi_change(xi, p);
+       xfrmi_link(xfrmn, xi);
+       netdev_state_change(xi->dev);
+       return err;
+}
+
+static void xfrmi_get_stats64(struct net_device *dev,
+                              struct rtnl_link_stats64 *s)
+{
+       int cpu;
+
+       if (!dev->tstats)
+               return;
+
+       for_each_possible_cpu(cpu) {
+               struct pcpu_sw_netstats *stats;
+               struct pcpu_sw_netstats tmp;
+               int start;
+
+               stats = per_cpu_ptr(dev->tstats, cpu);
+               do {
+                       start = u64_stats_fetch_begin_irq(&stats->syncp);
+                       tmp.rx_packets = stats->rx_packets;
+                       tmp.rx_bytes   = stats->rx_bytes;
+                       tmp.tx_packets = stats->tx_packets;
+                       tmp.tx_bytes   = stats->tx_bytes;
+               } while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+
+               s->rx_packets += tmp.rx_packets;
+               s->rx_bytes   += tmp.rx_bytes;
+               s->tx_packets += tmp.tx_packets;
+               s->tx_bytes   += tmp.tx_bytes;
+       }
+
+       s->rx_dropped = dev->stats.rx_dropped;
+       s->tx_dropped = dev->stats.tx_dropped;
+}
+
+static int xfrmi_get_iflink(const struct net_device *dev)
+{
+       struct xfrm_if *xi = netdev_priv(dev);
+
+       return xi->phydev->ifindex;
+}
+
+static const struct net_device_ops xfrmi_netdev_ops = {
+       .ndo_init       = xfrmi_dev_init,
+       .ndo_uninit     = xfrmi_dev_uninit,
+       .ndo_start_xmit = xfrmi_xmit,
+       .ndo_get_stats64 = xfrmi_get_stats64,
+       .ndo_get_iflink = xfrmi_get_iflink,
+};
+
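+/* Default netdev setup: an ARPHRD_NONE device with no ARP, Ethernet-sized
+ * MTU limits, and dst entries kept on the skb.
+ */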
+static void xfrmi_dev_setup(struct net_device *dev)
+{
+       dev->netdev_ops         = &xfrmi_netdev_ops;
+       dev->type               = ARPHRD_NONE;
+       dev->hard_header_len    = ETH_HLEN;
+       dev->min_header_len     = ETH_HLEN;
+       dev->mtu                = ETH_DATA_LEN;
+       dev->min_mtu            = ETH_MIN_MTU;
+       dev->max_mtu            = ETH_DATA_LEN;
+       dev->addr_len           = ETH_ALEN;
+       dev->flags              = IFF_NOARP;
+       dev->needs_free_netdev  = true;
+       dev->priv_destructor    = xfrmi_dev_free;
+       netif_keep_dst(dev);
+}
+
+static int xfrmi_dev_init(struct net_device *dev)
+{
+       struct xfrm_if *xi = netdev_priv(dev);
+       struct net_device *phydev = xi->phydev;
+       int err;
+
+       dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
+       if (!dev->tstats)
+               return -ENOMEM;
+
+       err = gro_cells_init(&xi->gro_cells, dev);
+       if (err) {
+               free_percpu(dev->tstats);
+               return err;
+       }
+
+       dev->features |= NETIF_F_LLTX;
+
+       dev->needed_headroom = phydev->needed_headroom;
+       dev->needed_tailroom = phydev->needed_tailroom;
+
+       if (is_zero_ether_addr(dev->dev_addr))
+               eth_hw_addr_inherit(dev, phydev);
+       if (is_zero_ether_addr(dev->broadcast))
+               memcpy(dev->broadcast, phydev->broadcast, dev->addr_len);
+
+       return 0;
+}
+
+static int xfrmi_validate(struct nlattr *tb[], struct nlattr *data[],
+                        struct netlink_ext_ack *extack)
+{
+       return 0;
+}
+
+static void xfrmi_netlink_parms(struct nlattr *data[],
+                              struct xfrm_if_parms *parms)
+{
+       memset(parms, 0, sizeof(*parms));
+
+       if (!data)
+               return;
+
+       if (data[IFLA_XFRM_LINK])
+               parms->link = nla_get_u32(data[IFLA_XFRM_LINK]);
+
+       if (data[IFLA_XFRM_IF_ID])
+               parms->if_id = nla_get_u32(data[IFLA_XFRM_IF_ID]);
+}
+
+static int xfrmi_newlink(struct net *src_net, struct net_device *dev,
+                       struct nlattr *tb[], struct nlattr *data[],
+                       struct netlink_ext_ack *extack)
+{
+       struct net *net = dev_net(dev);
+       struct xfrm_if_parms *p;
+       struct xfrm_if *xi;
+
+       xi = netdev_priv(dev);
+       p = &xi->p;
+
+       xfrmi_netlink_parms(data, p);
+
+       if (!tb[IFLA_IFNAME])
+               return -EINVAL;
+
+       nla_strlcpy(p->name, tb[IFLA_IFNAME], IFNAMSIZ);
+
+       xi = xfrmi_locate(net, p, 1);
+       return PTR_ERR_OR_ZERO(xi);
+}
+
+static void xfrmi_dellink(struct net_device *dev, struct list_head *head)
+{
+       unregister_netdevice_queue(dev, head);
+}
+
+static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[],
+                          struct nlattr *data[],
+                          struct netlink_ext_ack *extack)
+{
+       struct xfrm_if *xi = netdev_priv(dev);
+       struct net *net = dev_net(dev);
+       struct xfrm_if_parms p;
+
+       /* Parse into a local copy first so a failed lookup cannot leave
+        * xi->p half-updated.
+        */
+       xfrmi_netlink_parms(data, &p);
+
+       xi = xfrmi_locate(net, &p, 0);
+       if (IS_ERR_OR_NULL(xi)) {
+               xi = netdev_priv(dev);
+       } else {
+               if (xi->dev != dev)
+                       return -EEXIST;
+       }
+
+       return xfrmi_update(xi, &p);
+}
+
+static size_t xfrmi_get_size(const struct net_device *dev)
+{
+       return
+               /* IFLA_XFRM_LINK */
+               nla_total_size(4) +
+               /* IFLA_XFRM_IF_ID */
+               nla_total_size(4) +
+               0;
+}
+
+static int xfrmi_fill_info(struct sk_buff *skb, const struct net_device *dev)
+{
+       struct xfrm_if *xi = netdev_priv(dev);
+       struct xfrm_if_parms *parm = &xi->p;
+
+       if (nla_put_u32(skb, IFLA_XFRM_LINK, parm->link) ||
+           nla_put_u32(skb, IFLA_XFRM_IF_ID, parm->if_id))
+               goto nla_put_failure;
+       return 0;
+
+nla_put_failure:
+       return -EMSGSIZE;
+}
+
+static struct net *xfrmi_get_link_net(const struct net_device *dev)
+{
+       struct xfrm_if *xi = netdev_priv(dev);
+
+       return dev_net(xi->phydev);
+}
+
+static const struct nla_policy xfrmi_policy[IFLA_XFRM_MAX + 1] = {
+       [IFLA_XFRM_LINK]        = { .type = NLA_U32 },
+       [IFLA_XFRM_IF_ID]       = { .type = NLA_U32 },
+};
+
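+/* The device is managed over rtnetlink. With an iproute2 that understands
+ * the "xfrm" link kind, creation would look roughly like (illustrative
+ * only, not part of this patch):
+ *
+ *   ip link add xfrmi0 type xfrm dev eth0 if_id 42
+ */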
+static struct rtnl_link_ops xfrmi_link_ops __read_mostly = {
+       .kind           = "xfrm",
+       .maxtype        = IFLA_XFRM_MAX,
+       .policy         = xfrmi_policy,
+       .priv_size      = sizeof(struct xfrm_if),
+       .setup          = xfrmi_dev_setup,
+       .validate       = xfrmi_validate,
+       .newlink        = xfrmi_newlink,
+       .dellink        = xfrmi_dellink,
+       .changelink     = xfrmi_changelink,
+       .get_size       = xfrmi_get_size,
+       .fill_info      = xfrmi_fill_info,
+       .get_link_net   = xfrmi_get_link_net,
+};
+
+static void __net_exit xfrmi_destroy_interfaces(struct xfrmi_net *xfrmn)
+{
+       struct xfrm_if *xi;
+       LIST_HEAD(list);
+
+       /* Queue every interface in this netns, not only the list head;
+        * each one is unlinked via ndo_uninit from
+        * unregister_netdevice_many().
+        */
+       for (xi = rtnl_dereference(xfrmn->xfrmi[0]); xi;
+            xi = rtnl_dereference(xi->next))
+               unregister_netdevice_queue(xi->dev, &list);
+
+       unregister_netdevice_many(&list);
+}
+
+static int __net_init xfrmi_init_net(struct net *net)
+{
+       return 0;
+}
+
+static void __net_exit xfrmi_exit_net(struct net *net)
+{
+       struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
+
+       rtnl_lock();
+       xfrmi_destroy_interfaces(xfrmn);
+       rtnl_unlock();
+}
+
+static struct pernet_operations xfrmi_net_ops = {
+       .init = xfrmi_init_net,
+       .exit = xfrmi_exit_net,
+       .id   = &xfrmi_net_id,
+       .size = sizeof(struct xfrmi_net),
+};
+
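+/* Hook into the ESP/AH/IPCOMP receive paths so decapsulated packets and
+ * ICMP errors reach the callbacks above.
+ */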
+static struct xfrm6_protocol xfrmi_esp6_protocol __read_mostly = {
+       .handler        =       xfrm6_rcv,
+       .cb_handler     =       xfrmi_rcv_cb,
+       .err_handler    =       xfrmi6_err,
+       .priority       =       10,
+};
+
+static struct xfrm6_protocol xfrmi_ah6_protocol __read_mostly = {
+       .handler        =       xfrm6_rcv,
+       .cb_handler     =       xfrmi_rcv_cb,
+       .err_handler    =       xfrmi6_err,
+       .priority       =       10,
+};
+
+static struct xfrm6_protocol xfrmi_ipcomp6_protocol __read_mostly = {
+       .handler        =       xfrm6_rcv,
+       .cb_handler     =       xfrmi_rcv_cb,
+       .err_handler    =       xfrmi6_err,
+       .priority       =       10,
+};
+
+static struct xfrm4_protocol xfrmi_esp4_protocol __read_mostly = {
+       .handler        =       xfrm4_rcv,
+       .input_handler  =       xfrm_input,
+       .cb_handler     =       xfrmi_rcv_cb,
+       .err_handler    =       xfrmi4_err,
+       .priority       =       10,
+};
+
+static struct xfrm4_protocol xfrmi_ah4_protocol __read_mostly = {
+       .handler        =       xfrm4_rcv,
+       .input_handler  =       xfrm_input,
+       .cb_handler     =       xfrmi_rcv_cb,
+       .err_handler    =       xfrmi4_err,
+       .priority       =       10,
+};
+
+static struct xfrm4_protocol xfrmi_ipcomp4_protocol __read_mostly = {
+       .handler        =       xfrm4_rcv,
+       .input_handler  =       xfrm_input,
+       .cb_handler     =       xfrmi_rcv_cb,
+       .err_handler    =       xfrmi4_err,
+       .priority       =       10,
+};
+
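+/* Register the IPv4 protocol callbacks, unwinding already-registered
+ * entries on partial failure.
+ */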
+static int __init xfrmi4_init(void)
+{
+       int err;
+
+       err = xfrm4_protocol_register(&xfrmi_esp4_protocol, IPPROTO_ESP);
+       if (err < 0)
+               goto xfrm_proto_esp_failed;
+       err = xfrm4_protocol_register(&xfrmi_ah4_protocol, IPPROTO_AH);
+       if (err < 0)
+               goto xfrm_proto_ah_failed;
+       err = xfrm4_protocol_register(&xfrmi_ipcomp4_protocol, IPPROTO_COMP);
+       if (err < 0)
+               goto xfrm_proto_comp_failed;
+
+       return 0;
+
+xfrm_proto_comp_failed:
+       xfrm4_protocol_deregister(&xfrmi_ah4_protocol, IPPROTO_AH);
+xfrm_proto_ah_failed:
+       xfrm4_protocol_deregister(&xfrmi_esp4_protocol, IPPROTO_ESP);
+xfrm_proto_esp_failed:
+       return err;
+}
+
+static void xfrmi4_fini(void)
+{
+       xfrm4_protocol_deregister(&xfrmi_ipcomp4_protocol, IPPROTO_COMP);
+       xfrm4_protocol_deregister(&xfrmi_ah4_protocol, IPPROTO_AH);
+       xfrm4_protocol_deregister(&xfrmi_esp4_protocol, IPPROTO_ESP);
+}
+
+static int __init xfrmi6_init(void)
+{
+       int err;
+
+       err = xfrm6_protocol_register(&xfrmi_esp6_protocol, IPPROTO_ESP);
+       if (err < 0)
+               goto xfrm_proto_esp_failed;
+       err = xfrm6_protocol_register(&xfrmi_ah6_protocol, IPPROTO_AH);
+       if (err < 0)
+               goto xfrm_proto_ah_failed;
+       err = xfrm6_protocol_register(&xfrmi_ipcomp6_protocol, IPPROTO_COMP);
+       if (err < 0)
+               goto xfrm_proto_comp_failed;
+
+       return 0;
+
+xfrm_proto_comp_failed:
+       xfrm6_protocol_deregister(&xfrmi_ah6_protocol, IPPROTO_AH);
+xfrm_proto_ah_failed:
+       xfrm6_protocol_deregister(&xfrmi_esp6_protocol, IPPROTO_ESP);
+xfrm_proto_esp_failed:
+       return err;
+}
+
+static void xfrmi6_fini(void)
+{
+       xfrm6_protocol_deregister(&xfrmi_ipcomp6_protocol, IPPROTO_COMP);
+       xfrm6_protocol_deregister(&xfrmi_ah6_protocol, IPPROTO_AH);
+       xfrm6_protocol_deregister(&xfrmi_esp6_protocol, IPPROTO_ESP);
+}
+
+static const struct xfrm_if_cb xfrm_if_cb = {
+       .decode_session =       xfrmi_decode_session,
+};
+
+static int __init xfrmi_init(void)
+{
+       const char *msg;
+       int err;
+
+       pr_info("IPsec XFRM device driver\n");
+
+       msg = "tunnel device";
+       err = register_pernet_device(&xfrmi_net_ops);
+       if (err < 0)
+               goto pernet_dev_failed;
+
+       msg = "xfrm4 protocols";
+       err = xfrmi4_init();
+       if (err < 0)
+               goto xfrmi4_failed;
+
+       msg = "xfrm6 protocols";
+       err = xfrmi6_init();
+       if (err < 0)
+               goto xfrmi6_failed;
+
+       msg = "netlink interface";
+       err = rtnl_link_register(&xfrmi_link_ops);
+       if (err < 0)
+               goto rtnl_link_failed;
+
+       xfrm_if_register_cb(&xfrm_if_cb);
+
+       return err;
+
+rtnl_link_failed:
+       xfrmi6_fini();
+xfrmi6_failed:
+       xfrmi4_fini();
+xfrmi4_failed:
+       unregister_pernet_device(&xfrmi_net_ops);
+pernet_dev_failed:
+       pr_err("xfrmi init: failed to register %s\n", msg);
+       return err;
+}
+
+static void __exit xfrmi_fini(void)
+{
+       xfrm_if_unregister_cb();
+       rtnl_link_unregister(&xfrmi_link_ops);
+       xfrmi4_fini();
+       xfrmi6_fini();
+       unregister_pernet_device(&xfrmi_net_ops);
+}
+
+module_init(xfrmi_init);
+module_exit(xfrmi_fini);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_RTNL_LINK("xfrm");
+MODULE_ALIAS_NETDEV("xfrm0");
+MODULE_AUTHOR("Steffen Klassert");
+MODULE_DESCRIPTION("XFRM virtual interface");
index 89b178a78dc7894ac477b876e3e25320c8265f85..45ba07ab3e4f8d322e564c902774706ec09bcf9c 100644 (file)
@@ -66,8 +66,7 @@ static int xfrm_output_one(struct sk_buff *skb, int err)
                        goto error_nolock;
                }
 
-               if (x->props.output_mark)
-                       skb->mark = x->props.output_mark;
+               skb->mark = xfrm_smark_get(skb->mark, x);
 
                err = x->outer_mode->output(x, skb);
                if (err) {
index 7c5e8978aeaabfe417d86f943f1576f30571da4c..3110c3fbee2099e7a4563a99c988e5ad66d0658c 100644 (file)
@@ -45,8 +45,9 @@ struct xfrm_flo {
        u8 flags;
 };
 
-static DEFINE_PER_CPU(struct xfrm_dst *, xfrm_last_dst);
-static struct work_struct *xfrm_pcpu_work __read_mostly;
+static DEFINE_SPINLOCK(xfrm_if_cb_lock);
+static struct xfrm_if_cb const __rcu *xfrm_if_cb __read_mostly;
+
 static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock);
 static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1]
                                                __read_mostly;
@@ -119,6 +120,12 @@ static const struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short fa
        return afinfo;
 }
 
+/* Called with rcu_read_lock(). */
+static const struct xfrm_if_cb *xfrm_if_get_cb(void)
+{
+       return rcu_dereference(xfrm_if_cb);
+}
+
 struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif,
                                    const xfrm_address_t *saddr,
                                    const xfrm_address_t *daddr,
@@ -182,8 +189,8 @@ static inline unsigned long make_jiffies(long secs)
 static void xfrm_policy_timer(struct timer_list *t)
 {
        struct xfrm_policy *xp = from_timer(xp, t, timer);
-       unsigned long now = get_seconds();
-       long next = LONG_MAX;
+       time64_t now = ktime_get_real_seconds();
+       time64_t next = TIME64_MAX;
        int warn = 0;
        int dir;
 
@@ -195,7 +202,7 @@ static void xfrm_policy_timer(struct timer_list *t)
        dir = xfrm_policy_id2dir(xp->index);
 
        if (xp->lft.hard_add_expires_seconds) {
-               long tmo = xp->lft.hard_add_expires_seconds +
+               time64_t tmo = xp->lft.hard_add_expires_seconds +
                        xp->curlft.add_time - now;
                if (tmo <= 0)
                        goto expired;
@@ -203,7 +210,7 @@ static void xfrm_policy_timer(struct timer_list *t)
                        next = tmo;
        }
        if (xp->lft.hard_use_expires_seconds) {
-               long tmo = xp->lft.hard_use_expires_seconds +
+               time64_t tmo = xp->lft.hard_use_expires_seconds +
                        (xp->curlft.use_time ? : xp->curlft.add_time) - now;
                if (tmo <= 0)
                        goto expired;
@@ -211,7 +218,7 @@ static void xfrm_policy_timer(struct timer_list *t)
                        next = tmo;
        }
        if (xp->lft.soft_add_expires_seconds) {
-               long tmo = xp->lft.soft_add_expires_seconds +
+               time64_t tmo = xp->lft.soft_add_expires_seconds +
                        xp->curlft.add_time - now;
                if (tmo <= 0) {
                        warn = 1;
@@ -221,7 +228,7 @@ static void xfrm_policy_timer(struct timer_list *t)
                        next = tmo;
        }
        if (xp->lft.soft_use_expires_seconds) {
-               long tmo = xp->lft.soft_use_expires_seconds +
+               time64_t tmo = xp->lft.soft_use_expires_seconds +
                        (xp->curlft.use_time ? : xp->curlft.add_time) - now;
                if (tmo <= 0) {
                        warn = 1;
@@ -233,7 +240,7 @@ static void xfrm_policy_timer(struct timer_list *t)
 
        if (warn)
                km_policy_expired(xp, dir, 0, 0);
-       if (next != LONG_MAX &&
+       if (next != TIME64_MAX &&
            !mod_timer(&xp->timer, jiffies + make_jiffies(next)))
                xfrm_pol_hold(xp);
 
@@ -747,6 +754,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
        newpos = NULL;
        hlist_for_each_entry(pol, chain, bydst) {
                if (pol->type == policy->type &&
+                   pol->if_id == policy->if_id &&
                    !selector_cmp(&pol->selector, &policy->selector) &&
                    xfrm_policy_mark_match(policy, pol) &&
                    xfrm_sec_ctx_match(pol->security, policy->security) &&
@@ -783,7 +791,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
        }
        policy->index = delpol ? delpol->index : xfrm_gen_index(net, dir, policy->index);
        hlist_add_head(&policy->byidx, net->xfrm.policy_byidx+idx_hash(net, policy->index));
-       policy->curlft.add_time = get_seconds();
+       policy->curlft.add_time = ktime_get_real_seconds();
        policy->curlft.use_time = 0;
        if (!mod_timer(&policy->timer, jiffies + HZ))
                xfrm_pol_hold(policy);
@@ -798,8 +806,9 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
 }
 EXPORT_SYMBOL(xfrm_policy_insert);
 
-struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type,
-                                         int dir, struct xfrm_selector *sel,
+struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u32 if_id,
+                                         u8 type, int dir,
+                                         struct xfrm_selector *sel,
                                          struct xfrm_sec_ctx *ctx, int delete,
                                          int *err)
 {
@@ -812,6 +821,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type,
        ret = NULL;
        hlist_for_each_entry(pol, chain, bydst) {
                if (pol->type == type &&
+                   pol->if_id == if_id &&
                    (mark & pol->mark.m) == pol->mark.v &&
                    !selector_cmp(sel, &pol->selector) &&
                    xfrm_sec_ctx_match(ctx, pol->security)) {
@@ -837,8 +847,9 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type,
 }
 EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
 
-struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type,
-                                    int dir, u32 id, int delete, int *err)
+struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u32 if_id,
+                                    u8 type, int dir, u32 id, int delete,
+                                    int *err)
 {
        struct xfrm_policy *pol, *ret;
        struct hlist_head *chain;
@@ -853,6 +864,7 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type,
        ret = NULL;
        hlist_for_each_entry(pol, chain, byidx) {
                if (pol->type == type && pol->index == id &&
+                   pol->if_id == if_id &&
                    (mark & pol->mark.m) == pol->mark.v) {
                        xfrm_pol_hold(pol);
                        if (delete) {
@@ -1056,13 +1068,14 @@ EXPORT_SYMBOL(xfrm_policy_walk_done);
  */
 static int xfrm_policy_match(const struct xfrm_policy *pol,
                             const struct flowi *fl,
-                            u8 type, u16 family, int dir)
+                            u8 type, u16 family, int dir, u32 if_id)
 {
        const struct xfrm_selector *sel = &pol->selector;
        int ret = -ESRCH;
        bool match;
 
        if (pol->family != family ||
+           pol->if_id != if_id ||
            (fl->flowi_mark & pol->mark.m) != pol->mark.v ||
            pol->type != type)
                return ret;
@@ -1077,7 +1090,8 @@ static int xfrm_policy_match(const struct xfrm_policy *pol,
 
 static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
                                                     const struct flowi *fl,
-                                                    u16 family, u8 dir)
+                                                    u16 family, u8 dir,
+                                                    u32 if_id)
 {
        int err;
        struct xfrm_policy *pol, *ret;
@@ -1101,7 +1115,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
        priority = ~0U;
        ret = NULL;
        hlist_for_each_entry_rcu(pol, chain, bydst) {
-               err = xfrm_policy_match(pol, fl, type, family, dir);
+               err = xfrm_policy_match(pol, fl, type, family, dir, if_id);
                if (err) {
                        if (err == -ESRCH)
                                continue;
@@ -1120,7 +1134,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
                if ((pol->priority >= priority) && ret)
                        break;
 
-               err = xfrm_policy_match(pol, fl, type, family, dir);
+               err = xfrm_policy_match(pol, fl, type, family, dir, if_id);
                if (err) {
                        if (err == -ESRCH)
                                continue;
@@ -1145,21 +1159,25 @@ fail:
        return ret;
 }
 
-static struct xfrm_policy *
-xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir)
+static struct xfrm_policy *xfrm_policy_lookup(struct net *net,
+                                             const struct flowi *fl,
+                                             u16 family, u8 dir, u32 if_id)
 {
 #ifdef CONFIG_XFRM_SUB_POLICY
        struct xfrm_policy *pol;
 
-       pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir);
+       pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family,
+                                       dir, if_id);
        if (pol != NULL)
                return pol;
 #endif
-       return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
+       return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family,
+                                        dir, if_id);
 }
 
 static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
-                                                const struct flowi *fl, u16 family)
+                                                const struct flowi *fl,
+                                                u16 family, u32 if_id)
 {
        struct xfrm_policy *pol;
 
@@ -1177,7 +1195,8 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
 
                match = xfrm_selector_match(&pol->selector, fl, family);
                if (match) {
-                       if ((sk->sk_mark & pol->mark.m) != pol->mark.v) {
+                       if ((sk->sk_mark & pol->mark.m) != pol->mark.v ||
+                           pol->if_id != if_id) {
                                pol = NULL;
                                goto out;
                        }
@@ -1268,7 +1287,7 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
        old_pol = rcu_dereference_protected(sk->sk_policy[dir],
                                lockdep_is_held(&net->xfrm.xfrm_policy_lock));
        if (pol) {
-               pol->curlft.add_time = get_seconds();
+               pol->curlft.add_time = ktime_get_real_seconds();
                pol->index = xfrm_gen_index(net, XFRM_POLICY_MAX+dir, 0);
                xfrm_sk_policy_link(pol, dir);
        }
@@ -1305,6 +1324,7 @@ static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir)
                newp->lft = old->lft;
                newp->curlft = old->curlft;
                newp->mark = old->mark;
+               newp->if_id = old->if_id;
                newp->action = old->action;
                newp->flags = old->flags;
                newp->xfrm_nr = old->xfrm_nr;
@@ -1390,7 +1410,8 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl,
                        }
                }
 
-               x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);
+               x = xfrm_state_find(remote, local, fl, tmpl, policy, &error,
+                                   family, policy->if_id);
 
                if (x && x->km.state == XFRM_STATE_VALID) {
                        xfrm[nx++] = x;
@@ -1607,10 +1628,11 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
                dst_copy_metrics(dst1, dst);
 
                if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
+                       __u32 mark = xfrm_smark_get(fl->flowi_mark, xfrm[i]);
+
                        family = xfrm[i]->props.family;
                        dst = xfrm_dst_lookup(xfrm[i], tos, fl->flowi_oif,
-                                             &saddr, &daddr, family,
-                                             xfrm[i]->props.output_mark);
+                                             &saddr, &daddr, family, mark);
                        err = PTR_ERR(dst);
                        if (IS_ERR(dst))
                                goto put_states;
@@ -1692,7 +1714,8 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family,
                pols[1] = xfrm_policy_lookup_bytype(xp_net(pols[0]),
                                                    XFRM_POLICY_TYPE_MAIN,
                                                    fl, family,
-                                                   XFRM_POLICY_OUT);
+                                                   XFRM_POLICY_OUT,
+                                                   pols[0]->if_id);
                if (pols[1]) {
                        if (IS_ERR(pols[1])) {
                                xfrm_pols_put(pols, *num_pols);
@@ -1714,108 +1737,6 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family,
 
 }
 
-static void xfrm_last_dst_update(struct xfrm_dst *xdst, struct xfrm_dst *old)
-{
-       this_cpu_write(xfrm_last_dst, xdst);
-       if (old)
-               dst_release(&old->u.dst);
-}
-
-static void __xfrm_pcpu_work_fn(void)
-{
-       struct xfrm_dst *old;
-
-       old = this_cpu_read(xfrm_last_dst);
-       if (old && !xfrm_bundle_ok(old))
-               xfrm_last_dst_update(NULL, old);
-}
-
-static void xfrm_pcpu_work_fn(struct work_struct *work)
-{
-       local_bh_disable();
-       rcu_read_lock();
-       __xfrm_pcpu_work_fn();
-       rcu_read_unlock();
-       local_bh_enable();
-}
-
-void xfrm_policy_cache_flush(void)
-{
-       struct xfrm_dst *old;
-       bool found = false;
-       int cpu;
-
-       might_sleep();
-
-       local_bh_disable();
-       rcu_read_lock();
-       for_each_possible_cpu(cpu) {
-               old = per_cpu(xfrm_last_dst, cpu);
-               if (old && !xfrm_bundle_ok(old)) {
-                       if (smp_processor_id() == cpu) {
-                               __xfrm_pcpu_work_fn();
-                               continue;
-                       }
-                       found = true;
-                       break;
-               }
-       }
-
-       rcu_read_unlock();
-       local_bh_enable();
-
-       if (!found)
-               return;
-
-       get_online_cpus();
-
-       for_each_possible_cpu(cpu) {
-               bool bundle_release;
-
-               rcu_read_lock();
-               old = per_cpu(xfrm_last_dst, cpu);
-               bundle_release = old && !xfrm_bundle_ok(old);
-               rcu_read_unlock();
-
-               if (!bundle_release)
-                       continue;
-
-               if (cpu_online(cpu)) {
-                       schedule_work_on(cpu, &xfrm_pcpu_work[cpu]);
-                       continue;
-               }
-
-               rcu_read_lock();
-               old = per_cpu(xfrm_last_dst, cpu);
-               if (old && !xfrm_bundle_ok(old)) {
-                       per_cpu(xfrm_last_dst, cpu) = NULL;
-                       dst_release(&old->u.dst);
-               }
-               rcu_read_unlock();
-       }
-
-       put_online_cpus();
-}
-
-static bool xfrm_xdst_can_reuse(struct xfrm_dst *xdst,
-                               struct xfrm_state * const xfrm[],
-                               int num)
-{
-       const struct dst_entry *dst = &xdst->u.dst;
-       int i;
-
-       if (xdst->num_xfrms != num)
-               return false;
-
-       for (i = 0; i < num; i++) {
-               if (!dst || dst->xfrm != xfrm[i])
-                       return false;
-               dst = xfrm_dst_child(dst);
-       }
-
-       return xfrm_bundle_ok(xdst);
-}
-
 static struct xfrm_dst *
 xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
                               const struct flowi *fl, u16 family,
@@ -1824,34 +1745,21 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
        struct net *net = xp_net(pols[0]);
        struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
        struct xfrm_dst *bundle[XFRM_MAX_DEPTH];
-       struct xfrm_dst *xdst, *old;
+       struct xfrm_dst *xdst;
        struct dst_entry *dst;
        int err;
 
        /* Try to instantiate a bundle */
        err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family);
        if (err <= 0) {
-               if (err != 0 && err != -EAGAIN)
+               if (err == 0)
+                       return NULL;
+
+               if (err != -EAGAIN)
                        XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
                return ERR_PTR(err);
        }
 
-       xdst = this_cpu_read(xfrm_last_dst);
-       if (xdst &&
-           xdst->u.dst.dev == dst_orig->dev &&
-           xdst->num_pols == num_pols &&
-           memcmp(xdst->pols, pols,
-                  sizeof(struct xfrm_policy *) * num_pols) == 0 &&
-           xfrm_xdst_can_reuse(xdst, xfrm, err)) {
-               dst_hold(&xdst->u.dst);
-               xfrm_pols_put(pols, num_pols);
-               while (err > 0)
-                       xfrm_state_put(xfrm[--err]);
-               return xdst;
-       }
-
-       old = xdst;
-
        dst = xfrm_bundle_create(pols[0], xfrm, bundle, err, fl, dst_orig);
        if (IS_ERR(dst)) {
                XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR);
@@ -1864,9 +1772,6 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
        memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
        xdst->policy_genid = atomic_read(&pols[0]->genid);
 
-       atomic_set(&xdst->u.dst.__refcnt, 2);
-       xfrm_last_dst_update(xdst, old);
-
        return xdst;
 }
 
@@ -2047,8 +1952,10 @@ free_dst:
        goto out;
 }
 
-static struct xfrm_dst *
-xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, struct xfrm_flo *xflo)
+static struct xfrm_dst *xfrm_bundle_lookup(struct net *net,
+                                          const struct flowi *fl,
+                                          u16 family, u8 dir,
+                                          struct xfrm_flo *xflo, u32 if_id)
 {
        struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
        int num_pols = 0, num_xfrms = 0, err;
@@ -2057,7 +1964,7 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
        /* Resolve policies to use if we couldn't get them from
         * previous cache entry */
        num_pols = 1;
-       pols[0] = xfrm_policy_lookup(net, fl, family, dir);
+       pols[0] = xfrm_policy_lookup(net, fl, family, dir, if_id);
        err = xfrm_expand_policies(fl, family, pols,
                                           &num_pols, &num_xfrms);
        if (err < 0)
@@ -2067,13 +1974,15 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
        if (num_xfrms <= 0)
                goto make_dummy_bundle;
 
-       local_bh_disable();
        xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family,
                                              xflo->dst_orig);
-       local_bh_enable();
-
        if (IS_ERR(xdst)) {
                err = PTR_ERR(xdst);
+               if (err == -EREMOTE) {
+                       xfrm_pols_put(pols, num_pols);
+                       return NULL;
+               }
+
                if (err != -EAGAIN)
                        goto error;
                goto make_dummy_bundle;
@@ -2123,14 +2032,19 @@ static struct dst_entry *make_blackhole(struct net *net, u16 family,
        return ret;
 }
 
-/* Main function: finds/creates a bundle for given flow.
+/* Finds/creates a bundle for given flow and if_id
  *
  * At the moment we eat a raw IP route. Mostly to speed up lookups
  * on interfaces with disabled IPsec.
+ *
+ * xfrm_lookup uses an if_id of 0 by default, and is provided for
+ * compatibility.
  */
-struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
-                             const struct flowi *fl,
-                             const struct sock *sk, int flags)
+struct dst_entry *xfrm_lookup_with_ifid(struct net *net,
+                                       struct dst_entry *dst_orig,
+                                       const struct flowi *fl,
+                                       const struct sock *sk,
+                                       int flags, u32 if_id)
 {
        struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
        struct xfrm_dst *xdst;
@@ -2146,7 +2060,8 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
        sk = sk_const_to_full_sk(sk);
        if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
                num_pols = 1;
-               pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl, family);
+               pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl, family,
+                                               if_id);
                err = xfrm_expand_policies(fl, family, pols,
                                           &num_pols, &num_xfrms);
                if (err < 0)
@@ -2158,15 +2073,16 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
                                goto no_transform;
                        }
 
-                       local_bh_disable();
                        xdst = xfrm_resolve_and_create_bundle(
                                        pols, num_pols, fl,
                                        family, dst_orig);
-                       local_bh_enable();
 
                        if (IS_ERR(xdst)) {
                                xfrm_pols_put(pols, num_pols);
                                err = PTR_ERR(xdst);
+                               if (err == -EREMOTE)
+                                       goto nopol;
+
                                goto dropdst;
                        } else if (xdst == NULL) {
                                num_xfrms = 0;
@@ -2189,7 +2105,7 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
                    !net->xfrm.policy_count[XFRM_POLICY_OUT])
                        goto nopol;
 
-               xdst = xfrm_bundle_lookup(net, fl, family, dir, &xflo);
+               xdst = xfrm_bundle_lookup(net, fl, family, dir, &xflo, if_id);
                if (xdst == NULL)
                        goto nopol;
                if (IS_ERR(xdst)) {
@@ -2234,7 +2150,7 @@ no_transform:
        }
 
        for (i = 0; i < num_pols; i++)
-               pols[i]->curlft.use_time = get_seconds();
+               pols[i]->curlft.use_time = ktime_get_real_seconds();
 
        if (num_xfrms < 0) {
                /* Prohibit the flow */
@@ -2270,6 +2186,19 @@ dropdst:
        xfrm_pols_put(pols, drop_pols);
        return ERR_PTR(err);
 }
+EXPORT_SYMBOL(xfrm_lookup_with_ifid);
+
+/* Main function: finds/creates a bundle for given flow.
+ *
+ * At the moment we eat a raw IP route. Mostly to speed up lookups
+ * on interfaces with disabled IPsec.
+ */
+struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
+                             const struct flowi *fl, const struct sock *sk,
+                             int flags)
+{
+       return xfrm_lookup_with_ifid(net, dst_orig, fl, sk, flags, 0);
+}
 EXPORT_SYMBOL(xfrm_lookup);
 
 /* Callers of xfrm_lookup_route() must ensure a call to dst_output().
@@ -2368,6 +2297,7 @@ int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
                return -EAFNOSUPPORT;
 
        afinfo->decode_session(skb, fl, reverse);
+
        err = security_xfrm_decode_session(skb, &fl->flowi_secid);
        rcu_read_unlock();
        return err;
@@ -2398,6 +2328,19 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
        int reverse;
        struct flowi fl;
        int xerr_idx = -1;
+       const struct xfrm_if_cb *ifcb;
+       struct xfrm_if *xi;
+       u32 if_id = 0;
+
+       rcu_read_lock();
+       ifcb = xfrm_if_get_cb();
+
+       if (ifcb) {
+               xi = ifcb->decode_session(skb);
+               if (xi)
+                       if_id = xi->p.if_id;
+       }
+       rcu_read_unlock();
 
        reverse = dir & ~XFRM_POLICY_MASK;
        dir &= XFRM_POLICY_MASK;
@@ -2425,7 +2368,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
        pol = NULL;
        sk = sk_to_full_sk(sk);
        if (sk && sk->sk_policy[dir]) {
-               pol = xfrm_sk_policy_lookup(sk, dir, &fl, family);
+               pol = xfrm_sk_policy_lookup(sk, dir, &fl, family, if_id);
                if (IS_ERR(pol)) {
                        XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
                        return 0;
@@ -2433,7 +2376,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
        }
 
        if (!pol)
-               pol = xfrm_policy_lookup(net, &fl, family, dir);
+               pol = xfrm_policy_lookup(net, &fl, family, dir, if_id);
 
        if (IS_ERR(pol)) {
                XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
@@ -2449,7 +2392,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
                return 1;
        }
 
-       pol->curlft.use_time = get_seconds();
+       pol->curlft.use_time = ktime_get_real_seconds();
 
        pols[0] = pol;
        npols++;
@@ -2457,13 +2400,13 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
        if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
                pols[1] = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN,
                                                    &fl, family,
-                                                   XFRM_POLICY_IN);
+                                                   XFRM_POLICY_IN, if_id);
                if (pols[1]) {
                        if (IS_ERR(pols[1])) {
                                XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
                                return 0;
                        }
-                       pols[1]->curlft.use_time = get_seconds();
+                       pols[1]->curlft.use_time = ktime_get_real_seconds();
                        npols++;
                }
        }
@@ -2822,6 +2765,21 @@ void xfrm_policy_unregister_afinfo(const struct xfrm_policy_afinfo *afinfo)
 }
 EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);
 
+void xfrm_if_register_cb(const struct xfrm_if_cb *ifcb)
+{
+       spin_lock(&xfrm_if_cb_lock);
+       rcu_assign_pointer(xfrm_if_cb, ifcb);
+       spin_unlock(&xfrm_if_cb_lock);
+}
+EXPORT_SYMBOL(xfrm_if_register_cb);
+
+void xfrm_if_unregister_cb(void)
+{
+       RCU_INIT_POINTER(xfrm_if_cb, NULL);
+       synchronize_rcu();
+}
+EXPORT_SYMBOL(xfrm_if_unregister_cb);
+
 #ifdef CONFIG_XFRM_STATISTICS
 static int __net_init xfrm_statistics_init(struct net *net)
 {
@@ -2989,19 +2947,13 @@ static struct pernet_operations __net_initdata xfrm_net_ops = {
 
 void __init xfrm_init(void)
 {
-       int i;
-
-       xfrm_pcpu_work = kmalloc_array(NR_CPUS, sizeof(*xfrm_pcpu_work),
-                                      GFP_KERNEL);
-       BUG_ON(!xfrm_pcpu_work);
-
-       for (i = 0; i < NR_CPUS; i++)
-               INIT_WORK(&xfrm_pcpu_work[i], xfrm_pcpu_work_fn);
-
        register_pernet_subsys(&xfrm_net_ops);
        xfrm_dev_init();
        seqcount_init(&xfrm_policy_hash_generation);
        xfrm_input_init();
+
+       RCU_INIT_POINTER(xfrm_if_cb, NULL);
+       synchronize_rcu();
 }
 
 #ifdef CONFIG_AUDITSYSCALL
index 8308281f32530bd9103e7f7bd06472824600c526..b669262682c9763e7c863d6bb77f44ed34402cce 100644 (file)
@@ -475,8 +475,8 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me)
 {
        struct tasklet_hrtimer *thr = container_of(me, struct tasklet_hrtimer, timer);
        struct xfrm_state *x = container_of(thr, struct xfrm_state, mtimer);
-       unsigned long now = get_seconds();
-       long next = LONG_MAX;
+       time64_t now = ktime_get_real_seconds();
+       time64_t next = TIME64_MAX;
        int warn = 0;
        int err = 0;
 
@@ -537,7 +537,7 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me)
        if (warn)
                km_state_expired(x, 0, 0);
 resched:
-       if (next != LONG_MAX) {
+       if (next != TIME64_MAX) {
                tasklet_hrtimer_start(&x->mtimer, ktime_set(next, 0), HRTIMER_MODE_REL);
        }
 
@@ -577,7 +577,7 @@ struct xfrm_state *xfrm_state_alloc(struct net *net)
                tasklet_hrtimer_init(&x->mtimer, xfrm_timer_handler,
                                        CLOCK_BOOTTIME, HRTIMER_MODE_ABS);
                timer_setup(&x->rtimer, xfrm_replay_timer_handler, 0);
-               x->curlft.add_time = get_seconds();
+               x->curlft.add_time = ktime_get_real_seconds();
                x->lft.soft_byte_limit = XFRM_INF;
                x->lft.soft_packet_limit = XFRM_INF;
                x->lft.hard_byte_limit = XFRM_INF;
@@ -735,10 +735,9 @@ restart:
        }
 out:
        spin_unlock_bh(&net->xfrm.xfrm_state_lock);
-       if (cnt) {
+       if (cnt)
                err = 0;
-               xfrm_policy_cache_flush();
-       }
+
        return err;
 }
 EXPORT_SYMBOL(xfrm_state_flush);
@@ -931,7 +930,7 @@ struct xfrm_state *
 xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
                const struct flowi *fl, struct xfrm_tmpl *tmpl,
                struct xfrm_policy *pol, int *err,
-               unsigned short family)
+               unsigned short family, u32 if_id)
 {
        static xfrm_address_t saddr_wildcard = { };
        struct net *net = xp_net(pol);
@@ -955,6 +954,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
                if (x->props.family == encap_family &&
                    x->props.reqid == tmpl->reqid &&
                    (mark & x->mark.m) == x->mark.v &&
+                   x->if_id == if_id &&
                    !(x->props.flags & XFRM_STATE_WILDRECV) &&
                    xfrm_state_addr_check(x, daddr, saddr, encap_family) &&
                    tmpl->mode == x->props.mode &&
@@ -971,6 +971,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
                if (x->props.family == encap_family &&
                    x->props.reqid == tmpl->reqid &&
                    (mark & x->mark.m) == x->mark.v &&
+                   x->if_id == if_id &&
                    !(x->props.flags & XFRM_STATE_WILDRECV) &&
                    xfrm_addr_equal(&x->id.daddr, daddr, encap_family) &&
                    tmpl->mode == x->props.mode &&
@@ -1010,6 +1011,7 @@ found:
                 * to current session. */
                xfrm_init_tempstate(x, fl, tmpl, daddr, saddr, family);
                memcpy(&x->mark, &pol->mark, sizeof(x->mark));
+               x->if_id = if_id;
 
                error = security_xfrm_state_alloc_acquire(x, pol->security, fl->flowi_secid);
                if (error) {
@@ -1067,7 +1069,7 @@ out:
 }
 
 struct xfrm_state *
-xfrm_stateonly_find(struct net *net, u32 mark,
+xfrm_stateonly_find(struct net *net, u32 mark, u32 if_id,
                    xfrm_address_t *daddr, xfrm_address_t *saddr,
                    unsigned short family, u8 mode, u8 proto, u32 reqid)
 {
@@ -1080,6 +1082,7 @@ xfrm_stateonly_find(struct net *net, u32 mark,
                if (x->props.family == family &&
                    x->props.reqid == reqid &&
                    (mark & x->mark.m) == x->mark.v &&
+                   x->if_id == if_id &&
                    !(x->props.flags & XFRM_STATE_WILDRECV) &&
                    xfrm_state_addr_check(x, daddr, saddr, family) &&
                    mode == x->props.mode &&
@@ -1160,11 +1163,13 @@ static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
        struct xfrm_state *x;
        unsigned int h;
        u32 mark = xnew->mark.v & xnew->mark.m;
+       u32 if_id = xnew->if_id;
 
        h = xfrm_dst_hash(net, &xnew->id.daddr, &xnew->props.saddr, reqid, family);
        hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
                if (x->props.family     == family &&
                    x->props.reqid      == reqid &&
+                   x->if_id            == if_id &&
                    (mark & x->mark.m) == x->mark.v &&
                    xfrm_addr_equal(&x->id.daddr, &xnew->id.daddr, family) &&
                    xfrm_addr_equal(&x->props.saddr, &xnew->props.saddr, family))
@@ -1187,7 +1192,7 @@ EXPORT_SYMBOL(xfrm_state_insert);
 static struct xfrm_state *__find_acq_core(struct net *net,
                                          const struct xfrm_mark *m,
                                          unsigned short family, u8 mode,
-                                         u32 reqid, u8 proto,
+                                         u32 reqid, u32 if_id, u8 proto,
                                          const xfrm_address_t *daddr,
                                          const xfrm_address_t *saddr,
                                          int create)
@@ -1242,6 +1247,7 @@ static struct xfrm_state *__find_acq_core(struct net *net,
                x->props.family = family;
                x->props.mode = mode;
                x->props.reqid = reqid;
+               x->if_id = if_id;
                x->mark.v = m->v;
                x->mark.m = m->m;
                x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires;
@@ -1296,7 +1302,7 @@ int xfrm_state_add(struct xfrm_state *x)
 
        if (use_spi && !x1)
                x1 = __find_acq_core(net, &x->mark, family, x->props.mode,
-                                    x->props.reqid, x->id.proto,
+                                    x->props.reqid, x->if_id, x->id.proto,
                                     &x->id.daddr, &x->props.saddr, 0);
 
        __xfrm_state_bump_genids(x);
@@ -1395,6 +1401,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig,
        x->props.flags = orig->props.flags;
        x->props.extra_flags = orig->props.extra_flags;
 
+       x->if_id = orig->if_id;
        x->tfcpad = orig->tfcpad;
        x->replay_maxdiff = orig->replay_maxdiff;
        x->replay_maxage = orig->replay_maxage;
@@ -1554,6 +1561,19 @@ out:
                if (x1->curlft.use_time)
                        xfrm_state_check_expire(x1);
 
+               if (x->props.smark.m || x->props.smark.v || x->if_id) {
+                       spin_lock_bh(&net->xfrm.xfrm_state_lock);
+
+                       if (x->props.smark.m || x->props.smark.v)
+                               x1->props.smark = x->props.smark;
+
+                       if (x->if_id)
+                               x1->if_id = x->if_id;
+
+                       __xfrm_state_bump_genids(x1);
+                       spin_unlock_bh(&net->xfrm.xfrm_state_lock);
+               }
+
                err = 0;
                x->km.state = XFRM_STATE_DEAD;
                __xfrm_state_put(x);
@@ -1571,7 +1591,7 @@ EXPORT_SYMBOL(xfrm_state_update);
 int xfrm_state_check_expire(struct xfrm_state *x)
 {
        if (!x->curlft.use_time)
-               x->curlft.use_time = get_seconds();
+               x->curlft.use_time = ktime_get_real_seconds();
 
        if (x->curlft.bytes >= x->lft.hard_byte_limit ||
            x->curlft.packets >= x->lft.hard_packet_limit) {
@@ -1619,13 +1639,13 @@ EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
 
 struct xfrm_state *
 xfrm_find_acq(struct net *net, const struct xfrm_mark *mark, u8 mode, u32 reqid,
-             u8 proto, const xfrm_address_t *daddr,
+             u32 if_id, u8 proto, const xfrm_address_t *daddr,
              const xfrm_address_t *saddr, int create, unsigned short family)
 {
        struct xfrm_state *x;
 
        spin_lock_bh(&net->xfrm.xfrm_state_lock);
-       x = __find_acq_core(net, mark, family, mode, reqid, proto, daddr, saddr, create);
+       x = __find_acq_core(net, mark, family, mode, reqid, if_id, proto, daddr, saddr, create);
        spin_unlock_bh(&net->xfrm.xfrm_state_lock);
 
        return x;
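
Taken together, the xfrm_state.c hunks above thread the new if_id value through
every state-lookup path, next to the existing family/reqid/mark checks. Below is
a compilable sketch of the resulting match predicate; the struct and helper
names are illustrative stand-ins, not definitions from the patch.

#include <stdbool.h>
#include <stdint.h>

/* Local stand-ins mirroring only the xfrm_state fields that the lookup
 * conditions above touch; these are not the kernel definitions. */
struct sa_mark { uint32_t v, m; };
struct sa_state {
        struct { uint16_t family; uint32_t reqid; } props;
        struct sa_mark mark;
        uint32_t if_id;
};

static bool sa_matches(const struct sa_state *x, uint16_t family,
                       uint32_t reqid, uint32_t mark, uint32_t if_id)
{
        return x->props.family == family &&
               x->props.reqid == reqid &&
               (mark & x->mark.m) == x->mark.v && /* mark compared under mask */
               x->if_id == if_id;                 /* new: XFRM interface id */
}
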
index 33878e6e0d0a01cdc5da8ab20bdb92d0d93e056d..4791aa8b818583b5fcb5812fd561342fbab2edfa 100644 (file)
@@ -527,6 +527,19 @@ static void xfrm_update_ae_params(struct xfrm_state *x, struct nlattr **attrs,
                x->replay_maxdiff = nla_get_u32(rt);
 }
 
+static void xfrm_smark_init(struct nlattr **attrs, struct xfrm_mark *m)
+{
+       if (attrs[XFRMA_SET_MARK]) {
+               m->v = nla_get_u32(attrs[XFRMA_SET_MARK]);
+               if (attrs[XFRMA_SET_MARK_MASK])
+                       m->m = nla_get_u32(attrs[XFRMA_SET_MARK_MASK]);
+               else
+                       m->m = 0xffffffff;
+       } else {
+               m->v = m->m = 0;
+       }
+}
+
 static struct xfrm_state *xfrm_state_construct(struct net *net,
                                               struct xfrm_usersa_info *p,
                                               struct nlattr **attrs,
@@ -579,8 +592,10 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
 
        xfrm_mark_get(attrs, &x->mark);
 
-       if (attrs[XFRMA_OUTPUT_MARK])
-               x->props.output_mark = nla_get_u32(attrs[XFRMA_OUTPUT_MARK]);
+       xfrm_smark_init(attrs, &x->props.smark);
+
+       if (attrs[XFRMA_IF_ID])
+               x->if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
 
        err = __xfrm_init_state(x, false, attrs[XFRMA_OFFLOAD_DEV]);
        if (err)
@@ -824,6 +839,18 @@ static int copy_to_user_auth(struct xfrm_algo_auth *auth, struct sk_buff *skb)
        return 0;
 }
 
+static int xfrm_smark_put(struct sk_buff *skb, struct xfrm_mark *m)
+{
+       int ret = 0;
+
+       if (m->v | m->m) {
+               ret = nla_put_u32(skb, XFRMA_SET_MARK, m->v);
+               if (!ret)
+                       ret = nla_put_u32(skb, XFRMA_SET_MARK_MASK, m->m);
+       }
+       return ret;
+}
+
 /* Don't change this without updating xfrm_sa_len! */
 static int copy_to_user_state_extra(struct xfrm_state *x,
                                    struct xfrm_usersa_info *p,
@@ -887,6 +914,11 @@ static int copy_to_user_state_extra(struct xfrm_state *x,
        ret = xfrm_mark_put(skb, &x->mark);
        if (ret)
                goto out;
+
+       ret = xfrm_smark_put(skb, &x->props.smark);
+       if (ret)
+               goto out;
+
        if (x->replay_esn)
                ret = nla_put(skb, XFRMA_REPLAY_ESN_VAL,
                              xfrm_replay_state_esn_len(x->replay_esn),
@@ -900,8 +932,8 @@ static int copy_to_user_state_extra(struct xfrm_state *x,
                ret = copy_user_offload(&x->xso, skb);
        if (ret)
                goto out;
-       if (x->props.output_mark) {
-               ret = nla_put_u32(skb, XFRMA_OUTPUT_MARK, x->props.output_mark);
+       if (x->if_id) {
+               ret = nla_put_u32(skb, XFRMA_IF_ID, x->if_id);
                if (ret)
                        goto out;
        }
@@ -1255,6 +1287,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
        int err;
        u32 mark;
        struct xfrm_mark m;
+       u32 if_id = 0;
 
        p = nlmsg_data(nlh);
        err = verify_spi_info(p->info.id.proto, p->min, p->max);
@@ -1267,6 +1300,10 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
        x = NULL;
 
        mark = xfrm_mark_get(attrs, &m);
+
+       if (attrs[XFRMA_IF_ID])
+               if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
+
        if (p->info.seq) {
                x = xfrm_find_acq_byseq(net, mark, p->info.seq);
                if (x && !xfrm_addr_equal(&x->id.daddr, daddr, family)) {
@@ -1277,7 +1314,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
 
        if (!x)
                x = xfrm_find_acq(net, &m, p->info.mode, p->info.reqid,
-                                 p->info.id.proto, daddr,
+                                 if_id, p->info.id.proto, daddr,
                                  &p->info.saddr, 1,
                                  family);
        err = -ENOENT;
@@ -1565,6 +1602,9 @@ static struct xfrm_policy *xfrm_policy_construct(struct net *net, struct xfrm_us
 
        xfrm_mark_get(attrs, &xp->mark);
 
+       if (attrs[XFRMA_IF_ID])
+               xp->if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
+
        return xp;
  error:
        *errp = err;
@@ -1712,6 +1752,8 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr
                err = copy_to_user_policy_type(xp->type, skb);
        if (!err)
                err = xfrm_mark_put(skb, &xp->mark);
+       if (!err)
+               err = xfrm_if_id_put(skb, xp->if_id);
        if (err) {
                nlmsg_cancel(skb, nlh);
                return err;
@@ -1793,6 +1835,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
        int delete;
        struct xfrm_mark m;
        u32 mark = xfrm_mark_get(attrs, &m);
+       u32 if_id = 0;
 
        p = nlmsg_data(nlh);
        delete = nlh->nlmsg_type == XFRM_MSG_DELPOLICY;
@@ -1805,8 +1848,11 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
        if (err)
                return err;
 
+       if (attrs[XFRMA_IF_ID])
+               if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
+
        if (p->index)
-               xp = xfrm_policy_byid(net, mark, type, p->dir, p->index, delete, &err);
+               xp = xfrm_policy_byid(net, mark, if_id, type, p->dir, p->index, delete, &err);
        else {
                struct nlattr *rt = attrs[XFRMA_SEC_CTX];
                struct xfrm_sec_ctx *ctx;
@@ -1823,7 +1869,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
                        if (err)
                                return err;
                }
-               xp = xfrm_policy_bysel_ctx(net, mark, type, p->dir, &p->sel,
+               xp = xfrm_policy_bysel_ctx(net, mark, if_id, type, p->dir, &p->sel,
                                           ctx, delete, &err);
                security_xfrm_policy_free(ctx);
        }
@@ -1946,6 +1992,10 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct
        if (err)
                goto out_cancel;
 
+       err = xfrm_if_id_put(skb, x->if_id);
+       if (err)
+               goto out_cancel;
+
        nlmsg_end(skb, nlh);
        return 0;
 
@@ -2088,6 +2138,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
        int err = -ENOENT;
        struct xfrm_mark m;
        u32 mark = xfrm_mark_get(attrs, &m);
+       u32 if_id = 0;
 
        err = copy_from_user_policy_type(&type, attrs);
        if (err)
@@ -2097,8 +2148,11 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
        if (err)
                return err;
 
+       if (attrs[XFRMA_IF_ID])
+               if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
+
        if (p->index)
-               xp = xfrm_policy_byid(net, mark, type, p->dir, p->index, 0, &err);
+               xp = xfrm_policy_byid(net, mark, if_id, type, p->dir, p->index, 0, &err);
        else {
                struct nlattr *rt = attrs[XFRMA_SEC_CTX];
                struct xfrm_sec_ctx *ctx;
@@ -2115,7 +2169,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
                        if (err)
                                return err;
                }
-               xp = xfrm_policy_bysel_ctx(net, mark, type, p->dir,
+               xp = xfrm_policy_bysel_ctx(net, mark, if_id, type, p->dir,
                                           &p->sel, ctx, 0, &err);
                security_xfrm_policy_free(ctx);
        }
@@ -2497,7 +2551,9 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
        [XFRMA_PROTO]           = { .type = NLA_U8 },
        [XFRMA_ADDRESS_FILTER]  = { .len = sizeof(struct xfrm_address_filter) },
        [XFRMA_OFFLOAD_DEV]     = { .len = sizeof(struct xfrm_user_offload) },
-       [XFRMA_OUTPUT_MARK]     = { .type = NLA_U32 },
+       [XFRMA_SET_MARK]        = { .type = NLA_U32 },
+       [XFRMA_SET_MARK_MASK]   = { .type = NLA_U32 },
+       [XFRMA_IF_ID]           = { .type = NLA_U32 },
 };
 
 static const struct nla_policy xfrma_spd_policy[XFRMA_SPD_MAX+1] = {
@@ -2629,6 +2685,10 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct
        if (err)
                return err;
 
+       err = xfrm_if_id_put(skb, x->if_id);
+       if (err)
+               return err;
+
        nlmsg_end(skb, nlh);
        return 0;
 }
@@ -2723,8 +2783,12 @@ static inline unsigned int xfrm_sa_len(struct xfrm_state *x)
                l += nla_total_size(sizeof(x->props.extra_flags));
        if (x->xso.dev)
                 l += nla_total_size(sizeof(x->xso));
-       if (x->props.output_mark)
-               l += nla_total_size(sizeof(x->props.output_mark));
+       if (x->props.smark.v | x->props.smark.m) {
+               l += nla_total_size(sizeof(x->props.smark.v));
+               l += nla_total_size(sizeof(x->props.smark.m));
+       }
+       if (x->if_id)
+               l += nla_total_size(sizeof(x->if_id));
 
        /* Must count x->lastused as it may become non-zero behind our back. */
        l += nla_total_size_64bit(sizeof(u64));
@@ -2854,6 +2918,8 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x,
                err = copy_to_user_policy_type(xp->type, skb);
        if (!err)
                err = xfrm_mark_put(skb, &xp->mark);
+       if (!err)
+               err = xfrm_if_id_put(skb, xp->if_id);
        if (err) {
                nlmsg_cancel(skb, nlh);
                return err;
@@ -2970,6 +3036,8 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp,
                err = copy_to_user_policy_type(xp->type, skb);
        if (!err)
                err = xfrm_mark_put(skb, &xp->mark);
+       if (!err)
+               err = xfrm_if_id_put(skb, xp->if_id);
        if (err) {
                nlmsg_cancel(skb, nlh);
                return err;
@@ -3051,6 +3119,8 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, const struct km_e
                err = copy_to_user_policy_type(xp->type, skb);
        if (!err)
                err = xfrm_mark_put(skb, &xp->mark);
+       if (!err)
+               err = xfrm_if_id_put(skb, xp->if_id);
        if (err)
                goto out_free_skb;
 
@@ -3284,4 +3354,3 @@ module_init(xfrm_user_init);
 module_exit(xfrm_user_exit);
 MODULE_LICENSE("GPL");
 MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_XFRM);
-
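
On the userspace side, XFRMA_SET_MARK, XFRMA_SET_MARK_MASK and XFRMA_IF_ID are
plain NLA_U32 attributes (see the xfrma_policy hunk above). A minimal sketch of
appending such an attribute to a netlink request buffer, matching what
xfrm_smark_init() expects on the receive side; buffer sizing and the
surrounding nlmsghdr handling are deliberately omitted.

#include <string.h>
#include <linux/netlink.h>
#include <linux/xfrm.h>

/* Append one u32 attribute at offset 'off' in 'buf'; returns the new
 * offset. Sketch only: no bounds checking, no message header handling. */
static int put_u32_attr(char *buf, int off, unsigned short type,
                        unsigned int value)
{
        struct nlattr *nla = (struct nlattr *)(buf + off);

        nla->nla_type = type;
        nla->nla_len = NLA_HDRLEN + sizeof(value);
        memcpy(buf + off + NLA_HDRLEN, &value, sizeof(value));
        return off + NLA_ALIGN(nla->nla_len);
}

/* e.g. off = put_u32_attr(buf, off, XFRMA_SET_MARK, 0x1);
 * If XFRMA_SET_MARK_MASK is omitted, xfrm_smark_init() above defaults
 * the mask to 0xffffffff. */
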
index 1303af10e54d5d44de7c4789a48c030ff2646e66..9ea2f7b648696e85c5ad097d5d09d6be5f2df51f 100644 (file)
@@ -52,6 +52,7 @@ hostprogs-y += xdp_adjust_tail
 hostprogs-y += xdpsock
 hostprogs-y += xdp_fwd
 hostprogs-y += task_fd_query
+hostprogs-y += xdp_sample_pkts
 
 # Libbpf dependencies
 LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a
@@ -107,6 +108,7 @@ xdp_adjust_tail-objs := xdp_adjust_tail_user.o
 xdpsock-objs := bpf_load.o xdpsock_user.o
 xdp_fwd-objs := bpf_load.o xdp_fwd_user.o
 task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS)
+xdp_sample_pkts-objs := xdp_sample_pkts_user.o $(TRACE_HELPERS)
 
 # Tell kbuild to always build the programs
 always := $(hostprogs-y)
@@ -163,6 +165,7 @@ always += xdp_adjust_tail_kern.o
 always += xdpsock_kern.o
 always += xdp_fwd_kern.o
 always += task_fd_query_kern.o
+always += xdp_sample_pkts_kern.o
 
 HOSTCFLAGS += -I$(objtree)/usr/include
 HOSTCFLAGS += -I$(srctree)/tools/lib/
@@ -179,6 +182,7 @@ HOSTCFLAGS_spintest_user.o += -I$(srctree)/tools/lib/bpf/
 HOSTCFLAGS_trace_event_user.o += -I$(srctree)/tools/lib/bpf/
 HOSTCFLAGS_sampleip_user.o += -I$(srctree)/tools/lib/bpf/
 HOSTCFLAGS_task_fd_query_user.o += -I$(srctree)/tools/lib/bpf/
+HOSTCFLAGS_xdp_sample_pkts_user.o += -I$(srctree)/tools/lib/bpf/
 
 HOST_LOADLIBES         += $(LIBBPF) -lelf
 HOSTLOADLIBES_tracex4          += -lrt
index 89161c9ed466b63bda3b25b275559bf0892d2fee..904e775d1a44c5f9cd0582bf4db8af4b3b0a011a 100644 (file)
@@ -107,6 +107,9 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
                return -1;
        }
 
+       if (prog_cnt == MAX_PROGS)
+               return -1;
+
        fd = bpf_load_program(prog_type, prog, insns_cnt, license, kern_version,
                              bpf_log_buf, BPF_LOG_BUF_SIZE);
        if (fd < 0) {
index 3b5be2364975ae26b4dc8166c5c925bc1907aab1..a9277b118c330b09f3c253785d20999d02c6410f 100644 (file)
@@ -51,7 +51,7 @@ int main(int argc, char **argv)
        if (argc > 3)
                filter_id = atoi(argv[3]);
 
-       if (filter_id > prog_cnt) {
+       if (filter_id >= prog_cnt) {
                printf("Invalid program id; program not found in file\n");
                return EXIT_FAILURE;
        }
index 303e9e7161f3169ebcad88974a0d549f01e7db30..8cb703671b04687d17bd1d7c90b9a3e7f2fb193c 100644 (file)
@@ -134,7 +134,16 @@ bool parse_eth(struct ethhdr *eth, void *data_end,
                        return false;
                eth_type = vlan_hdr->h_vlan_encapsulated_proto;
        }
-       /* TODO: Handle double VLAN tagged packet */
+       /* Handle double VLAN tagged packet */
+       if (eth_type == htons(ETH_P_8021Q) || eth_type == htons(ETH_P_8021AD)) {
+               struct vlan_hdr *vlan_hdr;
+
+               vlan_hdr = (void *)eth + offset;
+               offset += sizeof(*vlan_hdr);
+               if ((void *)eth + offset > data_end)
+                       return false;
+               eth_type = vlan_hdr->h_vlan_encapsulated_proto;
+       }
 
        *eth_proto = ntohs(eth_type);
        *l3_offset = offset;
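
The hunk above handles the second VLAN tag by duplicating the single-tag block,
since the BPF verifier of this era rejects loops. Assuming compile-time
unrolling (#pragma unroll), the same parse can be written more compactly; the
fragment below is a sketch that reuses parse_eth()'s locals (eth, offset,
eth_type, data_end) and is not code from the patch.

#define MAX_VLAN_DEPTH 2

#pragma unroll
        for (int i = 0; i < MAX_VLAN_DEPTH; i++) {
                struct vlan_hdr *vh;

                if (eth_type != htons(ETH_P_8021Q) &&
                    eth_type != htons(ETH_P_8021AD))
                        break;              /* not a VLAN tag: done */
                vh = (void *)eth + offset;
                offset += sizeof(*vh);
                if ((void *)eth + offset > data_end)
                        return false;       /* truncated VLAN header */
                eth_type = vh->h_vlan_encapsulated_proto;
        }
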
index 3fd2092916537b7ec4e5d00f25836d9c42729a53..222a83eed1cbf0b213998a47153334b3fd4127b9 100644 (file)
@@ -4,6 +4,8 @@
  *  Example howto extract XDP RX-queue info
  */
 #include <uapi/linux/bpf.h>
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/in.h>
 #include "bpf_helpers.h"
 
 /* Config setup from with userspace
  *
  * User-side setup ifindex in config_map, to verify that
  * ctx->ingress_ifindex is correct (against configured ifindex)
  */
 struct config {
        __u32 action;
        int ifindex;
+       __u32 options;
+};
+enum cfg_options_flags {
+       NO_TOUCH = 0x0U,
+       READ_MEM = 0x1U,
+       SWAP_MAC = 0x2U,
 };
 struct bpf_map_def SEC("maps") config_map = {
        .type           = BPF_MAP_TYPE_ARRAY,
@@ -45,6 +53,23 @@ struct bpf_map_def SEC("maps") rx_queue_index_map = {
        .max_entries    = MAX_RXQs + 1,
 };
 
+static __always_inline
+void swap_src_dst_mac(void *data)
+{
+       unsigned short *p = data;
+       unsigned short dst[3];
+
+       dst[0] = p[0];
+       dst[1] = p[1];
+       dst[2] = p[2];
+       p[0] = p[3];
+       p[1] = p[4];
+       p[2] = p[5];
+       p[3] = dst[0];
+       p[4] = dst[1];
+       p[5] = dst[2];
+}
+
 SEC("xdp_prog0")
 int  xdp_prognum0(struct xdp_md *ctx)
 {
@@ -90,6 +115,24 @@ int  xdp_prognum0(struct xdp_md *ctx)
        if (key == MAX_RXQs)
                rxq_rec->issue++;
 
+       /* Default: Don't touch packet data, only count packets */
+       if (unlikely(config->options & (READ_MEM|SWAP_MAC))) {
+               struct ethhdr *eth = data;
+
+               if (eth + 1 > data_end)
+                       return XDP_ABORTED;
+
+               /* Avoid compiler removing this: Drop non 802.3 Ethertypes */
+               if (ntohs(eth->h_proto) < ETH_P_802_3_MIN)
+                       return XDP_ABORTED;
+
+               /* XDP_TX requires changing MAC-addrs, else HW may drop.
+                * Can also be enabled with --swapmac (for test purposes)
+                */
+               if (unlikely(config->options & SWAP_MAC))
+                       swap_src_dst_mac(data);
+       }
+
        return config->action;
 }
 
index e4e9ba52bff02c457410543424b0f90148a01730..248a7eab9531eba322f6fb5b404c9a26a516adba 100644 (file)
@@ -50,6 +50,8 @@ static const struct option long_options[] = {
        {"sec",         required_argument,      NULL, 's' },
        {"no-separators", no_argument,          NULL, 'z' },
        {"action",      required_argument,      NULL, 'a' },
+       {"readmem",     no_argument,            NULL, 'r' },
+       {"swapmac",     no_argument,            NULL, 'm' },
        {0, 0, NULL,  0 }
 };
 
@@ -66,6 +68,12 @@ static void int_exit(int sig)
 struct config {
        __u32 action;
        int ifindex;
+       __u32 options;
+};
+enum cfg_options_flags {
+       NO_TOUCH = 0x0U,
+       READ_MEM = 0x1U,
+       SWAP_MAC = 0x2U,
 };
 #define XDP_ACTION_MAX (XDP_TX + 1)
 #define XDP_ACTION_MAX_STRLEN 11
@@ -109,6 +117,18 @@ static void list_xdp_actions(void)
        printf("\n");
 }
 
+static char* options2str(enum cfg_options_flags flag)
+{
+       if (flag == NO_TOUCH)
+               return "no_touch";
+       if (flag & SWAP_MAC)
+               return "swapmac";
+       if (flag & READ_MEM)
+               return "read";
+       fprintf(stderr, "ERR: Unknown config option flags");
+       exit(EXIT_FAIL);
+}
+
 static void usage(char *argv[])
 {
        int i;
@@ -305,7 +325,7 @@ static __u64 calc_errs_pps(struct datarec *r,
 
 static void stats_print(struct stats_record *stats_rec,
                        struct stats_record *stats_prev,
-                       int action)
+                       int action, __u32 cfg_opt)
 {
        unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries;
        unsigned int nr_cpus = bpf_num_possible_cpus();
@@ -316,8 +336,8 @@ static void stats_print(struct stats_record *stats_rec,
        int i;
 
        /* Header */
-       printf("\nRunning XDP on dev:%s (ifindex:%d) action:%s\n",
-              ifname, ifindex, action2str(action));
+       printf("\nRunning XDP on dev:%s (ifindex:%d) action:%s options:%s\n",
+              ifname, ifindex, action2str(action), options2str(cfg_opt));
 
        /* stats_global_map */
        {
@@ -399,7 +419,7 @@ static inline void swap(struct stats_record **a, struct stats_record **b)
        *b = tmp;
 }
 
-static void stats_poll(int interval, int action)
+static void stats_poll(int interval, int action, __u32 cfg_opt)
 {
        struct stats_record *record, *prev;
 
@@ -410,7 +430,7 @@ static void stats_poll(int interval, int action)
        while (1) {
                swap(&prev, &record);
                stats_collect(record);
-               stats_print(record, prev, action);
+               stats_print(record, prev, action, cfg_opt);
                sleep(interval);
        }
 
@@ -421,6 +441,7 @@ static void stats_poll(int interval, int action)
 
 int main(int argc, char **argv)
 {
+       __u32 cfg_options = NO_TOUCH; /* Default: Don't touch packet memory */
        struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY};
        struct bpf_prog_load_attr prog_load_attr = {
                .prog_type      = BPF_PROG_TYPE_XDP,
@@ -435,6 +456,7 @@ int main(int argc, char **argv)
        int interval = 2;
        __u32 key = 0;
 
+
        char action_str_buf[XDP_ACTION_MAX_STRLEN + 1 /* for \0 */] = { 0 };
        int action = XDP_PASS; /* Default action */
        char *action_str = NULL;
@@ -496,6 +518,12 @@ int main(int argc, char **argv)
                        action_str = (char *)&action_str_buf;
                        strncpy(action_str, optarg, XDP_ACTION_MAX_STRLEN);
                        break;
+               case 'r':
+                       cfg_options |= READ_MEM;
+                       break;
+               case 'm':
+                       cfg_options |= SWAP_MAC;
+                       break;
                case 'h':
                error:
                default:
@@ -523,6 +551,11 @@ int main(int argc, char **argv)
        }
        cfg.action = action;
 
+       /* XDP_TX requires changing MAC-addrs, else HW may drop */
+       if (action == XDP_TX)
+               cfg_options |= SWAP_MAC;
+       cfg.options = cfg_options;
+
        /* Trick to pretty printf with thousands separators use %' */
        if (use_separators)
                setlocale(LC_NUMERIC, "en_US");
@@ -542,6 +575,6 @@ int main(int argc, char **argv)
                return EXIT_FAIL_XDP;
        }
 
-       stats_poll(interval, action);
+       stats_poll(interval, action, cfg_options);
        return EXIT_OK;
 }
diff --git a/samples/bpf/xdp_sample_pkts_kern.c b/samples/bpf/xdp_sample_pkts_kern.c
new file mode 100644 (file)
index 0000000..f7ca8b8
--- /dev/null
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/ptrace.h>
+#include <linux/version.h>
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+#define SAMPLE_SIZE 64ul
+#define MAX_CPUS 128
+
+#define bpf_printk(fmt, ...)                                   \
+({                                                             \
+              char ____fmt[] = fmt;                            \
+              bpf_trace_printk(____fmt, sizeof(____fmt),       \
+                               ##__VA_ARGS__);                 \
+})
+
+struct bpf_map_def SEC("maps") my_map = {
+       .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+       .key_size = sizeof(int),
+       .value_size = sizeof(u32),
+       .max_entries = MAX_CPUS,
+};
+
+SEC("xdp_sample")
+int xdp_sample_prog(struct xdp_md *ctx)
+{
+       void *data_end = (void *)(long)ctx->data_end;
+       void *data = (void *)(long)ctx->data;
+
+       /* Metadata will be in the perf event before the packet data. */
+       struct S {
+               u16 cookie;
+               u16 pkt_len;
+       } __packed metadata;
+
+       if (data < data_end) {
+               /* The XDP perf_event_output handler will use the upper 32 bits
+                * of the flags argument as a number of bytes to include of the
+                * packet payload in the event data. If the size is too big, the
+                * call to bpf_perf_event_output will fail and return -EFAULT.
+                *
+                * See bpf_xdp_event_output in net/core/filter.c.
+                *
+                * The BPF_F_CURRENT_CPU flag means that the event output fd
+                * will be indexed by the CPU number in the event map.
+                */
+               u64 flags = BPF_F_CURRENT_CPU;
+               u16 sample_size;
+               int ret;
+
+               metadata.cookie = 0xdead;
+               metadata.pkt_len = (u16)(data_end - data);
+               sample_size = min(metadata.pkt_len, SAMPLE_SIZE);
+               flags |= (u64)sample_size << 32;
+
+               ret = bpf_perf_event_output(ctx, &my_map, flags,
+                                           &metadata, sizeof(metadata));
+               if (ret)
+                       bpf_printk("perf_event_output failed: %d\n", ret);
+       }
+
+       return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/xdp_sample_pkts_user.c b/samples/bpf/xdp_sample_pkts_user.c
new file mode 100644 (file)
index 0000000..8dd87c1
--- /dev/null
@@ -0,0 +1,169 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <linux/perf_event.h>
+#include <linux/bpf.h>
+#include <net/if.h>
+#include <errno.h>
+#include <assert.h>
+#include <sys/sysinfo.h>
+#include <sys/ioctl.h>
+#include <signal.h>
+#include <libbpf.h>
+#include <bpf/bpf.h>
+
+#include "perf-sys.h"
+#include "trace_helpers.h"
+
+#define MAX_CPUS 128
+static int pmu_fds[MAX_CPUS], if_idx;
+static struct perf_event_mmap_page *headers[MAX_CPUS];
+static char *if_name;
+
+static int do_attach(int idx, int fd, const char *name)
+{
+       int err;
+
+       err = bpf_set_link_xdp_fd(idx, fd, 0);
+       if (err < 0)
+               printf("ERROR: failed to attach program to %s\n", name);
+
+       return err;
+}
+
+static int do_detach(int idx, const char *name)
+{
+       int err;
+
+       err = bpf_set_link_xdp_fd(idx, -1, 0);
+       if (err < 0)
+               printf("ERROR: failed to detach program from %s\n", name);
+
+       return err;
+}
+
+#define SAMPLE_SIZE 64
+
+static int print_bpf_output(void *data, int size)
+{
+       struct {
+               __u16 cookie;
+               __u16 pkt_len;
+               __u8  pkt_data[SAMPLE_SIZE];
+       } __packed *e = data;
+       int i;
+
+       if (e->cookie != 0xdead) {
+               printf("BUG cookie %x sized %d\n",
+                      e->cookie, size);
+               return LIBBPF_PERF_EVENT_ERROR;
+       }
+
+       printf("Pkt len: %-5d bytes. Ethernet hdr: ", e->pkt_len);
+       for (i = 0; i < 14 && i < e->pkt_len; i++)
+               printf("%02x ", e->pkt_data[i]);
+       printf("\n");
+
+       return LIBBPF_PERF_EVENT_CONT;
+}
+
+static void test_bpf_perf_event(int map_fd, int num)
+{
+       struct perf_event_attr attr = {
+               .sample_type = PERF_SAMPLE_RAW,
+               .type = PERF_TYPE_SOFTWARE,
+               .config = PERF_COUNT_SW_BPF_OUTPUT,
+               .wakeup_events = 1, /* get an fd notification for every event */
+       };
+       int i;
+
+       for (i = 0; i < num; i++) {
+               int key = i;
+
+               pmu_fds[i] = sys_perf_event_open(&attr, -1/*pid*/, i/*cpu*/,
+                                                -1/*group_fd*/, 0);
+
+               assert(pmu_fds[i] >= 0);
+               assert(bpf_map_update_elem(map_fd, &key,
+                                          &pmu_fds[i], BPF_ANY) == 0);
+               ioctl(pmu_fds[i], PERF_EVENT_IOC_ENABLE, 0);
+       }
+}
+
+static void sig_handler(int signo)
+{
+       do_detach(if_idx, if_name);
+       exit(0);
+}
+
+int main(int argc, char **argv)
+{
+       struct bpf_prog_load_attr prog_load_attr = {
+               .prog_type      = BPF_PROG_TYPE_XDP,
+       };
+       struct bpf_object *obj;
+       struct bpf_map *map;
+       int prog_fd, map_fd;
+       char filename[256];
+       int ret, err, i;
+       int numcpus;
+
+       if (argc < 2) {
+               printf("Usage: %s <ifname>\n", argv[0]);
+               return 1;
+       }
+
+       numcpus = get_nprocs();
+       if (numcpus > MAX_CPUS)
+               numcpus = MAX_CPUS;
+
+       snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+       prog_load_attr.file = filename;
+
+       if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
+               return 1;
+
+       if (!prog_fd) {
+               printf("load_bpf_file: %s\n", strerror(errno));
+               return 1;
+       }
+
+       map = bpf_map__next(NULL, obj);
+       if (!map) {
+               printf("finding a map in obj file failed\n");
+               return 1;
+       }
+       map_fd = bpf_map__fd(map);
+
+       if_idx = if_nametoindex(argv[1]);
+       if (!if_idx)
+               if_idx = strtoul(argv[1], NULL, 0);
+
+       if (!if_idx) {
+               fprintf(stderr, "Invalid ifname\n");
+               return 1;
+       }
+       if_name = argv[1];
+       err = do_attach(if_idx, prog_fd, argv[1]);
+       if (err)
+               return err;
+
+       if (signal(SIGINT, sig_handler) ||
+           signal(SIGHUP, sig_handler) ||
+           signal(SIGTERM, sig_handler)) {
+               perror("signal");
+               return 1;
+       }
+
+       test_bpf_perf_event(map_fd, numcpus);
+
+       for (i = 0; i < numcpus; i++)
+               if (perf_event_mmap_header(pmu_fds[i], &headers[i]) < 0)
+                       return 1;
+
+       ret = perf_event_poller_multi(pmu_fds, headers, numcpus,
+                                     print_bpf_output);
+       kill(0, SIGINT);
+       return ret;
+}
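
The anonymous struct in print_bpf_output() must match the kernel program's
__packed metadata byte-for-byte, with the sampled packet bytes following
immediately after it. A compile-time check one could add on the user side;
illustrative only, not part of the patch.

#include <assert.h>
#include <stddef.h>

/* Mirrors the two layouts above: 2-byte cookie, then 2-byte length. */
struct sample_metadata {
        unsigned short cookie;
        unsigned short pkt_len;
} __attribute__((packed));

static_assert(sizeof(struct sample_metadata) == 4,
              "must match the kernel-side __packed metadata");
static_assert(offsetof(struct sample_metadata, pkt_len) == 2,
              "pkt_len must directly follow cookie");
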
index 7b7433a1a34c6257d1a93f2f976e5b71cce9c60a..74b951f55608dca90a95cba914f3e74cd7e3b197 100644 (file)
@@ -159,7 +159,7 @@ int selinux_nlmsg_lookup(u16 sclass, u16 nlmsg_type, u32 *perm)
        switch (sclass) {
        case SECCLASS_NETLINK_ROUTE_SOCKET:
                /* RTM_MAX always point to RTM_SETxxxx, ie RTM_NEWxxx + 3 */
-               BUILD_BUG_ON(RTM_MAX != (RTM_NEWCACHEREPORT + 3));
+               BUILD_BUG_ON(RTM_MAX != (RTM_NEWCHAIN + 3));
                err = nlmsg_perm(nlmsg_type, perm, nlmsg_route_perms,
                                 sizeof(nlmsg_route_perms));
                break;
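
The BUILD_BUG_ON above holds because rtnetlink message types are allocated in
quads (NEW/DEL/GET/SET), so RTM_MAX always lands on the SET slot of the newest
quad; adding RTM_NEWCHAIN without updating this table would have tripped the
old assertion. The same invariant, checked from userspace (assumes uapi
headers new enough to define RTM_NEWCHAIN):

#include <assert.h>
#include <linux/rtnetlink.h>

/* Every RTM_* family occupies a NEW/DEL/GET/SET quad, so the newest
 * quad's SET slot is RTM_NEWxxx + 3 and coincides with RTM_MAX. */
static_assert((RTM_NEWCHAIN & 3) == 0, "quads start on multiples of 4");
static_assert(RTM_MAX == RTM_NEWCHAIN + 3, "table must cover the newest quad");
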
diff --git a/tools/bpf/Makefile.helpers b/tools/bpf/Makefile.helpers
new file mode 100644 (file)
index 0000000..c34fea7
--- /dev/null
@@ -0,0 +1,59 @@
+ifndef allow-override
+  include ../scripts/Makefile.include
+  include ../scripts/utilities.mak
+else
+  # Assume Makefile.helpers is being run from bpftool/Documentation
+  # subdirectory. Go up two more directories to fetch bpf.h header and
+  # associated script.
+  UP2DIR := ../../
+endif
+
+INSTALL ?= install
+RM ?= rm -f
+RMDIR ?= rmdir --ignore-fail-on-non-empty
+
+ifeq ($(V),1)
+  Q =
+else
+  Q = @
+endif
+
+prefix ?= /usr/local
+mandir ?= $(prefix)/man
+man7dir = $(mandir)/man7
+
+HELPERS_RST = bpf-helpers.rst
+MAN7_RST = $(HELPERS_RST)
+
+_DOC_MAN7 = $(patsubst %.rst,%.7,$(MAN7_RST))
+DOC_MAN7 = $(addprefix $(OUTPUT),$(_DOC_MAN7))
+
+helpers: man7
+man7: $(DOC_MAN7)
+
+RST2MAN_DEP := $(shell command -v rst2man 2>/dev/null)
+
+$(OUTPUT)$(HELPERS_RST): $(UP2DIR)../../include/uapi/linux/bpf.h
+       $(QUIET_GEN)$(UP2DIR)../../scripts/bpf_helpers_doc.py --filename $< > $@
+
+$(OUTPUT)%.7: $(OUTPUT)%.rst
+ifndef RST2MAN_DEP
+       $(error "rst2man not found, but required to generate man pages")
+endif
+       $(QUIET_GEN)rst2man $< > $@
+
+helpers-clean:
+       $(call QUIET_CLEAN, eBPF_helpers-manpage)
+       $(Q)$(RM) $(DOC_MAN7) $(OUTPUT)$(HELPERS_RST)
+
+helpers-install: helpers
+       $(call QUIET_INSTALL, eBPF_helpers-manpage)
+       $(Q)$(INSTALL) -d -m 755 $(DESTDIR)$(man7dir)
+       $(Q)$(INSTALL) -m 644 $(DOC_MAN7) $(DESTDIR)$(man7dir)
+
+helpers-uninstall:
+       $(call QUIET_UNINST, eBPF_helpers-manpage)
+       $(Q)$(RM) $(addprefix $(DESTDIR)$(man7dir)/,$(_DOC_MAN7))
+       $(Q)$(RMDIR) $(DESTDIR)$(man7dir)
+
+.PHONY: helpers helpers-clean helpers-install helpers-uninstall
index a9d47c1558bb104ad9c3b0ba913bec64ff32fc72..f7663a3e60c91605d10eb301d044def46bdaa6e7 100644 (file)
@@ -15,12 +15,15 @@ prefix ?= /usr/local
 mandir ?= $(prefix)/man
 man8dir = $(mandir)/man8
 
-MAN8_RST = $(wildcard *.rst)
+# Load targets for building eBPF helpers man page.
+include ../../Makefile.helpers
+
+MAN8_RST = $(filter-out $(HELPERS_RST),$(wildcard *.rst))
 
 _DOC_MAN8 = $(patsubst %.rst,%.8,$(MAN8_RST))
 DOC_MAN8 = $(addprefix $(OUTPUT),$(_DOC_MAN8))
 
-man: man8
+man: man8 helpers
 man8: $(DOC_MAN8)
 
 RST2MAN_DEP := $(shell command -v rst2man 2>/dev/null)
@@ -31,16 +34,16 @@ ifndef RST2MAN_DEP
 endif
        $(QUIET_GEN)rst2man $< > $@
 
-clean:
+clean: helpers-clean
        $(call QUIET_CLEAN, Documentation)
        $(Q)$(RM) $(DOC_MAN8)
 
-install: man
+install: man helpers-install
        $(call QUIET_INSTALL, Documentation-man)
        $(Q)$(INSTALL) -d -m 755 $(DESTDIR)$(man8dir)
        $(Q)$(INSTALL) -m 644 $(DOC_MAN8) $(DESTDIR)$(man8dir)
 
-uninstall:
+uninstall: helpers-uninstall
        $(call QUIET_UNINST, Documentation-man)
        $(Q)$(RM) $(addprefix $(DESTDIR)$(man8dir)/,$(_DOC_MAN8))
        $(Q)$(RMDIR) $(DESTDIR)$(man8dir)
index 7b0e6d453e922f4db9f4536c217a76acd6b10d4b..edbe81534c6d2941b955cd0ab15cf845110fb130 100644 (file)
@@ -15,12 +15,13 @@ SYNOPSIS
        *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } }
 
        *COMMANDS* :=
-       { **show** | **list** | **attach** | **detach** | **help** }
+       { **show** | **list** | **tree** | **attach** | **detach** | **help** }
 
 MAP COMMANDS
 =============
 
 |      **bpftool** **cgroup { show | list }** *CGROUP*
+|      **bpftool** **cgroup tree** [*CGROUP_ROOT*]
 |      **bpftool** **cgroup attach** *CGROUP* *ATTACH_TYPE* *PROG* [*ATTACH_FLAGS*]
 |      **bpftool** **cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG*
 |      **bpftool** **cgroup help**
@@ -39,6 +40,15 @@ DESCRIPTION
                  Output will start with program ID followed by attach type,
                  attach flags and program name.
 
+       **bpftool cgroup tree** [*CGROUP_ROOT*]
+                 Iterate over all cgroups in *CGROUP_ROOT* and list all
+                 attached programs. If *CGROUP_ROOT* is not specified,
+                 bpftool uses the cgroup v2 mountpoint.
+
+                 The output is similar to that of the **show**/**list**
+                 commands: it starts with the absolute cgroup path, followed
+                 by program ID, attach type, attach flags and program name.
+
        **bpftool cgroup attach** *CGROUP* *ATTACH_TYPE* *PROG* [*ATTACH_FLAGS*]
                  Attach program *PROG* to the cgroup *CGROUP* with attach type
                  *ATTACH_TYPE* and optional *ATTACH_FLAGS*.
index 43d34a5c3ec527a95b7bbde6f28616597faf9d66..64156a16d5300b64e0d9c2f5297605b88e6265b2 100644 (file)
@@ -24,10 +24,20 @@ MAP COMMANDS
 |      **bpftool** **prog dump xlated** *PROG* [{**file** *FILE* | **opcodes** | **visual**}]
 |      **bpftool** **prog dump jited**  *PROG* [{**file** *FILE* | **opcodes**}]
 |      **bpftool** **prog pin** *PROG* *FILE*
-|      **bpftool** **prog load** *OBJ* *FILE*
+|      **bpftool** **prog load** *OBJ* *FILE* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*]
 |      **bpftool** **prog help**
 |
+|      *MAP* := { **id** *MAP_ID* | **pinned** *FILE* }
 |      *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* }
+|      *TYPE* := {
+|              **socket** | **kprobe** | **kretprobe** | **classifier** | **action** |
+|              **tracepoint** | **raw_tracepoint** | **xdp** | **perf_event** | **cgroup/skb** |
+|              **cgroup/sock** | **cgroup/dev** | **lwt_in** | **lwt_out** | **lwt_xmit** |
+|              **lwt_seg6local** | **sockops** | **sk_skb** | **sk_msg** | **lirc_mode2** |
+|              **cgroup/bind4** | **cgroup/bind6** | **cgroup/post_bind4** | **cgroup/post_bind6** |
+|              **cgroup/connect4** | **cgroup/connect6** | **cgroup/sendmsg4** | **cgroup/sendmsg6**
+|      }
+
 
 DESCRIPTION
 ===========
@@ -64,8 +74,19 @@ DESCRIPTION
 
                  Note: *FILE* must be located in *bpffs* mount.
 
-       **bpftool prog load** *OBJ* *FILE*
+       **bpftool prog load** *OBJ* *FILE* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*]
                  Load bpf program from binary *OBJ* and pin as *FILE*.
+                 **type** is optional; if not specified, the program type is
+                 inferred from section names.
+                 By default bpftool creates new maps as declared in the ELF
+                 object being loaded.  The **map** parameter allows reusing
+                 existing maps.  It can be specified multiple times, each
+                 time for a different map.  *IDX* refers to the index of the
+                 map to be replaced in the ELF file, counting from 0, while
+                 *NAME* replaces a map by name.  *MAP* specifies the map to
+                 use, referring to it by **id** or through a **pinned** file.
+                 If **dev** *NAME* is specified, the program is loaded onto
+                 the given networking device (offload).
 
                  Note: *FILE* must be located in *bpffs* mount.
 
@@ -159,6 +180,14 @@ EXAMPLES
     mov    %rbx,0x0(%rbp)
     48 89 5d 00
 
+|
+| **# bpftool prog load xdp1_kern.o /sys/fs/bpf/xdp1 type xdp map name rxcnt id 7**
+| **# bpftool prog show pinned /sys/fs/bpf/xdp1**
+|   9: xdp  name xdp_prog1  tag 539ec6ce11b52f98  gpl
+|      loaded_at 2018-06-25T16:17:31-0700  uid 0
+|      xlated 488B  jited 336B  memlock 4096B  map_ids 7
+| **# rm /sys/fs/bpf/xdp1**
+|
 
 SEE ALSO
 ========
index 892dbf095bffd79ac6bbc9b3a70a33efa3955f54..74288a2197abfcbf8763984516faee7f19253b50 100644 (file)
@@ -23,10 +23,10 @@ endif
 
 LIBBPF = $(BPF_PATH)libbpf.a
 
-BPFTOOL_VERSION=$(shell make --no-print-directory -sC ../../.. kernelversion)
+BPFTOOL_VERSION := $(shell make --no-print-directory -sC ../../.. kernelversion)
 
 $(LIBBPF): FORCE
-       $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(OUTPUT) $(OUTPUT)libbpf.a FEATURES_DUMP=$(FEATURE_DUMP_EXPORT)
+       $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(OUTPUT) $(OUTPUT)libbpf.a
 
 $(LIBBPF)-clean:
        $(call QUIET_CLEAN, libbpf)
@@ -52,7 +52,7 @@ INSTALL ?= install
 RM ?= rm -f
 
 FEATURE_USER = .bpftool
-FEATURE_TESTS = libbfd disassembler-four-args
+FEATURE_TESTS = libbfd disassembler-four-args reallocarray
 FEATURE_DISPLAY = libbfd disassembler-four-args
 
 check_feat := 1
@@ -75,6 +75,10 @@ ifeq ($(feature-disassembler-four-args), 1)
 CFLAGS += -DDISASM_FOUR_ARGS_SIGNATURE
 endif
 
+ifeq ($(feature-reallocarray), 0)
+CFLAGS += -DCOMPAT_NEED_REALLOCARRAY
+endif
+
 include $(wildcard $(OUTPUT)*.d)
 
 all: $(OUTPUT)bpftool
index 1e108332164367a769c798dfd75c89dcca9e413b..598066c401912a5d69f77ccc68c9fa78da8566e9 100644 (file)
@@ -99,6 +99,35 @@ _bpftool_get_prog_tags()
         command sed -n 's/.*"tag": "\(.*\)",$/\1/p' )" -- "$cur" ) )
 }
 
+_bpftool_get_obj_map_names()
+{
+    local obj
+
+    obj=$1
+
+    maps=$(objdump -j maps -t $obj 2>/dev/null | \
+        command awk '/g     . maps/ {print $NF}')
+
+    COMPREPLY+=( $( compgen -W "$maps" -- "$cur" ) )
+}
+
+_bpftool_get_obj_map_idxs()
+{
+    local obj
+
+    obj=$1
+
+    nmaps=$(objdump -j maps -t $obj 2>/dev/null | grep -c 'g     . maps')
+
+    COMPREPLY+=( $( compgen -W "$(seq 0 $((nmaps - 1)))" -- "$cur" ) )
+}
+
+_sysfs_get_netdevs()
+{
+    COMPREPLY+=( $( compgen -W "$( ls /sys/class/net 2>/dev/null )" -- \
+        "$cur" ) )
+}
+
 # For bpftool map update: retrieve type of the map to update.
 _bpftool_map_update_map_type()
 {
@@ -153,6 +182,13 @@ _bpftool()
     local cur prev words objword
     _init_completion || return
 
+    # Deal with options
+    if [[ ${words[cword]} == -* ]]; then
+        local c='--version --json --pretty --bpffs'
+        COMPREPLY=( $( compgen -W "$c" -- "$cur" ) )
+        return 0
+    fi
+
     # Deal with simplest keywords
     case $prev in
         help|hex|opcodes|visual)
@@ -172,20 +208,23 @@ _bpftool()
             ;;
     esac
 
-    # Search for object and command
-    local object command cmdword
-    for (( cmdword=1; cmdword < ${#words[@]}-1; cmdword++ )); do
-        [[ -n $object ]] && command=${words[cmdword]} && break
-        [[ ${words[cmdword]} != -* ]] && object=${words[cmdword]}
+    # Remove all options so completions don't have to deal with them.
+    local i
+    for (( i=1; i < ${#words[@]}; )); do
+        if [[ ${words[i]::1} == - ]]; then
+            words=( "${words[@]:0:i}" "${words[@]:i+1}" )
+            [[ $i -le $cword ]] && cword=$(( cword - 1 ))
+        else
+            i=$(( ++i ))
+        fi
     done
+    cur=${words[cword]}
+    prev=${words[cword - 1]}
+
+    local object=${words[1]} command=${words[2]}
 
-    if [[ -z $object ]]; then
+    if [[ -z $object || $cword -eq 1 ]]; then
         case $cur in
-            -*)
-                local c='--version --json --pretty'
-                COMPREPLY=( $( compgen -W "$c" -- "$cur" ) )
-                return 0
-                ;;
             *)
                 COMPREPLY=( $( compgen -W "$( bpftool help 2>&1 | \
                     command sed \
@@ -204,12 +243,14 @@ _bpftool()
     # Completion depends on object and command in use
     case $object in
         prog)
-            case $prev in
-                id)
-                    _bpftool_get_prog_ids
-                    return 0
-                    ;;
-            esac
+            if [[ $command != "load" ]]; then
+                case $prev in
+                    id)
+                        _bpftool_get_prog_ids
+                        return 0
+                        ;;
+                esac
+            fi
 
             local PROG_TYPE='id pinned tag'
             case $command in
@@ -252,8 +293,57 @@ _bpftool()
                     return 0
                     ;;
                 load)
-                    _filedir
-                    return 0
+                    local obj
+
+                    if [[ ${#words[@]} -lt 6 ]]; then
+                        _filedir
+                        return 0
+                    fi
+
+                    obj=${words[3]}
+
+                    if [[ ${words[-4]} == "map" ]]; then
+                        COMPREPLY=( $( compgen -W "id pinned" -- "$cur" ) )
+                        return 0
+                    fi
+                    if [[ ${words[-3]} == "map" ]]; then
+                        if [[ ${words[-2]} == "idx" ]]; then
+                            _bpftool_get_obj_map_idxs $obj
+                        elif [[ ${words[-2]} == "name" ]]; then
+                            _bpftool_get_obj_map_names $obj
+                        fi
+                        return 0
+                    fi
+                    if [[ ${words[-2]} == "map" ]]; then
+                        COMPREPLY=( $( compgen -W "idx name" -- "$cur" ) )
+                        return 0
+                    fi
+
+                    case $prev in
+                        type)
+                            COMPREPLY=( $( compgen -W "socket kprobe kretprobe classifier action tracepoint raw_tracepoint xdp perf_event cgroup/skb cgroup/sock cgroup/dev lwt_in lwt_out lwt_xmit lwt_seg6local sockops sk_skb sk_msg lirc_mode2 cgroup/bind4 cgroup/bind6 cgroup/connect4 cgroup/connect6 cgroup/sendmsg4 cgroup/sendmsg6 cgroup/post_bind4 cgroup/post_bind6" -- \
+                                                   "$cur" ) )
+                            return 0
+                            ;;
+                        id)
+                            _bpftool_get_map_ids
+                            return 0
+                            ;;
+                        pinned)
+                            _filedir
+                            return 0
+                            ;;
+                        dev)
+                            _sysfs_get_netdevs
+                            return 0
+                            ;;
+                        *)
+                            COMPREPLY=( $( compgen -W "map" -- "$cur" ) )
+                            _bpftool_once_attr 'type'
+                            _bpftool_once_attr 'dev'
+                            return 0
+                            ;;
+                    esac
                     ;;
                 *)
                     [[ $prev == $object ]] && \
@@ -404,6 +494,10 @@ _bpftool()
                     _filedir
                     return 0
                     ;;
+               tree)
+                   _filedir
+                   return 0
+                   ;;
                 attach|detach)
                     local ATTACH_TYPES='ingress egress sock_create sock_ops \
                         device bind4 bind6 post_bind4 post_bind6 connect4 \
@@ -445,7 +539,7 @@ _bpftool()
                 *)
                     [[ $prev == $object ]] && \
                         COMPREPLY=( $( compgen -W 'help attach detach \
-                            show list' -- "$cur" ) )
+                            show list tree' -- "$cur" ) )
                     ;;
             esac
             ;;
diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c
new file mode 100644 (file)
index 0000000..55bc512
--- /dev/null
@@ -0,0 +1,251 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Facebook */
+
+#include <ctype.h>
+#include <stdio.h> /* for (FILE *) used by json_writer */
+#include <string.h>
+#include <asm/byteorder.h>
+#include <linux/bitops.h>
+#include <linux/btf.h>
+#include <linux/err.h>
+
+#include "btf.h"
+#include "json_writer.h"
+#include "main.h"
+
+#define BITS_PER_BYTE_MASK (BITS_PER_BYTE - 1)
+#define BITS_PER_BYTE_MASKED(bits) ((bits) & BITS_PER_BYTE_MASK)
+#define BITS_ROUNDDOWN_BYTES(bits) ((bits) >> 3)
+#define BITS_ROUNDUP_BYTES(bits) \
+       (BITS_ROUNDDOWN_BYTES(bits) + !!BITS_PER_BYTE_MASKED(bits))
+
+static int btf_dumper_do_type(const struct btf_dumper *d, __u32 type_id,
+                             __u8 bit_offset, const void *data);
+
+static void btf_dumper_ptr(const void *data, json_writer_t *jw,
+                          bool is_plain_text)
+{
+       if (is_plain_text)
+               jsonw_printf(jw, "%p", *(unsigned long *)data);
+       else
+               jsonw_printf(jw, "%u", *(unsigned long *)data);
+}
+
+static int btf_dumper_modifier(const struct btf_dumper *d, __u32 type_id,
+                              const void *data)
+{
+       int actual_type_id;
+
+       actual_type_id = btf__resolve_type(d->btf, type_id);
+       if (actual_type_id < 0)
+               return actual_type_id;
+
+       return btf_dumper_do_type(d, actual_type_id, 0, data);
+}
+
+static void btf_dumper_enum(const void *data, json_writer_t *jw)
+{
+       jsonw_printf(jw, "%d", *(int *)data);
+}
+
+static int btf_dumper_array(const struct btf_dumper *d, __u32 type_id,
+                           const void *data)
+{
+       const struct btf_type *t = btf__type_by_id(d->btf, type_id);
+       struct btf_array *arr = (struct btf_array *)(t + 1);
+       long long elem_size;
+       int ret = 0;
+       __u32 i;
+
+       elem_size = btf__resolve_size(d->btf, arr->type);
+       if (elem_size < 0)
+               return elem_size;
+
+       jsonw_start_array(d->jw);
+       for (i = 0; i < arr->nelems; i++) {
+               ret = btf_dumper_do_type(d, arr->type, 0,
+                                        data + i * elem_size);
+               if (ret)
+                       break;
+       }
+
+       jsonw_end_array(d->jw);
+       return ret;
+}
+
+static void btf_dumper_int_bits(__u32 int_type, __u8 bit_offset,
+                               const void *data, json_writer_t *jw,
+                               bool is_plain_text)
+{
+       int left_shift_bits, right_shift_bits;
+       int nr_bits = BTF_INT_BITS(int_type);
+       int total_bits_offset;
+       int bytes_to_copy;
+       int bits_to_copy;
+       __u64 print_num;
+
+       total_bits_offset = bit_offset + BTF_INT_OFFSET(int_type);
+       data += BITS_ROUNDDOWN_BYTES(total_bits_offset);
+       bit_offset = BITS_PER_BYTE_MASKED(total_bits_offset);
+       bits_to_copy = bit_offset + nr_bits;
+       bytes_to_copy = BITS_ROUNDUP_BYTES(bits_to_copy);
+
+       print_num = 0;
+       memcpy(&print_num, data, bytes_to_copy);
+#if defined(__BIG_ENDIAN_BITFIELD)
+       left_shift_bits = bit_offset;
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+       left_shift_bits = 64 - bits_to_copy;
+#else
+#error neither big nor little endian
+#endif
+       right_shift_bits = 64 - nr_bits;
+
+       print_num <<= left_shift_bits;
+       print_num >>= right_shift_bits;
+       if (is_plain_text)
+               jsonw_printf(jw, "0x%llx", print_num);
+       else
+               jsonw_printf(jw, "%llu", print_num);
+}
+
+static int btf_dumper_int(const struct btf_type *t, __u8 bit_offset,
+                         const void *data, json_writer_t *jw,
+                         bool is_plain_text)
+{
+       __u32 *int_type;
+       __u32 nr_bits;
+
+       int_type = (__u32 *)(t + 1);
+       nr_bits = BTF_INT_BITS(*int_type);
+       /* if this is bit field */
+       if (bit_offset || BTF_INT_OFFSET(*int_type) ||
+           BITS_PER_BYTE_MASKED(nr_bits)) {
+               btf_dumper_int_bits(*int_type, bit_offset, data, jw,
+                                   is_plain_text);
+               return 0;
+       }
+
+       switch (BTF_INT_ENCODING(*int_type)) {
+       case 0:
+               if (BTF_INT_BITS(*int_type) == 64)
+                       jsonw_printf(jw, "%lu", *(__u64 *)data);
+               else if (BTF_INT_BITS(*int_type) == 32)
+                       jsonw_printf(jw, "%u", *(__u32 *)data);
+               else if (BTF_INT_BITS(*int_type) == 16)
+                       jsonw_printf(jw, "%hu", *(__u16 *)data);
+               else if (BTF_INT_BITS(*int_type) == 8)
+                       jsonw_printf(jw, "%hhu", *(__u8 *)data);
+               else
+                       btf_dumper_int_bits(*int_type, bit_offset, data, jw,
+                                           is_plain_text);
+               break;
+       case BTF_INT_SIGNED:
+               if (BTF_INT_BITS(*int_type) == 64)
+                       jsonw_printf(jw, "%ld", *(long long *)data);
+               else if (BTF_INT_BITS(*int_type) == 32)
+                       jsonw_printf(jw, "%d", *(int *)data);
+               else if (BTF_INT_BITS(*int_type) == 16)
+                       jsonw_printf(jw, "%hd", *(short *)data);
+               else if (BTF_INT_BITS(*int_type) == 8)
+                       jsonw_printf(jw, "%hhd", *(char *)data);
+               else
+                       btf_dumper_int_bits(*int_type, bit_offset, data, jw,
+                                           is_plain_text);
+               break;
+       case BTF_INT_CHAR:
+               if (isprint(*(char *)data))
+                       jsonw_printf(jw, "\"%c\"", *(char *)data);
+               else
+                       if (is_plain_text)
+                               jsonw_printf(jw, "0x%hhx", *(char *)data);
+                       else
+                               jsonw_printf(jw, "\"\\u00%02hhx\"",
+                                            *(char *)data);
+               break;
+       case BTF_INT_BOOL:
+               jsonw_bool(jw, *(int *)data);
+               break;
+       default:
+               /* shouldn't happen */
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+static int btf_dumper_struct(const struct btf_dumper *d, __u32 type_id,
+                            const void *data)
+{
+       const struct btf_type *t;
+       struct btf_member *m;
+       const void *data_off;
+       int ret = 0;
+       int i, vlen;
+
+       t = btf__type_by_id(d->btf, type_id);
+       if (!t)
+               return -EINVAL;
+
+       vlen = BTF_INFO_VLEN(t->info);
+       jsonw_start_object(d->jw);
+       m = (struct btf_member *)(t + 1);
+
+       for (i = 0; i < vlen; i++) {
+               data_off = data + BITS_ROUNDDOWN_BYTES(m[i].offset);
+               jsonw_name(d->jw, btf__name_by_offset(d->btf, m[i].name_off));
+               ret = btf_dumper_do_type(d, m[i].type,
+                                        BITS_PER_BYTE_MASKED(m[i].offset),
+                                        data_off);
+               if (ret)
+                       break;
+       }
+
+       jsonw_end_object(d->jw);
+
+       return ret;
+}
+
+static int btf_dumper_do_type(const struct btf_dumper *d, __u32 type_id,
+                             __u8 bit_offset, const void *data)
+{
+       const struct btf_type *t = btf__type_by_id(d->btf, type_id);
+
+       switch (BTF_INFO_KIND(t->info)) {
+       case BTF_KIND_INT:
+               return btf_dumper_int(t, bit_offset, data, d->jw,
+                                    d->is_plain_text);
+       case BTF_KIND_STRUCT:
+       case BTF_KIND_UNION:
+               return btf_dumper_struct(d, type_id, data);
+       case BTF_KIND_ARRAY:
+               return btf_dumper_array(d, type_id, data);
+       case BTF_KIND_ENUM:
+               btf_dumper_enum(data, d->jw);
+               return 0;
+       case BTF_KIND_PTR:
+               btf_dumper_ptr(data, d->jw, d->is_plain_text);
+               return 0;
+       case BTF_KIND_UNKN:
+               jsonw_printf(d->jw, "(unknown)");
+               return 0;
+       case BTF_KIND_FWD:
+               /* map key or value can't be forward */
+               jsonw_printf(d->jw, "(fwd-kind-invalid)");
+               return -EINVAL;
+       case BTF_KIND_TYPEDEF:
+       case BTF_KIND_VOLATILE:
+       case BTF_KIND_CONST:
+       case BTF_KIND_RESTRICT:
+               return btf_dumper_modifier(d, type_id, data);
+       default:
+               jsonw_printf(d->jw, "(unsupported-kind");
+               return -EINVAL;
+       }
+}
+
+int btf_dumper_type(const struct btf_dumper *d, __u32 type_id,
+                   const void *data)
+{
+       return btf_dumper_do_type(d, type_id, 0, data);
+}
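
btf_dumper_int_bits() above extracts an arbitrary bitfield by copying up to
eight bytes into a u64, shifting the field flush against the most-significant
end, then shifting it back down so only the field's bits remain. A standalone
little-endian walk-through of that arithmetic with concrete numbers,
illustrative only:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
        /* A 5-bit field at bit offset 3 (little-endian host), value 22. */
        uint8_t raw[8] = { 22u << 3 };     /* field occupies bits 3..7 */
        int bit_offset = 3, nr_bits = 5;
        int bits_to_copy = bit_offset + nr_bits;        /* 8 */
        uint64_t num = 0;

        memcpy(&num, raw, (bits_to_copy + 7) / 8);      /* bytes_to_copy */
        num <<= 64 - bits_to_copy;  /* left_shift_bits (little endian) */
        num >>= 64 - nr_bits;       /* right_shift_bits */
        printf("%llu\n", (unsigned long long)num);      /* prints 22 */
        return 0;
}
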
index 16bee011e16cc883e99fcc49451b896ce713d636..ee7a9765c6b32f3eb9e22b79f8a112dd38983405 100644 (file)
@@ -2,7 +2,12 @@
 // Copyright (C) 2017 Facebook
 // Author: Roman Gushchin <guro@fb.com>
 
+#define _XOPEN_SOURCE 500
+#include <errno.h>
 #include <fcntl.h>
+#include <ftw.h>
+#include <mntent.h>
+#include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <sys/stat.h>
@@ -53,7 +58,8 @@ static enum bpf_attach_type parse_attach_type(const char *str)
 }
 
 static int show_bpf_prog(int id, const char *attach_type_str,
-                        const char *attach_flags_str)
+                        const char *attach_flags_str,
+                        int level)
 {
        struct bpf_prog_info info = {};
        __u32 info_len = sizeof(info);
@@ -78,7 +84,8 @@ static int show_bpf_prog(int id, const char *attach_type_str,
                jsonw_string_field(json_wtr, "name", info.name);
                jsonw_end_object(json_wtr);
        } else {
-               printf("%-8u %-15s %-15s %-15s\n", info.id,
+               printf("%s%-8u %-15s %-15s %-15s\n", level ? "    " : "",
+                      info.id,
                       attach_type_str,
                       attach_flags_str,
                       info.name);
@@ -88,7 +95,20 @@ static int show_bpf_prog(int id, const char *attach_type_str,
        return 0;
 }
 
-static int show_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type)
+static int count_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type)
+{
+       __u32 prog_cnt = 0;
+       int ret;
+
+       ret = bpf_prog_query(cgroup_fd, type, 0, NULL, NULL, &prog_cnt);
+       if (ret)
+               return -1;
+
+       return prog_cnt;
+}
+
+static int show_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type,
+                                  int level)
 {
        __u32 prog_ids[1024] = {0};
        char *attach_flags_str;
@@ -123,7 +143,7 @@ static int show_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type)
 
        for (iter = 0; iter < prog_cnt; iter++)
                show_bpf_prog(prog_ids[iter], attach_type_strings[type],
-                             attach_flags_str);
+                             attach_flags_str, level);
 
        return 0;
 }
@@ -161,7 +181,7 @@ static int do_show(int argc, char **argv)
                 * If we were able to get the show for at least one
                 * attach type, let's return 0.
                 */
-               if (show_attached_bpf_progs(cgroup_fd, type) == 0)
+               if (show_attached_bpf_progs(cgroup_fd, type, 0) == 0)
                        ret = 0;
        }
 
@@ -173,6 +193,143 @@ exit:
        return ret;
 }
 
+/*
+ * To distinguish nftw() errors and do_show_tree_fn() errors
+ * and avoid duplicating error messages, let's return -2
+ * from do_show_tree_fn() in case of error.
+ */
+#define NFTW_ERR               -1
+#define SHOW_TREE_FN_ERR       -2
+static int do_show_tree_fn(const char *fpath, const struct stat *sb,
+                          int typeflag, struct FTW *ftw)
+{
+       enum bpf_attach_type type;
+       bool skip = true;
+       int cgroup_fd;
+
+       if (typeflag != FTW_D)
+               return 0;
+
+       cgroup_fd = open(fpath, O_RDONLY);
+       if (cgroup_fd < 0) {
+               p_err("can't open cgroup %s: %s", fpath, strerror(errno));
+               return SHOW_TREE_FN_ERR;
+       }
+
+       for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) {
+               int count = count_attached_bpf_progs(cgroup_fd, type);
+
+               if (count < 0 && errno != EINVAL) {
+                       p_err("can't query bpf programs attached to %s: %s",
+                             fpath, strerror(errno));
+                       close(cgroup_fd);
+                       return SHOW_TREE_FN_ERR;
+               }
+               if (count > 0) {
+                       skip = false;
+                       break;
+               }
+       }
+
+       if (skip) {
+               close(cgroup_fd);
+               return 0;
+       }
+
+       if (json_output) {
+               jsonw_start_object(json_wtr);
+               jsonw_string_field(json_wtr, "cgroup", fpath);
+               jsonw_name(json_wtr, "programs");
+               jsonw_start_array(json_wtr);
+       } else {
+               printf("%s\n", fpath);
+       }
+
+       for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++)
+               show_attached_bpf_progs(cgroup_fd, type, ftw->level);
+
+       if (json_output) {
+               jsonw_end_array(json_wtr);
+               jsonw_end_object(json_wtr);
+       }
+
+       close(cgroup_fd);
+
+       return 0;
+}
+
+static char *find_cgroup_root(void)
+{
+       struct mntent *mnt;
+       FILE *f;
+
+       f = fopen("/proc/mounts", "r");
+       if (f == NULL)
+               return NULL;
+
+       while ((mnt = getmntent(f))) {
+               if (strcmp(mnt->mnt_type, "cgroup2") == 0) {
+                       fclose(f);
+                       return strdup(mnt->mnt_dir);
+               }
+       }
+
+       fclose(f);
+       return NULL;
+}
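
find_cgroup_root() walks /proc/mounts by hand with getmntent(). An equivalent
spelling, hedged here as an alternative rather than what the patch does, uses
the conventional setmntent()/endmntent() pair:

    #include <mntent.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    static char *find_cgroup2_mount(void)
    {
            struct mntent *mnt;
            char *path = NULL;
            FILE *f;

            f = setmntent("/proc/mounts", "r");
            if (!f)
                    return NULL;
            while ((mnt = getmntent(f))) {
                    if (!strcmp(mnt->mnt_type, "cgroup2")) {
                            path = strdup(mnt->mnt_dir);
                            break;
                    }
            }
            endmntent(f);
            return path;
    }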
+
+static int do_show_tree(int argc, char **argv)
+{
+       char *cgroup_root;
+       int ret;
+
+       switch (argc) {
+       case 0:
+               cgroup_root = find_cgroup_root();
+               if (!cgroup_root) {
+                       p_err("cgroup v2 isn't mounted");
+                       return -1;
+               }
+               break;
+       case 1:
+               cgroup_root = argv[0];
+               break;
+       default:
+               p_err("too many parameters for cgroup tree");
+               return -1;
+       }
+
+       if (json_output)
+               jsonw_start_array(json_wtr);
+       else
+               printf("%s\n"
+                      "%-8s %-15s %-15s %-15s\n",
+                      "CgroupPath",
+                      "ID", "AttachType", "AttachFlags", "Name");
+
+       switch (nftw(cgroup_root, do_show_tree_fn, 1024, FTW_MOUNT)) {
+       case NFTW_ERR:
+               p_err("can't iterate over %s: %s", cgroup_root,
+                     strerror(errno));
+               ret = -1;
+               break;
+       case SHOW_TREE_FN_ERR:
+               ret = -1;
+               break;
+       default:
+               ret = 0;
+       }
+
+       if (json_output)
+               jsonw_end_array(json_wtr);
+
+       if (argc == 0)
+               free(cgroup_root);
+
+       return ret;
+}
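
The -2 sentinel works because nftw() propagates any non-zero value returned by
the callback as its own return value, while -1 is what nftw() itself returns on
failure; the switch above can therefore tell the two apart. A self-contained
walker showing the same FTW_MOUNT and struct FTW usage (the start path is only
an example):

    #define _XOPEN_SOURCE 500
    #include <ftw.h>
    #include <stdio.h>

    static int print_dir(const char *fpath, const struct stat *sb,
                         int typeflag, struct FTW *ftwbuf)
    {
            (void)sb;
            if (typeflag == FTW_D)
                    printf("%*s%s\n", ftwbuf->level * 2, "", fpath);
            return 0;       /* any non-zero value would stop the walk */
    }

    int main(void)
    {
            /* up to 1024 open fds, do not cross mount points */
            return nftw("/sys/fs/cgroup", print_dir, 1024, FTW_MOUNT) ? 1 : 0;
    }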
+
 static int do_attach(int argc, char **argv)
 {
        enum bpf_attach_type attach_type;
@@ -289,6 +446,7 @@ static int do_help(int argc, char **argv)
 
        fprintf(stderr,
                "Usage: %s %s { show | list } CGROUP\n"
+               "       %s %s tree [CGROUP_ROOT]\n"
                "       %s %s attach CGROUP ATTACH_TYPE PROG [ATTACH_FLAGS]\n"
                "       %s %s detach CGROUP ATTACH_TYPE PROG\n"
                "       %s %s help\n"
@@ -298,6 +456,7 @@ static int do_help(int argc, char **argv)
                "       " HELP_SPEC_PROGRAM "\n"
                "       " HELP_SPEC_OPTIONS "\n"
                "",
+               bin_name, argv[-2],
                bin_name, argv[-2], bin_name, argv[-2],
                bin_name, argv[-2], bin_name, argv[-2]);
 
@@ -307,6 +466,7 @@ static int do_help(int argc, char **argv)
 static const struct cmd cmds[] = {
        { "show",       do_show },
        { "list",       do_show },
+       { "tree",       do_show_tree },
        { "attach",     do_attach },
        { "detach",     do_detach },
        { "help",       do_help },
index 3f140eff039fc699f99a30f231211120fa88ed53..b3a0709ea7ede6f6c5ebe3a49083c561f5d47a1d 100644 (file)
@@ -31,8 +31,6 @@
  * SOFTWARE.
  */
 
-/* Author: Jakub Kicinski <kubakici@wp.pl> */
-
 #include <ctype.h>
 #include <errno.h>
 #include <fcntl.h>
index eea7f14355f3273f2885e370cbd5a47a99d0bded..d15a62be6cf0fd0bbe306e9420fc07d9848ce679 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 Netronome Systems, Inc.
+ * Copyright (C) 2017-2018 Netronome Systems, Inc.
  *
  * This software is dual licensed under the GNU General License Version 2,
  * June 1991 as shown in the file COPYING in the top-level directory of this
@@ -31,8 +31,6 @@
  * SOFTWARE.
  */
 
-/* Author: Jakub Kicinski <kubakici@wp.pl> */
-
 #include <bfd.h>
 #include <ctype.h>
 #include <errno.h>
index 63fdb310b9a4aceddad6f5c935c4075d3f9b522e..238e734d75b3eb616e287f0eb47ff5645d94eccd 100644 (file)
@@ -31,8 +31,6 @@
  * SOFTWARE.
  */
 
-/* Author: Jakub Kicinski <kubakici@wp.pl> */
-
 #ifndef __BPF_TOOL_H
 #define __BPF_TOOL_H
 
@@ -44,6 +42,7 @@
 #include <linux/compiler.h>
 #include <linux/kernel.h>
 #include <linux/hashtable.h>
+#include <tools/libc_compat.h>
 
 #include "json_writer.h"
 
 #define NEXT_ARG()     ({ argc--; argv++; if (argc < 0) usage(); })
 #define NEXT_ARGP()    ({ (*argc)--; (*argv)++; if (*argc < 0) usage(); })
 #define BAD_ARG()      ({ p_err("what is '%s'?", *argv); -1; })
+#define GET_ARG()      ({ argc--; *argv++; })
+#define REQ_ARGS(cnt)                                                  \
+       ({                                                              \
+               int _cnt = (cnt);                                       \
+               bool _res;                                              \
+                                                                       \
+               if (argc < _cnt) {                                      \
+                       p_err("'%s' needs at least %d arguments, %d found", \
+                             argv[-1], _cnt, argc);                    \
+                       _res = false;                                   \
+               } else {                                                \
+                       _res = true;                                    \
+               }                                                       \
+               _res;                                                   \
+       })
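
GET_ARG() consumes and returns the current argument; REQ_ARGS(n) prints an
error naming the offending keyword (argv[-1]) when fewer than n arguments
remain. The do_load() rewrite further down uses the pair like this:

    if (!REQ_ARGS(2))               /* need OBJ and FILE */
            return -1;
    attr.file = GET_ARG();          /* object file to load */
    pinfile   = GET_ARG();          /* pin destination */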
 
 #define ERR_MAX_LEN    1024
 
@@ -61,6 +75,8 @@
        "PROG := { id PROG_ID | pinned FILE | tag PROG_TAG }"
 #define HELP_SPEC_OPTIONS                                              \
        "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-f|--bpffs} }"
+#define HELP_SPEC_MAP                                                  \
+       "MAP := { id MAP_ID | pinned FILE }"
 
 enum bpf_obj_type {
        BPF_OBJ_UNKNOWN,
@@ -122,6 +138,7 @@ int do_cgroup(int argc, char **arg);
 int do_perf(int argc, char **arg);
 
 int prog_parse_fd(int *argc, char ***argv);
+int map_parse_fd(int *argc, char ***argv);
 int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len);
 
 void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes,
@@ -133,4 +150,19 @@ unsigned int get_page_size(void);
 unsigned int get_possible_cpus(void);
 const char *ifindex_to_bfd_name_ns(__u32 ifindex, __u64 ns_dev, __u64 ns_ino);
 
+struct btf_dumper {
+       const struct btf *btf;
+       json_writer_t *jw;
+       bool is_plain_text;
+};
+
+/* btf_dumper_type - print data along with type information
+ * @d: an instance containing context for dumping types
+ * @type_id: index in btf->types array. this points to the type to be dumped
+ * @data: pointer the actual data, i.e. the values to be printed
+ *
+ * Returns zero on success and negative error code otherwise
+ */
+int btf_dumper_type(const struct btf_dumper *d, __u32 type_id,
+                   const void *data);
 #endif
index f74a8bcbda874a8cfa0595257e1002e047f5559a..e860ca859b28419ea45776209d9eac755242749f 100644 (file)
  * SOFTWARE.
  */
 
-/* Author: Jakub Kicinski <kubakici@wp.pl> */
-
 #include <assert.h>
 #include <errno.h>
 #include <fcntl.h>
+#include <linux/err.h>
 #include <linux/kernel.h>
 #include <stdbool.h>
 #include <stdio.h>
@@ -47,6 +46,8 @@
 
 #include <bpf.h>
 
+#include "btf.h"
+#include "json_writer.h"
 #include "main.h"
 
 static const char * const map_type_name[] = {
@@ -97,7 +98,7 @@ static void *alloc_value(struct bpf_map_info *info)
                return malloc(info->value_size);
 }
 
-static int map_parse_fd(int *argc, char ***argv)
+int map_parse_fd(int *argc, char ***argv)
 {
        int fd;
 
@@ -152,8 +153,109 @@ int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len)
        return fd;
 }
 
+static int do_dump_btf(const struct btf_dumper *d,
+                      struct bpf_map_info *map_info, void *key,
+                      void *value)
+{
+       int ret;
+
+       /* start of key-value pair */
+       jsonw_start_object(d->jw);
+
+       jsonw_name(d->jw, "key");
+
+       ret = btf_dumper_type(d, map_info->btf_key_type_id, key);
+       if (ret)
+               goto err_end_obj;
+
+       jsonw_name(d->jw, "value");
+
+       ret = btf_dumper_type(d, map_info->btf_value_type_id, value);
+
+err_end_obj:
+       /* end of key-value pair */
+       jsonw_end_object(d->jw);
+
+       return ret;
+}
+
+static int get_btf(struct bpf_map_info *map_info, struct btf **btf)
+{
+       struct bpf_btf_info btf_info = { 0 };
+       __u32 len = sizeof(btf_info);
+       __u32 last_size;
+       int btf_fd;
+       void *ptr;
+       int err;
+
+       err = 0;
+       *btf = NULL;
+       btf_fd = bpf_btf_get_fd_by_id(map_info->btf_id);
+       if (btf_fd < 0)
+               return 0;
+
+       /* we won't know btf_size until we call bpf_obj_get_info_by_fd(). so
+        * let's start with a sane default - 4KiB here - and resize it only if
+        * bpf_obj_get_info_by_fd() needs a bigger buffer.
+        */
+       btf_info.btf_size = 4096;
+       last_size = btf_info.btf_size;
+       ptr = malloc(last_size);
+       if (!ptr) {
+               err = -ENOMEM;
+               goto exit_free;
+       }
+
+       bzero(ptr, last_size);
+       btf_info.btf = ptr_to_u64(ptr);
+       err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len);
+
+       if (!err && btf_info.btf_size > last_size) {
+               void *temp_ptr;
+
+               last_size = btf_info.btf_size;
+               temp_ptr = realloc(ptr, last_size);
+               if (!temp_ptr) {
+                       err = -ENOMEM;
+                       goto exit_free;
+               }
+               ptr = temp_ptr;
+               bzero(ptr, last_size);
+               btf_info.btf = ptr_to_u64(ptr);
+               err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len);
+       }
+
+       if (err || btf_info.btf_size > last_size) {
+               err = errno;
+               goto exit_free;
+       }
+
+       *btf = btf__new((__u8 *)btf_info.btf, btf_info.btf_size, NULL);
+       if (IS_ERR(*btf)) {
+               err = PTR_ERR(btf);
+               *btf = NULL;
+       }
+
+exit_free:
+       close(btf_fd);
+       free(ptr);
+
+       return err;
+}
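
btf__new() follows the kernel's ERR_PTR convention instead of returning NULL on
failure, which is why the code above goes through IS_ERR()/PTR_ERR() from the
tools/include copy of linux/err.h. The same pattern in isolation (data and size
assumed to hold a raw BTF blob):

    #include <linux/err.h>          /* IS_ERR(), PTR_ERR() */

    struct btf *b = btf__new(data, size, NULL);

    if (IS_ERR(b)) {
            int err = PTR_ERR(b);   /* negative errno encoded in the pointer */

            b = NULL;
            /* handle err */
    }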
+
+static json_writer_t *get_btf_writer(void)
+{
+       json_writer_t *jw = jsonw_new(stdout);
+
+       if (!jw)
+               return NULL;
+       jsonw_pretty(jw, true);
+
+       return jw;
+}
+
 static void print_entry_json(struct bpf_map_info *info, unsigned char *key,
-                            unsigned char *value)
+                            unsigned char *value, struct btf *btf)
 {
        jsonw_start_object(json_wtr);
 
@@ -162,6 +264,16 @@ static void print_entry_json(struct bpf_map_info *info, unsigned char *key,
                print_hex_data_json(key, info->key_size);
                jsonw_name(json_wtr, "value");
                print_hex_data_json(value, info->value_size);
+               if (btf) {
+                       struct btf_dumper d = {
+                               .btf = btf,
+                               .jw = json_wtr,
+                               .is_plain_text = false,
+                       };
+
+                       jsonw_name(json_wtr, "formatted");
+                       do_dump_btf(&d, info, key, value);
+               }
        } else {
                unsigned int i, n, step;
 
@@ -514,10 +626,12 @@ static int do_show(int argc, char **argv)
 
 static int do_dump(int argc, char **argv)
 {
+       struct bpf_map_info info = {};
        void *key, *value, *prev_key;
        unsigned int num_elems = 0;
-       struct bpf_map_info info = {};
        __u32 len = sizeof(info);
+       json_writer_t *btf_wtr;
+       struct btf *btf = NULL;
        int err;
        int fd;
 
@@ -543,8 +657,27 @@ static int do_dump(int argc, char **argv)
        }
 
        prev_key = NULL;
+
+       err = get_btf(&info, &btf);
+       if (err) {
+               p_err("failed to get btf");
+               goto exit_free;
+       }
+
        if (json_output)
                jsonw_start_array(json_wtr);
+       else if (btf) {
+               btf_wtr = get_btf_writer();
+               if (!btf_wtr) {
+                       p_info("failed to create json writer for btf. falling back to plain output");
+                       btf__free(btf);
+                       btf = NULL;
+               } else {
+                       jsonw_start_array(btf_wtr);
+               }
+       }
+
        while (true) {
                err = bpf_map_get_next_key(fd, prev_key, key);
                if (err) {
@@ -555,9 +688,19 @@ static int do_dump(int argc, char **argv)
 
                if (!bpf_map_lookup_elem(fd, key, value)) {
                        if (json_output)
-                               print_entry_json(&info, key, value);
+                               print_entry_json(&info, key, value, btf);
                        else
-                               print_entry_plain(&info, key, value);
+                               if (btf) {
+                                       struct btf_dumper d = {
+                                               .btf = btf,
+                                               .jw = btf_wtr,
+                                               .is_plain_text = true,
+                                       };
+
+                                       do_dump_btf(&d, &info, key, value);
+                               } else {
+                                       print_entry_plain(&info, key, value);
+                               }
                } else {
                        if (json_output) {
                                jsonw_name(json_wtr, "key");
@@ -580,14 +723,19 @@ static int do_dump(int argc, char **argv)
 
        if (json_output)
                jsonw_end_array(json_wtr);
-       else
+       else if (btf) {
+               jsonw_end_array(btf_wtr);
+               jsonw_destroy(&btf_wtr);
+       } else {
                printf("Found %u element%s\n", num_elems,
                       num_elems != 1 ? "s" : "");
+       }
 
 exit_free:
        free(key);
        free(value);
        close(fd);
+       btf__free(btf);
 
        return err;
 }
@@ -643,6 +791,8 @@ static int do_lookup(int argc, char **argv)
 {
        struct bpf_map_info info = {};
        __u32 len = sizeof(info);
+       json_writer_t *btf_wtr;
+       struct btf *btf = NULL;
        void *key, *value;
        int err;
        int fd;
@@ -667,27 +817,60 @@ static int do_lookup(int argc, char **argv)
                goto exit_free;
 
        err = bpf_map_lookup_elem(fd, key, value);
-       if (!err) {
-               if (json_output)
-                       print_entry_json(&info, key, value);
-               else
+       if (err) {
+               if (errno == ENOENT) {
+                       if (json_output) {
+                               jsonw_null(json_wtr);
+                       } else {
+                               printf("key:\n");
+                               fprint_hex(stdout, key, info.key_size, " ");
+                               printf("\n\nNot found\n");
+                       }
+               } else {
+                       p_err("lookup failed: %s", strerror(errno));
+               }
+
+               goto exit_free;
+       }
+
+       /* getting here means bpf_map_lookup_elem() succeeded */
+       err = get_btf(&info, &btf);
+       if (err) {
+               p_err("failed to get btf");
+               goto exit_free;
+       }
+
+       if (json_output) {
+               print_entry_json(&info, key, value, btf);
+       } else if (btf) {
+               /* if we get here, json_wtr has not been initialised,
+                * so create a separate writer for btf
+                */
+               btf_wtr = get_btf_writer();
+               if (!btf_wtr) {
+                       p_info("failed to create json writer for btf. falling back to plain output");
+                       btf__free(btf);
+                       btf = NULL;
                        print_entry_plain(&info, key, value);
-       } else if (errno == ENOENT) {
-               if (json_output) {
-                       jsonw_null(json_wtr);
                } else {
-                       printf("key:\n");
-                       fprint_hex(stdout, key, info.key_size, " ");
-                       printf("\n\nNot found\n");
+                       struct btf_dumper d = {
+                               .btf = btf,
+                               .jw = btf_wtr,
+                               .is_plain_text = true,
+                       };
+
+                       do_dump_btf(&d, &info, key, value);
+                       jsonw_destroy(&btf_wtr);
                }
        } else {
-               p_err("lookup failed: %s", strerror(errno));
+               print_entry_plain(&info, key, value);
        }
 
 exit_free:
        free(key);
        free(value);
        close(fd);
+       btf__free(btf);
 
        return err;
 }
@@ -830,7 +1013,7 @@ static int do_help(int argc, char **argv)
                "       %s %s event_pipe MAP [cpu N index M]\n"
                "       %s %s help\n"
                "\n"
-               "       MAP := { id MAP_ID | pinned FILE }\n"
+               "       " HELP_SPEC_MAP "\n"
                "       DATA := { [hex] BYTES }\n"
                "       " HELP_SPEC_PROGRAM "\n"
                "       VALUE := { DATA | MAP | PROG }\n"
index 959aa53ab6789f839442326359701b17ba9e337c..dce960d22106fb173e2e5e0195f0472209d2bb71 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 Netronome Systems, Inc.
+ * Copyright (C) 2017-2018 Netronome Systems, Inc.
  *
  * This software is dual licensed under the GNU General License Version 2,
  * June 1991 as shown in the file COPYING in the top-level directory of this
@@ -31,8 +31,7 @@
  * SOFTWARE.
  */
 
-/* Author: Jakub Kicinski <kubakici@wp.pl> */
-
+#define _GNU_SOURCE
 #include <errno.h>
 #include <fcntl.h>
 #include <stdarg.h>
 #include <string.h>
 #include <time.h>
 #include <unistd.h>
+#include <net/if.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 
+#include <linux/err.h>
+
 #include <bpf.h>
 #include <libbpf.h>
 
@@ -681,31 +683,247 @@ static int do_pin(int argc, char **argv)
        return err;
 }
 
+struct map_replace {
+       int idx;
+       int fd;
+       char *name;
+};
+
+int map_replace_compar(const void *p1, const void *p2)
+{
+       const struct map_replace *a = p1, *b = p2;
+
+       return a->idx - b->idx;
+}
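
Sorting replacements by target index lets do_load() walk the object's maps and
the replacement list in lockstep; entries given by name carry idx == -1 and
therefore sort to the front, where they are resolved to real indices before a
second qsort() pass. A small sketch (indices, fds and the map name are made
up):

    struct map_replace arr[] = {
            { .idx = 2,  .fd = 10 },
            { .idx = -1, .fd = 11, .name = "ports" },  /* by name, unresolved */
            { .idx = 0,  .fd = 12 },
    };

    qsort(arr, 3, sizeof(arr[0]), map_replace_compar);
    /* order is now: "ports" (idx -1), idx 0, idx 2 */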
+
 static int do_load(int argc, char **argv)
 {
+       enum bpf_attach_type expected_attach_type;
+       struct bpf_object_open_attr attr = {
+               .prog_type      = BPF_PROG_TYPE_UNSPEC,
+       };
+       struct map_replace *map_replace = NULL;
+       unsigned int old_map_fds = 0;
+       struct bpf_program *prog;
        struct bpf_object *obj;
-       int prog_fd;
-
-       if (argc != 2)
-               usage();
+       struct bpf_map *map;
+       const char *pinfile;
+       unsigned int i, j;
+       __u32 ifindex = 0;
+       int idx, err;
 
-       if (bpf_prog_load(argv[0], BPF_PROG_TYPE_UNSPEC, &obj, &prog_fd)) {
-               p_err("failed to load program");
+       if (!REQ_ARGS(2))
                return -1;
+       attr.file = GET_ARG();
+       pinfile = GET_ARG();
+
+       while (argc) {
+               if (is_prefix(*argv, "type")) {
+                       char *type;
+
+                       NEXT_ARG();
+
+                       if (attr.prog_type != BPF_PROG_TYPE_UNSPEC) {
+                               p_err("program type already specified");
+                               goto err_free_reuse_maps;
+                       }
+                       if (!REQ_ARGS(1))
+                               goto err_free_reuse_maps;
+
+                       /* Put a '/' at the end of type to appease libbpf */
+                       type = malloc(strlen(*argv) + 2);
+                       if (!type) {
+                               p_err("mem alloc failed");
+                               goto err_free_reuse_maps;
+                       }
+                       *type = 0;
+                       strcat(type, *argv);
+                       strcat(type, "/");
+
+                       err = libbpf_prog_type_by_name(type, &attr.prog_type,
+                                                      &expected_attach_type);
+                       free(type);
+                       if (err < 0) {
+                               p_err("unknown program type '%s'", *argv);
+                               goto err_free_reuse_maps;
+                       }
+                       NEXT_ARG();
+               } else if (is_prefix(*argv, "map")) {
+                       char *endptr, *name;
+                       int fd;
+
+                       NEXT_ARG();
+
+                       if (!REQ_ARGS(4))
+                               goto err_free_reuse_maps;
+
+                       if (is_prefix(*argv, "idx")) {
+                               NEXT_ARG();
+
+                               idx = strtoul(*argv, &endptr, 0);
+                               if (*endptr) {
+                                       p_err("can't parse %s as IDX", *argv);
+                                       goto err_free_reuse_maps;
+                               }
+                               name = NULL;
+                       } else if (is_prefix(*argv, "name")) {
+                               NEXT_ARG();
+
+                               name = *argv;
+                               idx = -1;
+                       } else {
+                               p_err("expected 'idx' or 'name', got: '%s'?",
+                                     *argv);
+                               goto err_free_reuse_maps;
+                       }
+                       NEXT_ARG();
+
+                       fd = map_parse_fd(&argc, &argv);
+                       if (fd < 0)
+                               goto err_free_reuse_maps;
+
+                       map_replace = reallocarray(map_replace, old_map_fds + 1,
+                                                  sizeof(*map_replace));
+                       if (!map_replace) {
+                               p_err("mem alloc failed");
+                               goto err_free_reuse_maps;
+                       }
+                       map_replace[old_map_fds].idx = idx;
+                       map_replace[old_map_fds].name = name;
+                       map_replace[old_map_fds].fd = fd;
+                       old_map_fds++;
+               } else if (is_prefix(*argv, "dev")) {
+                       NEXT_ARG();
+
+                       if (ifindex) {
+                               p_err("offload device already specified");
+                               goto err_free_reuse_maps;
+                       }
+                       if (!REQ_ARGS(1))
+                               goto err_free_reuse_maps;
+
+                       ifindex = if_nametoindex(*argv);
+                       if (!ifindex) {
+                               p_err("unrecognized netdevice '%s': %s",
+                                     *argv, strerror(errno));
+                               goto err_free_reuse_maps;
+                       }
+                       NEXT_ARG();
+               } else {
+                       p_err("expected no more arguments, 'type', 'map' or 'dev', got: '%s'?",
+                             *argv);
+                       goto err_free_reuse_maps;
+               }
+       }
+
+       obj = bpf_object__open_xattr(&attr);
+       if (IS_ERR_OR_NULL(obj)) {
+               p_err("failed to open object file");
+               goto err_free_reuse_maps;
+       }
+
+       prog = bpf_program__next(NULL, obj);
+       if (!prog) {
+               p_err("object file doesn't contain any bpf program");
+               goto err_close_obj;
+       }
+
+       bpf_program__set_ifindex(prog, ifindex);
+       if (attr.prog_type == BPF_PROG_TYPE_UNSPEC) {
+               const char *sec_name = bpf_program__title(prog, false);
+
+               err = libbpf_prog_type_by_name(sec_name, &attr.prog_type,
+                                              &expected_attach_type);
+               if (err < 0) {
+                       p_err("failed to guess program type based on section name %s\n",
+                             sec_name);
+                       goto err_close_obj;
+               }
+       }
+       bpf_program__set_type(prog, attr.prog_type);
+       bpf_program__set_expected_attach_type(prog, expected_attach_type);
+
+       qsort(map_replace, old_map_fds, sizeof(*map_replace),
+             map_replace_compar);
+
+       /* After the sort, maps specified by name will be first on the list, because they
+        * have idx == -1.  Resolve them.
+        */
+       j = 0;
+       while (j < old_map_fds && map_replace[j].name) {
+               i = 0;
+               bpf_map__for_each(map, obj) {
+                       if (!strcmp(bpf_map__name(map), map_replace[j].name)) {
+                               map_replace[j].idx = i;
+                               break;
+                       }
+                       i++;
+               }
+               if (map_replace[j].idx == -1) {
+                       p_err("unable to find map '%s'", map_replace[j].name);
+                       goto err_close_obj;
+               }
+               j++;
+       }
+       /* Resort if any names were resolved */
+       if (j)
+               qsort(map_replace, old_map_fds, sizeof(*map_replace),
+                     map_replace_compar);
+
+       /* Set ifindex and name reuse */
+       j = 0;
+       idx = 0;
+       bpf_map__for_each(map, obj) {
+               if (!bpf_map__is_offload_neutral(map))
+                       bpf_map__set_ifindex(map, ifindex);
+
+               if (j < old_map_fds && idx == map_replace[j].idx) {
+                       err = bpf_map__reuse_fd(map, map_replace[j++].fd);
+                       if (err) {
+                               p_err("unable to set up map reuse: %d", err);
+                               goto err_close_obj;
+                       }
+
+                       /* Next reuse wants to apply to the same map */
+                       if (j < old_map_fds && map_replace[j].idx == idx) {
+                               p_err("replacement for map idx %d specified more than once",
+                                     idx);
+                               goto err_close_obj;
+                       }
+               }
+
+               idx++;
+       }
+       if (j < old_map_fds) {
+               p_err("map idx '%d' not used", map_replace[j].idx);
+               goto err_close_obj;
+       }
+
+       err = bpf_object__load(obj);
+       if (err) {
+               p_err("failed to load object file");
+               goto err_close_obj;
        }
 
-       if (do_pin_fd(prog_fd, argv[1]))
+       if (do_pin_fd(bpf_program__fd(prog), pinfile))
                goto err_close_obj;
 
        if (json_output)
                jsonw_null(json_wtr);
 
        bpf_object__close(obj);
+       for (i = 0; i < old_map_fds; i++)
+               close(map_replace[i].fd);
+       free(map_replace);
 
        return 0;
 
 err_close_obj:
        bpf_object__close(obj);
+err_free_reuse_maps:
+       for (i = 0; i < old_map_fds; i++)
+               close(map_replace[i].fd);
+       free(map_replace);
        return -1;
 }
 
@@ -721,10 +939,19 @@ static int do_help(int argc, char **argv)
                "       %s %s dump xlated PROG [{ file FILE | opcodes | visual }]\n"
                "       %s %s dump jited  PROG [{ file FILE | opcodes }]\n"
                "       %s %s pin   PROG FILE\n"
-               "       %s %s load  OBJ  FILE\n"
+               "       %s %s load  OBJ  FILE [type TYPE] [dev NAME] \\\n"
+               "                         [map { idx IDX | name NAME } MAP]\n"
                "       %s %s help\n"
                "\n"
+               "       " HELP_SPEC_MAP "\n"
                "       " HELP_SPEC_PROGRAM "\n"
+               "       TYPE := { socket | kprobe | kretprobe | classifier | action |\n"
+               "                 tracepoint | raw_tracepoint | xdp | perf_event | cgroup/skb |\n"
+               "                 cgroup/sock | cgroup/dev | lwt_in | lwt_out | lwt_xmit |\n"
+               "                 lwt_seg6local | sockops | sk_skb | sk_msg | lirc_mode2 |\n"
+               "                 cgroup/bind4 | cgroup/bind6 | cgroup/post_bind4 |\n"
+               "                 cgroup/post_bind6 | cgroup/connect4 | cgroup/connect6 |\n"
+               "                 cgroup/sendmsg4 | cgroup/sendmsg6 }\n"
                "       " HELP_SPEC_OPTIONS "\n"
                "",
                bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
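
Putting the new options together, a load that forces the program type, offloads
to a netdevice and reuses an existing map could look like this (object file,
interface name and map id are hypothetical):

    bpftool prog load sample_xdp.o /sys/fs/bpf/sample_xdp \
            type xdp dev eth0 map idx 0 id 42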
index b97f1da60dd1d75f401501428f4c1bfdecdd69f3..3284759df98ad4f325b25f719db0cfef7ac9f61c 100644 (file)
@@ -35,6 +35,7 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
+#define _GNU_SOURCE
 #include <stdarg.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -66,9 +67,8 @@ void kernel_syms_load(struct dump_data *dd)
        while (!feof(fp)) {
                if (!fgets(buff, sizeof(buff), fp))
                        break;
-               tmp = realloc(dd->sym_mapping,
-                             (dd->sym_count + 1) *
-                             sizeof(*dd->sym_mapping));
+               tmp = reallocarray(dd->sym_mapping, dd->sym_count + 1,
+                                  sizeof(*dd->sym_mapping));
                if (!tmp) {
 out:
                        free(dd->sym_mapping);
index 5b6dda3b1ca8f238e3f55660ca88f053733dda78..f216b2f5c3d7b591387acf171cd77d633b6c3757 100644 (file)
@@ -57,6 +57,7 @@ FEATURE_TESTS_BASIC :=                  \
         libunwind-aarch64               \
         pthread-attr-setaffinity-np     \
         pthread-barrier                \
+        reallocarray                    \
         stackprotector-all              \
         timerfd                         \
         libdw-dwarf-unwind              \
index dac9563b54707c6cb65deb3e609ecc9785d4bf0c..0516259be70f071f2533496ead690d1ebd5ba3b8 100644 (file)
@@ -14,6 +14,7 @@ FILES=                                          \
          test-libaudit.bin                      \
          test-libbfd.bin                        \
          test-disassembler-four-args.bin        \
+         test-reallocarray.bin                 \
          test-liberty.bin                       \
          test-liberty-z.bin                     \
          test-cplus-demangle.bin                \
@@ -204,6 +205,9 @@ $(OUTPUT)test-libbfd.bin:
 $(OUTPUT)test-disassembler-four-args.bin:
        $(BUILD) -DPACKAGE='"perf"' -lbfd -lopcodes
 
+$(OUTPUT)test-reallocarray.bin:
+       $(BUILD)
+
 $(OUTPUT)test-liberty.bin:
        $(CC) $(CFLAGS) -Wall -Werror -o $@ test-libbfd.c -DPACKAGE='"perf"' $(LDFLAGS) -lbfd -ldl -liberty
 
diff --git a/tools/build/feature/test-reallocarray.c b/tools/build/feature/test-reallocarray.c
new file mode 100644 (file)
index 0000000..8170de3
--- /dev/null
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <stdlib.h>
+
+int main(void)
+{
+       return !!reallocarray(NULL, 1, 1);
+}
index 70fe612957338c84ffeb61f03c358c6ca1e32789..0d35f18006a136b4578f2298243e3728a067c272 100644 (file)
@@ -36,3 +36,7 @@
 #endif
 #define __printf(a, b) __attribute__((format(printf, a, b)))
 #define __scanf(a, b)  __attribute__((format(scanf, a, b)))
+
+#if GCC_VERSION >= 50100
+#define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1
+#endif
diff --git a/tools/include/linux/overflow.h b/tools/include/linux/overflow.h
new file mode 100644 (file)
index 0000000..8712ff7
--- /dev/null
@@ -0,0 +1,278 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+#ifndef __LINUX_OVERFLOW_H
+#define __LINUX_OVERFLOW_H
+
+#include <linux/compiler.h>
+
+/*
+ * In the fallback code below, we need to compute the minimum and
+ * maximum values representable in a given type. These macros may also
+ * be useful elsewhere, so we provide them outside the
+ * COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW block.
+ *
+ * It would seem more obvious to do something like
+ *
+ * #define type_min(T) (T)(is_signed_type(T) ? (T)1 << (8*sizeof(T)-1) : 0)
+ * #define type_max(T) (T)(is_signed_type(T) ? ((T)1 << (8*sizeof(T)-1)) - 1 : ~(T)0)
+ *
+ * Unfortunately, the middle expressions, strictly speaking, have
+ * undefined behaviour, and at least some versions of gcc warn about
+ * the type_max expression (but not if -fsanitize=undefined is in
+ * effect; in that case, the warning is deferred to runtime...).
+ *
+ * The slightly excessive casting in type_min is to make sure the
+ * macros also produce sensible values for the exotic type _Bool. [The
+ * overflow checkers only almost work for _Bool, but that's
+ * a-feature-not-a-bug, since people shouldn't be doing arithmetic on
+ * _Bools. Besides, the gcc builtins don't allow _Bool* as third
+ * argument.]
+ *
+ * Idea stolen from
+ * https://mail-index.netbsd.org/tech-misc/2007/02/05/0000.html -
+ * credit to Christian Biere.
+ */
+#define is_signed_type(type)       (((type)(-1)) < (type)1)
+#define __type_half_max(type) ((type)1 << (8*sizeof(type) - 1 - is_signed_type(type)))
+#define type_max(T) ((T)((__type_half_max(T) - 1) + __type_half_max(T)))
+#define type_min(T) ((T)((T)-type_max(T)-(T)1))
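
A quick compile-time check of what these expand to, assuming a C11 compiler for
_Static_assert and <limits.h> for the reference constants:

    #include <limits.h>

    _Static_assert(type_max(int) == INT_MAX, "max of int");
    _Static_assert(type_min(int) == INT_MIN, "min of int");
    _Static_assert(type_max(unsigned char) == UCHAR_MAX, "max of uchar");
    _Static_assert(type_min(unsigned int) == 0, "min of unsigned");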
+
+
+#ifdef COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW
+/*
+ * For simplicity and code hygiene, the fallback code below insists on
+ * a, b and *d having the same type (similar to the min() and max()
+ * macros), whereas gcc's type-generic overflow checkers accept
+ * different types. Hence we don't just make check_add_overflow an
+ * alias for __builtin_add_overflow, but add type checks similar to
+ * below.
+ */
+#define check_add_overflow(a, b, d) ({         \
+       typeof(a) __a = (a);                    \
+       typeof(b) __b = (b);                    \
+       typeof(d) __d = (d);                    \
+       (void) (&__a == &__b);                  \
+       (void) (&__a == __d);                   \
+       __builtin_add_overflow(__a, __b, __d);  \
+})
+
+#define check_sub_overflow(a, b, d) ({         \
+       typeof(a) __a = (a);                    \
+       typeof(b) __b = (b);                    \
+       typeof(d) __d = (d);                    \
+       (void) (&__a == &__b);                  \
+       (void) (&__a == __d);                   \
+       __builtin_sub_overflow(__a, __b, __d);  \
+})
+
+#define check_mul_overflow(a, b, d) ({         \
+       typeof(a) __a = (a);                    \
+       typeof(b) __b = (b);                    \
+       typeof(d) __d = (d);                    \
+       (void) (&__a == &__b);                  \
+       (void) (&__a == __d);                   \
+       __builtin_mul_overflow(__a, __b, __d);  \
+})
+
+#else
+
+
+/* Checking for unsigned overflow is relatively easy without causing UB. */
+#define __unsigned_add_overflow(a, b, d) ({    \
+       typeof(a) __a = (a);                    \
+       typeof(b) __b = (b);                    \
+       typeof(d) __d = (d);                    \
+       (void) (&__a == &__b);                  \
+       (void) (&__a == __d);                   \
+       *__d = __a + __b;                       \
+       *__d < __a;                             \
+})
+#define __unsigned_sub_overflow(a, b, d) ({    \
+       typeof(a) __a = (a);                    \
+       typeof(b) __b = (b);                    \
+       typeof(d) __d = (d);                    \
+       (void) (&__a == &__b);                  \
+       (void) (&__a == __d);                   \
+       *__d = __a - __b;                       \
+       __a < __b;                              \
+})
+/*
+ * If one of a or b is a compile-time constant, this avoids a division.
+ */
+#define __unsigned_mul_overflow(a, b, d) ({            \
+       typeof(a) __a = (a);                            \
+       typeof(b) __b = (b);                            \
+       typeof(d) __d = (d);                            \
+       (void) (&__a == &__b);                          \
+       (void) (&__a == __d);                           \
+       *__d = __a * __b;                               \
+       __builtin_constant_p(__b) ?                     \
+         __b > 0 && __a > type_max(typeof(__a)) / __b : \
+         __a > 0 && __b > type_max(typeof(__b)) / __a;  \
+})
+
+/*
+ * For signed types, detecting overflow is much harder, especially if
+ * we want to avoid UB. But the interface of these macros is such that
+ * we must provide a result in *d, and in fact we must produce the
+ * result promised by gcc's builtins, which is simply the possibly
+ * wrapped-around value. Fortunately, we can just formally do the
+ * operations in the widest relevant unsigned type (u64) and then
+ * truncate the result - gcc is smart enough to generate the same code
+ * with and without the (u64) casts.
+ */
+
+/*
+ * Adding two signed integers can overflow only if they have the same
+ * sign, and overflow has happened iff the result has the opposite
+ * sign.
+ */
+#define __signed_add_overflow(a, b, d) ({      \
+       typeof(a) __a = (a);                    \
+       typeof(b) __b = (b);                    \
+       typeof(d) __d = (d);                    \
+       (void) (&__a == &__b);                  \
+       (void) (&__a == __d);                   \
+       *__d = (u64)__a + (u64)__b;             \
+       (((~(__a ^ __b)) & (*__d ^ __a))        \
+               & type_min(typeof(__a))) != 0;  \
+})
+
+/*
+ * Subtraction is similar, except that overflow can now happen only
+ * when the signs are opposite. In this case, overflow has happened if
+ * the result has the opposite sign of a.
+ */
+#define __signed_sub_overflow(a, b, d) ({      \
+       typeof(a) __a = (a);                    \
+       typeof(b) __b = (b);                    \
+       typeof(d) __d = (d);                    \
+       (void) (&__a == &__b);                  \
+       (void) (&__a == __d);                   \
+       *__d = (u64)__a - (u64)__b;             \
+       ((((__a ^ __b)) & (*__d ^ __a))         \
+               & type_min(typeof(__a))) != 0;  \
+})
+
+/*
+ * Signed multiplication is rather hard. gcc always follows C99, so
+ * division is truncated towards 0. This means that we can write the
+ * overflow check like this:
+ *
+ * (a > 0 && (b > MAX/a || b < MIN/a)) ||
+ * (a < -1 && (b > MIN/a || b < MAX/a)) ||
+ * (a == -1 && b == MIN)
+ *
+ * The redundant casts of -1 are to silence an annoying -Wtype-limits
+ * (included in -Wextra) warning: When the type is u8 or u16, the
+ * __b_c_e in check_mul_overflow obviously selects
+ * __unsigned_mul_overflow, but unfortunately gcc still parses this
+ * code and warns about the limited range of __b.
+ */
+
+#define __signed_mul_overflow(a, b, d) ({                              \
+       typeof(a) __a = (a);                                            \
+       typeof(b) __b = (b);                                            \
+       typeof(d) __d = (d);                                            \
+       typeof(a) __tmax = type_max(typeof(a));                         \
+       typeof(a) __tmin = type_min(typeof(a));                         \
+       (void) (&__a == &__b);                                          \
+       (void) (&__a == __d);                                           \
+       *__d = (u64)__a * (u64)__b;                                     \
+       (__b > 0   && (__a > __tmax/__b || __a < __tmin/__b)) ||        \
+       (__b < (typeof(__b))-1  && (__a > __tmin/__b || __a < __tmax/__b)) || \
+       (__b == (typeof(__b))-1 && __a == __tmin);                      \
+})
+
+
+#define check_add_overflow(a, b, d)                                    \
+       __builtin_choose_expr(is_signed_type(typeof(a)),                \
+                       __signed_add_overflow(a, b, d),                 \
+                       __unsigned_add_overflow(a, b, d))
+
+#define check_sub_overflow(a, b, d)                                    \
+       __builtin_choose_expr(is_signed_type(typeof(a)),                \
+                       __signed_sub_overflow(a, b, d),                 \
+                       __unsigned_sub_overflow(a, b, d))
+
+#define check_mul_overflow(a, b, d)                                    \
+       __builtin_choose_expr(is_signed_type(typeof(a)),                \
+                       __signed_mul_overflow(a, b, d),                 \
+                       __unsigned_mul_overflow(a, b, d))
+
+
+#endif /* COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW */
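
Whichever branch gets compiled in, the three checkers share one contract: the
possibly wrapped-around result is always stored through the third argument, and
the macro evaluates to true exactly when the mathematical result did not fit
the type. For example:

    #include <limits.h>
    #include <stdio.h>

    void demo(void)
    {
            int sum;

            if (check_add_overflow(INT_MAX, 1, &sum))
                    /* taken: sum holds the wrapped value (INT_MIN) */
                    printf("overflow, wrapped to %d\n", sum);
            else
                    printf("sum = %d\n", sum);
    }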
+
+/**
+ * array_size() - Calculate size of 2-dimensional array.
+ *
+ * @a: dimension one
+ * @b: dimension two
+ *
+ * Calculates size of 2-dimensional array: @a * @b.
+ *
+ * Returns: number of bytes needed to represent the array or SIZE_MAX on
+ * overflow.
+ */
+static inline __must_check size_t array_size(size_t a, size_t b)
+{
+       size_t bytes;
+
+       if (check_mul_overflow(a, b, &bytes))
+               return SIZE_MAX;
+
+       return bytes;
+}
+
+/**
+ * array3_size() - Calculate size of 3-dimensional array.
+ *
+ * @a: dimension one
+ * @b: dimension two
+ * @c: dimension three
+ *
+ * Calculates size of 3-dimensional array: @a * @b * @c.
+ *
+ * Returns: number of bytes needed to represent the array or SIZE_MAX on
+ * overflow.
+ */
+static inline __must_check size_t array3_size(size_t a, size_t b, size_t c)
+{
+       size_t bytes;
+
+       if (check_mul_overflow(a, b, &bytes))
+               return SIZE_MAX;
+       if (check_mul_overflow(bytes, c, &bytes))
+               return SIZE_MAX;
+
+       return bytes;
+}
+
+static inline __must_check size_t __ab_c_size(size_t n, size_t size, size_t c)
+{
+       size_t bytes;
+
+       if (check_mul_overflow(n, size, &bytes))
+               return SIZE_MAX;
+       if (check_add_overflow(bytes, c, &bytes))
+               return SIZE_MAX;
+
+       return bytes;
+}
+
+/**
+ * struct_size() - Calculate size of structure with trailing array.
+ * @p: Pointer to the structure.
+ * @member: Name of the array member.
+ * @n: Number of elements in the array.
+ *
+ * Calculates size of memory needed for structure @p followed by an
+ * array of @n @member elements.
+ *
+ * Return: number of bytes needed or SIZE_MAX on overflow.
+ */
+#define struct_size(p, member, n)                                      \
+       __ab_c_size(n,                                                  \
+                   sizeof(*(p)->member) + __must_be_array((p)->member),\
+                   sizeof(*(p)))
+
+#endif /* __LINUX_OVERFLOW_H */
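
Because these helpers saturate to SIZE_MAX instead of wrapping, and no
allocator can satisfy a SIZE_MAX request, an overflow surfaces as a clean
allocation failure rather than an undersized buffer. A sketch with a
hypothetical structure carrying a trailing flexible array:

    #include <stdlib.h>

    struct pkt_log {
            size_t   n;
            unsigned vals[];        /* flexible array member */
    };

    struct pkt_log *alloc_log(size_t n)
    {
            /* sizeof(*log) + n * sizeof(log->vals[0]), or SIZE_MAX on overflow */
            struct pkt_log *log = malloc(struct_size(log, vals, n));

            if (log)
                    log->n = n;
            return log;
    }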
diff --git a/tools/include/tools/libc_compat.h b/tools/include/tools/libc_compat.h
new file mode 100644 (file)
index 0000000..664ced8
--- /dev/null
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Copyright (C) 2018 Netronome Systems, Inc. */
+
+#ifndef __TOOLS_LIBC_COMPAT_H
+#define __TOOLS_LIBC_COMPAT_H
+
+#include <stdlib.h>
+#include <linux/overflow.h>
+
+#ifdef COMPAT_NEED_REALLOCARRAY
+static inline void *reallocarray(void *ptr, size_t nmemb, size_t size)
+{
+       size_t bytes;
+
+       if (unlikely(check_mul_overflow(nmemb, size, &bytes)))
+               return NULL;
+       return realloc(ptr, bytes);
+}
+#endif
+#endif
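
The shim gives builds without a native reallocarray() (glibc grew it in 2.26)
the same overflow-checked behaviour: nmemb * size is validated with
check_mul_overflow() before realloc() ever sees it. The calling convention is
the usual realloc() one - keep the old pointer until the call succeeds, as
kernel_syms_load() above does:

    tmp = reallocarray(arr, n + 1, sizeof(*arr));
    if (!tmp) {
            free(arr);              /* arr is still valid on failure */
            return -ENOMEM;
    }
    arr = tmp;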
index b7db3261c62d124760e98d9c851c1b01e64bdb03..870113916caca5ef3acbad43c821d5b5111d0ffc 100644 (file)
@@ -1826,7 +1826,7 @@ union bpf_attr {
  *             A non-negative value equal to or less than *size* on success,
  *             or a negative error in case of failure.
  *
- * int skb_load_bytes_relative(const struct sk_buff *skb, u32 offset, void *to, u32 len, u32 start_header)
+ * int bpf_skb_load_bytes_relative(const struct sk_buff *skb, u32 offset, void *to, u32 len, u32 start_header)
  *     Description
  *             This helper is similar to **bpf_skb_load_bytes**\ () in that
  *             it provides an easy way to load *len* bytes from *offset*
@@ -1877,7 +1877,7 @@ union bpf_attr {
  *             * < 0 if any input argument is invalid
  *             *   0 on success (packet is forwarded, nexthop neighbor exists)
  *             * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the
- *             *     packet is not forwarded or needs assist from full stack
+ *               packet is not forwarded or needs assist from full stack
  *
  * int bpf_sock_hash_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags)
  *     Description
@@ -2033,7 +2033,6 @@ union bpf_attr {
 *             This helper is only available if the kernel was compiled with
  *             the **CONFIG_BPF_LIRC_MODE2** configuration option set to
  *             "**y**".
- *
  *     Return
  *             0
  *
@@ -2053,7 +2052,6 @@ union bpf_attr {
 *             This helper is only available if the kernel was compiled with
  *             the **CONFIG_BPF_LIRC_MODE2** configuration option set to
  *             "**y**".
- *
  *     Return
  *             0
  *
@@ -2557,6 +2555,9 @@ enum {
                                         * Arg1: old_state
                                         * Arg2: new_state
                                         */
+       BPF_SOCK_OPS_TCP_LISTEN_CB,     /* Called on listen(2), right after
+                                        * socket transition to LISTEN state.
+                                        */
 };
 
 /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
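
The new callback fires right after a socket enters TCP_LISTEN, giving sockops
programs a natural point to adjust listening sockets. A minimal BPF-C sketch
(helper declarations come from the selftests' bpf_helpers.h; the program name
and flag choice are illustrative):

    #include <linux/bpf.h>
    #include "bpf_helpers.h"

    SEC("sockops")
    int mark_listen(struct bpf_sock_ops *skops)
    {
            if (skops->op == BPF_SOCK_OPS_TCP_LISTEN_CB)
                    /* e.g. subscribe to further state transitions */
                    bpf_sock_ops_cb_flags_set(skops, BPF_SOCK_OPS_STATE_CB_FLAG);
            return 1;
    }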
index 6070e655042dcb18f1013e1c740d8304023e79c4..13a861135127f04e21590955de6802ea40380208 100644 (file)
@@ -1 +1 @@
-libbpf-y := libbpf.o bpf.o nlattr.o btf.o
+libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o
index 5390e7725e4306408bbe5078652508f09263b0df..d49902e818b52f3e3d61ab503ca0df869ad877d9 100644 (file)
@@ -66,7 +66,7 @@ ifndef VERBOSE
 endif
 
 FEATURE_USER = .libbpf
-FEATURE_TESTS = libelf libelf-getphdrnum libelf-mmap bpf
+FEATURE_TESTS = libelf libelf-mmap bpf reallocarray
 FEATURE_DISPLAY = libelf bpf
 
 INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi -I$(srctree)/tools/perf
@@ -116,8 +116,8 @@ ifeq ($(feature-libelf-mmap), 1)
   override CFLAGS += -DHAVE_LIBELF_MMAP_SUPPORT
 endif
 
-ifeq ($(feature-libelf-getphdrnum), 1)
-  override CFLAGS += -DHAVE_ELF_GETPHDRNUM_SUPPORT
+ifeq ($(feature-reallocarray), 0)
+  override CFLAGS += -DCOMPAT_NEED_REALLOCARRAY
 endif
 
 # Append required CFLAGS
index 2d270c560df3982edc105fdb944d738be3c03024..09ecf8162f7a8a8edd1947c64c17da9720179af9 100644 (file)
 
 #define BTF_MAX_NR_TYPES 65535
 
+#define IS_MODIFIER(k) (((k) == BTF_KIND_TYPEDEF) || \
+               ((k) == BTF_KIND_VOLATILE) || \
+               ((k) == BTF_KIND_CONST) || \
+               ((k) == BTF_KIND_RESTRICT))
+
 static struct btf_type btf_void;
 
 struct btf {
@@ -32,14 +37,6 @@ struct btf {
        int fd;
 };
 
-static const char *btf_name_by_offset(const struct btf *btf, __u32 offset)
-{
-       if (offset < btf->hdr->str_len)
-               return &btf->strings[offset];
-       else
-               return NULL;
-}
-
 static int btf_add_type(struct btf *btf, struct btf_type *t)
 {
        if (btf->types_size - btf->nr_types < 2) {
@@ -269,6 +266,26 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id)
        return nelems * size;
 }
 
+int btf__resolve_type(const struct btf *btf, __u32 type_id)
+{
+       const struct btf_type *t;
+       int depth = 0;
+
+       t = btf__type_by_id(btf, type_id);
+       while (depth < MAX_RESOLVE_DEPTH &&
+              !btf_type_is_void_or_null(t) &&
+              IS_MODIFIER(BTF_INFO_KIND(t->info))) {
+               type_id = t->type;
+               t = btf__type_by_id(btf, type_id);
+               depth++;
+       }
+
+       if (depth == MAX_RESOLVE_DEPTH || btf_type_is_void_or_null(t))
+               return -EINVAL;
+
+       return type_id;
+}
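
btf__resolve_type() peels typedef, volatile, const and restrict wrappers until
it reaches the underlying type, bounded by MAX_RESOLVE_DEPTH so a malformed,
cyclic chain cannot loop forever. Combined with the newly exported
btf__name_by_offset() it lets a caller name the real type behind an id; a
sketch:

    int id = btf__resolve_type(btf, type_id);

    if (id >= 0) {
            const struct btf_type *t = btf__type_by_id(btf, id);
            const char *name = btf__name_by_offset(btf, t->name_off);

            printf("resolves to %s\n", name && *name ? name : "(anon)");
    }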
+
 __s32 btf__find_by_name(const struct btf *btf, const char *type_name)
 {
        __u32 i;
@@ -278,7 +295,7 @@ __s32 btf__find_by_name(const struct btf *btf, const char *type_name)
 
        for (i = 1; i <= btf->nr_types; i++) {
                const struct btf_type *t = btf->types[i];
-               const char *name = btf_name_by_offset(btf, t->name_off);
+               const char *name = btf__name_by_offset(btf, t->name_off);
 
                if (name && !strcmp(type_name, name))
                        return i;
@@ -368,3 +385,11 @@ int btf__fd(const struct btf *btf)
 {
        return btf->fd;
 }
+
+const char *btf__name_by_offset(const struct btf *btf, __u32 offset)
+{
+       if (offset < btf->hdr->str_len)
+               return &btf->strings[offset];
+       else
+               return NULL;
+}
index e2a09a155f84faf7031b93faf37003060ecbd455..43c658ccfc2ba4061b244f32eb0cf3bfdeb54ba8 100644 (file)
@@ -19,6 +19,8 @@ struct btf *btf__new(__u8 *data, __u32 size, btf_print_fn_t err_log);
 __s32 btf__find_by_name(const struct btf *btf, const char *type_name);
 const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 id);
 __s64 btf__resolve_size(const struct btf *btf, __u32 type_id);
+int btf__resolve_type(const struct btf *btf, __u32 type_id);
 int btf__fd(const struct btf *btf);
+const char *btf__name_by_offset(const struct btf *btf, __u32 offset);
 
 #endif
index 1aafdbe827fedce6434a372041d2df43385e549a..26e9527ee464e73c3f7a98b8ada11a241cf70a05 100644 (file)
@@ -22,6 +22,7 @@
  * License along with this program; if not,  see <http://www.gnu.org/licenses>
  */
 
+#define _GNU_SOURCE
 #include <stdlib.h>
 #include <stdio.h>
 #include <stdarg.h>
@@ -42,6 +43,7 @@
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <sys/vfs.h>
+#include <tools/libc_compat.h>
 #include <libelf.h>
 #include <gelf.h>
 
@@ -96,54 +98,6 @@ void libbpf_set_print(libbpf_print_fn_t warn,
 
 #define STRERR_BUFSIZE  128
 
-#define ERRNO_OFFSET(e)                ((e) - __LIBBPF_ERRNO__START)
-#define ERRCODE_OFFSET(c)      ERRNO_OFFSET(LIBBPF_ERRNO__##c)
-#define NR_ERRNO       (__LIBBPF_ERRNO__END - __LIBBPF_ERRNO__START)
-
-static const char *libbpf_strerror_table[NR_ERRNO] = {
-       [ERRCODE_OFFSET(LIBELF)]        = "Something wrong in libelf",
-       [ERRCODE_OFFSET(FORMAT)]        = "BPF object format invalid",
-       [ERRCODE_OFFSET(KVERSION)]      = "'version' section incorrect or lost",
-       [ERRCODE_OFFSET(ENDIAN)]        = "Endian mismatch",
-       [ERRCODE_OFFSET(INTERNAL)]      = "Internal error in libbpf",
-       [ERRCODE_OFFSET(RELOC)]         = "Relocation failed",
-       [ERRCODE_OFFSET(VERIFY)]        = "Kernel verifier blocks program loading",
-       [ERRCODE_OFFSET(PROG2BIG)]      = "Program too big",
-       [ERRCODE_OFFSET(KVER)]          = "Incorrect kernel version",
-       [ERRCODE_OFFSET(PROGTYPE)]      = "Kernel doesn't support this program type",
-       [ERRCODE_OFFSET(WRNGPID)]       = "Wrong pid in netlink message",
-       [ERRCODE_OFFSET(INVSEQ)]        = "Invalid netlink sequence",
-};
-
-int libbpf_strerror(int err, char *buf, size_t size)
-{
-       if (!buf || !size)
-               return -1;
-
-       err = err > 0 ? err : -err;
-
-       if (err < __LIBBPF_ERRNO__START) {
-               int ret;
-
-               ret = strerror_r(err, buf, size);
-               buf[size - 1] = '\0';
-               return ret;
-       }
-
-       if (err < __LIBBPF_ERRNO__END) {
-               const char *msg;
-
-               msg = libbpf_strerror_table[ERRNO_OFFSET(err)];
-               snprintf(buf, size, "%s", msg);
-               buf[size - 1] = '\0';
-               return 0;
-       }
-
-       snprintf(buf, size, "Unknown libbpf error %d", err);
-       buf[size - 1] = '\0';
-       return -1;
-}
-
 #define CHECK_ERR(action, err, out) do {       \
        err = action;                   \
        if (err)                        \
@@ -235,6 +189,7 @@ struct bpf_object {
        size_t nr_maps;
 
        bool loaded;
+       bool has_pseudo_calls;
 
        /*
         * Information when doing elf related work. Only valid if fd
@@ -369,7 +324,7 @@ bpf_object__add_program(struct bpf_object *obj, void *data, size_t size,
        progs = obj->programs;
        nr_progs = obj->nr_programs;
 
-       progs = realloc(progs, sizeof(progs[0]) * (nr_progs + 1));
+       progs = reallocarray(progs, nr_progs + 1, sizeof(progs[0]));
        if (!progs) {
                /*
                 * In this case the original obj->programs
@@ -401,10 +356,6 @@ bpf_object__init_prog_names(struct bpf_object *obj)
                const char *name = NULL;
 
                prog = &obj->programs[pi];
-               if (prog->idx == obj->efile.text_shndx) {
-                       name = ".text";
-                       goto skip_search;
-               }
 
                for (si = 0; si < symbols->d_size / sizeof(GElf_Sym) && !name;
                     si++) {
@@ -427,12 +378,15 @@ bpf_object__init_prog_names(struct bpf_object *obj)
                        }
                }
 
+               if (!name && prog->idx == obj->efile.text_shndx)
+                       name = ".text";
+
                if (!name) {
                        pr_warning("failed to find sym for prog %s\n",
                                   prog->section_name);
                        return -EINVAL;
                }
-skip_search:
+
                prog->name = strdup(name);
                if (!prog->name) {
                        pr_warning("failed to allocate memory for prog sym %s\n",
@@ -871,8 +825,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
                                continue;
                        }
 
-                       reloc = realloc(reloc,
-                                       sizeof(*obj->efile.reloc) * nr_reloc);
+                       reloc = reallocarray(reloc, nr_reloc,
+                                            sizeof(*obj->efile.reloc));
                        if (!reloc) {
                                pr_warning("realloc failed\n");
                                err = -ENOMEM;
@@ -982,6 +936,7 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
                        prog->reloc_desc[i].type = RELO_CALL;
                        prog->reloc_desc[i].insn_idx = insn_idx;
                        prog->reloc_desc[i].text_off = sym.st_value;
+                       obj->has_pseudo_calls = true;
                        continue;
                }
 
@@ -1085,6 +1040,53 @@ static int bpf_map_find_btf_info(struct bpf_map *map, const struct btf *btf)
        return 0;
 }
 
+int bpf_map__reuse_fd(struct bpf_map *map, int fd)
+{
+       struct bpf_map_info info = {};
+       __u32 len = sizeof(info);
+       int new_fd, err;
+       char *new_name;
+
+       err = bpf_obj_get_info_by_fd(fd, &info, &len);
+       if (err)
+               return err;
+
+       new_name = strdup(info.name);
+       if (!new_name)
+               return -errno;
+
+       new_fd = open("/", O_RDONLY | O_CLOEXEC);
+       if (new_fd < 0)
+               goto err_free_new_name;
+
+       new_fd = dup3(fd, new_fd, O_CLOEXEC);
+       if (new_fd < 0)
+               goto err_close_new_fd;
+
+       err = zclose(map->fd);
+       if (err)
+               goto err_close_new_fd;
+       free(map->name);
+
+       map->fd = new_fd;
+       map->name = new_name;
+       map->def.type = info.type;
+       map->def.key_size = info.key_size;
+       map->def.value_size = info.value_size;
+       map->def.max_entries = info.max_entries;
+       map->def.map_flags = info.map_flags;
+       map->btf_key_type_id = info.btf_key_type_id;
+       map->btf_value_type_id = info.btf_value_type_id;
+
+       return 0;
+
+err_close_new_fd:
+       close(new_fd);
+err_free_new_name:
+       free(new_name);
+       return -errno;
+}
+
 static int
 bpf_object__create_maps(struct bpf_object *obj)
 {
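Together with the fd >= 0 early-out added to bpf_object__create_maps() just below, the new bpf_map__reuse_fd() lets a caller adopt an already-created map (for instance one pinned by another process) between open and load. A hedged sketch of that flow; the object file name and pin path are illustrative assumptions:

#include <bpf/bpf.h>
#include <bpf/libbpf.h>

static int load_with_shared_map(void)
{
        struct bpf_object *obj;
        struct bpf_map *map;
        int pinned_fd, err;

        obj = bpf_object__open("prog.o");               /* hypothetical object */
        if (libbpf_get_error(obj))
                return -1;

        pinned_fd = bpf_obj_get("/sys/fs/bpf/shared");  /* hypothetical pin */
        if (pinned_fd < 0)
                return -1;

        map = bpf_map__next(NULL, obj);                 /* first map in obj */
        err = bpf_map__reuse_fd(map, pinned_fd);        /* dup fd, copy attrs */
        if (err)
                return err;

        /* bpf_object__create_maps() skips this map: its fd is preset */
        return bpf_object__load(obj);
}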
@@ -1097,6 +1099,12 @@ bpf_object__create_maps(struct bpf_object *obj)
                struct bpf_map_def *def = &map->def;
                int *pfd = &map->fd;
 
+               if (map->fd >= 0) {
+                       pr_debug("skip map create (preset) %s: fd=%d\n",
+                                map->name, map->fd);
+                       continue;
+               }
+
                create_attr.name = map->name;
                create_attr.map_ifindex = map->map_ifindex;
                create_attr.map_type = def->type;
@@ -1167,7 +1175,7 @@ bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj,
                        return -LIBBPF_ERRNO__RELOC;
                }
                new_cnt = prog->insns_cnt + text->insns_cnt;
-               new_insn = realloc(prog->insns, new_cnt * sizeof(*insn));
+               new_insn = reallocarray(prog->insns, new_cnt, sizeof(*insn));
                if (!new_insn) {
                        pr_warning("oom in prog realloc\n");
                        return -ENOMEM;
@@ -1431,6 +1439,12 @@ out:
        return err;
 }
 
+static bool bpf_program__is_function_storage(struct bpf_program *prog,
+                                            struct bpf_object *obj)
+{
+       return prog->idx == obj->efile.text_shndx && obj->has_pseudo_calls;
+}
+
 static int
 bpf_object__load_progs(struct bpf_object *obj)
 {
@@ -1438,7 +1452,7 @@ bpf_object__load_progs(struct bpf_object *obj)
        int err;
 
        for (i = 0; i < obj->nr_programs; i++) {
-               if (obj->programs[i].idx == obj->efile.text_shndx)
+               if (bpf_program__is_function_storage(&obj->programs[i], obj))
                        continue;
                err = bpf_program__load(&obj->programs[i],
                                        obj->license,
@@ -1518,15 +1532,26 @@ out:
        return ERR_PTR(err);
 }
 
-struct bpf_object *bpf_object__open(const char *path)
+struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr)
 {
        /* param validation */
-       if (!path)
+       if (!attr->file)
                return NULL;
 
-       pr_debug("loading %s\n", path);
+       pr_debug("loading %s\n", attr->file);
+
+       return __bpf_object__open(attr->file, NULL, 0,
+                                 bpf_prog_type__needs_kver(attr->prog_type));
+}
 
-       return __bpf_object__open(path, NULL, 0, true);
+struct bpf_object *bpf_object__open(const char *path)
+{
+       struct bpf_object_open_attr attr = {
+               .file           = path,
+               .prog_type      = BPF_PROG_TYPE_UNSPEC,
+       };
+
+       return bpf_object__open_xattr(&attr);
 }
 
 struct bpf_object *bpf_object__open_buffer(void *obj_buf,
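The _xattr open variant exists so a caller can state the program type up front; for types that bpf_prog_type__needs_kver() reports as version-independent, open stops insisting on a 'version' section in the object. A short sketch (the object file name is an assumption):

#include <bpf/libbpf.h>

static struct bpf_object *open_xdp_object(void)
{
        struct bpf_object_open_attr attr = {
                .file           = "xdp_prog.o",         /* hypothetical file */
                .prog_type      = BPF_PROG_TYPE_XDP,    /* kver check skipped */
        };

        return bpf_object__open_xattr(&attr);
}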
@@ -1863,8 +1888,8 @@ void *bpf_object__priv(struct bpf_object *obj)
        return obj ? obj->priv : ERR_PTR(-EINVAL);
 }
 
-struct bpf_program *
-bpf_program__next(struct bpf_program *prev, struct bpf_object *obj)
+static struct bpf_program *
+__bpf_program__next(struct bpf_program *prev, struct bpf_object *obj)
 {
        size_t idx;
 
@@ -1885,6 +1910,18 @@ bpf_program__next(struct bpf_program *prev, struct bpf_object *obj)
        return &obj->programs[idx];
 }
 
+struct bpf_program *
+bpf_program__next(struct bpf_program *prev, struct bpf_object *obj)
+{
+       struct bpf_program *prog = prev;
+
+       do {
+               prog = __bpf_program__next(prog, obj);
+       } while (prog && bpf_program__is_function_storage(prog, obj));
+
+       return prog;
+}
+
 int bpf_program__set_priv(struct bpf_program *prog, void *priv,
                          bpf_program_clear_priv_t clear_priv)
 {
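Since .text only holds subprogram instructions when pseudo calls are present, the public iterator now filters it out through bpf_program__is_function_storage(), and existing iteration loops keep working unchanged. A minimal sketch:

#include <stdio.h>
#include <bpf/libbpf.h>

static void list_attachable_progs(struct bpf_object *obj)
{
        struct bpf_program *prog = NULL;

        /* .text function-storage entries are skipped transparently */
        while ((prog = bpf_program__next(prog, obj)))
                printf("prog section: %s\n",
                       bpf_program__title(prog, false));
}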
@@ -1901,6 +1938,11 @@ void *bpf_program__priv(struct bpf_program *prog)
        return prog ? prog->priv : ERR_PTR(-EINVAL);
 }
 
+void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex)
+{
+       prog->prog_ifindex = ifindex;
+}
+
 const char *bpf_program__title(struct bpf_program *prog, bool needs_copy)
 {
        const char *title;
@@ -2042,9 +2084,11 @@ static const struct {
        BPF_PROG_SEC("lwt_in",          BPF_PROG_TYPE_LWT_IN),
        BPF_PROG_SEC("lwt_out",         BPF_PROG_TYPE_LWT_OUT),
        BPF_PROG_SEC("lwt_xmit",        BPF_PROG_TYPE_LWT_XMIT),
+       BPF_PROG_SEC("lwt_seg6local",   BPF_PROG_TYPE_LWT_SEG6LOCAL),
        BPF_PROG_SEC("sockops",         BPF_PROG_TYPE_SOCK_OPS),
        BPF_PROG_SEC("sk_skb",          BPF_PROG_TYPE_SK_SKB),
        BPF_PROG_SEC("sk_msg",          BPF_PROG_TYPE_SK_MSG),
+       BPF_PROG_SEC("lirc_mode2",      BPF_PROG_TYPE_LIRC_MODE2),
        BPF_SA_PROG_SEC("cgroup/bind4", BPF_CGROUP_INET4_BIND),
        BPF_SA_PROG_SEC("cgroup/bind6", BPF_CGROUP_INET6_BIND),
        BPF_SA_PROG_SEC("cgroup/connect4", BPF_CGROUP_INET4_CONNECT),
@@ -2060,23 +2104,31 @@ static const struct {
 #undef BPF_S_PROG_SEC
 #undef BPF_SA_PROG_SEC
 
-static int bpf_program__identify_section(struct bpf_program *prog)
+int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
+                            enum bpf_attach_type *expected_attach_type)
 {
        int i;
 
-       if (!prog->section_name)
-               goto err;
-
-       for (i = 0; i < ARRAY_SIZE(section_names); i++)
-               if (strncmp(prog->section_name, section_names[i].sec,
-                           section_names[i].len) == 0)
-                       return i;
+       if (!name)
+               return -EINVAL;
 
-err:
-       pr_warning("failed to guess program type based on section name %s\n",
-                  prog->section_name);
+       for (i = 0; i < ARRAY_SIZE(section_names); i++) {
+               if (strncmp(name, section_names[i].sec, section_names[i].len))
+                       continue;
+               *prog_type = section_names[i].prog_type;
+               *expected_attach_type = section_names[i].expected_attach_type;
+               return 0;
+       }
+       return -EINVAL;
+}
 
-       return -1;
+static int
+bpf_program__identify_section(struct bpf_program *prog,
+                             enum bpf_prog_type *prog_type,
+                             enum bpf_attach_type *expected_attach_type)
+{
+       return libbpf_prog_type_by_name(prog->section_name, prog_type,
+                                       expected_attach_type);
 }
 
 int bpf_map__fd(struct bpf_map *map)
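The section-name guessing that used to be private is now exported as libbpf_prog_type_by_name(), so tools (bpftool in particular) can map an ELF section prefix to a program type and expected attach type without loading anything. A small lookup sketch:

#include <stdio.h>
#include <bpf/libbpf.h>

static int guess_types(const char *sec)
{
        enum bpf_prog_type prog_type;
        enum bpf_attach_type attach_type;

        if (libbpf_prog_type_by_name(sec, &prog_type, &attach_type))
                return -1;      /* -EINVAL: no matching section prefix */

        /* e.g. "cgroup/connect4" yields BPF_PROG_TYPE_CGROUP_SOCK_ADDR
         * with expected attach type BPF_CGROUP_INET4_CONNECT
         */
        printf("%s: prog type %d, attach type %d\n",
               sec, prog_type, attach_type);
        return 0;
}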
@@ -2125,6 +2177,16 @@ void *bpf_map__priv(struct bpf_map *map)
        return map ? map->priv : ERR_PTR(-EINVAL);
 }
 
+bool bpf_map__is_offload_neutral(struct bpf_map *map)
+{
+       return map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY;
+}
+
+void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex)
+{
+       map->map_ifindex = ifindex;
+}
+
 struct bpf_map *
 bpf_map__next(struct bpf_map *prev, struct bpf_object *obj)
 {
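These accessors round out device binding for offload: a program and the maps it uses must point at the same netdev before load, except for "offload neutral" maps (currently only perf event arrays), which stay on the host. A hedged sketch of binding an opened object to a device:

#include <bpf/libbpf.h>

static void bind_object_to_dev(struct bpf_object *obj, __u32 ifindex)
{
        struct bpf_program *prog;
        struct bpf_map *map;

        bpf_object__for_each_program(prog, obj)
                bpf_program__set_ifindex(prog, ifindex);

        bpf_map__for_each(map, obj) {
                /* perf event arrays must not be bound to the device */
                if (!bpf_map__is_offload_neutral(map))
                        bpf_map__set_ifindex(map, ifindex);
        }
}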
@@ -2199,12 +2261,15 @@ int bpf_prog_load(const char *file, enum bpf_prog_type type,
 int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
                        struct bpf_object **pobj, int *prog_fd)
 {
+       struct bpf_object_open_attr open_attr = {
+               .file           = attr->file,
+               .prog_type      = attr->prog_type,
+       };
        struct bpf_program *prog, *first_prog = NULL;
        enum bpf_attach_type expected_attach_type;
        enum bpf_prog_type prog_type;
        struct bpf_object *obj;
        struct bpf_map *map;
-       int section_idx;
        int err;
 
        if (!attr)
@@ -2212,8 +2277,7 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
        if (!attr->file)
                return -EINVAL;
 
-       obj = __bpf_object__open(attr->file, NULL, 0,
-                                bpf_prog_type__needs_kver(attr->prog_type));
+       obj = bpf_object__open_xattr(&open_attr);
        if (IS_ERR_OR_NULL(obj))
                return -ENOENT;
 
@@ -2226,26 +2290,27 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
                prog->prog_ifindex = attr->ifindex;
                expected_attach_type = attr->expected_attach_type;
                if (prog_type == BPF_PROG_TYPE_UNSPEC) {
-                       section_idx = bpf_program__identify_section(prog);
-                       if (section_idx < 0) {
+                       err = bpf_program__identify_section(prog, &prog_type,
+                                                           &expected_attach_type);
+                       if (err < 0) {
+                               pr_warning("failed to guess program type based on section name %s\n",
+                                          prog->section_name);
                                bpf_object__close(obj);
                                return -EINVAL;
                        }
-                       prog_type = section_names[section_idx].prog_type;
-                       expected_attach_type =
-                               section_names[section_idx].expected_attach_type;
                }
 
                bpf_program__set_type(prog, prog_type);
                bpf_program__set_expected_attach_type(prog,
                                                      expected_attach_type);
 
-               if (prog->idx != obj->efile.text_shndx && !first_prog)
+               if (!bpf_program__is_function_storage(prog, obj) && !first_prog)
                        first_prog = prog;
        }
 
        bpf_map__for_each(map, obj) {
-               map->map_ifindex = attr->ifindex;
+               if (!bpf_map__is_offload_neutral(map))
+                       map->map_ifindex = attr->ifindex;
        }
 
        if (!first_prog) {
index b33ae02f7d0e4f6c0e301d9f96f33e86a4585b5c..413778a9349939bb5d8076ef56e8e6ae8ae86efa 100644 (file)
@@ -66,7 +66,13 @@ void libbpf_set_print(libbpf_print_fn_t warn,
 /* Hide internal to user */
 struct bpf_object;
 
+struct bpf_object_open_attr {
+       const char *file;
+       enum bpf_prog_type prog_type;
+};
+
 struct bpf_object *bpf_object__open(const char *path);
+struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr);
 struct bpf_object *bpf_object__open_buffer(void *obj_buf,
                                           size_t obj_buf_sz,
                                           const char *name);
@@ -92,6 +98,9 @@ int bpf_object__set_priv(struct bpf_object *obj, void *priv,
                         bpf_object_clear_priv_t clear_priv);
 void *bpf_object__priv(struct bpf_object *prog);
 
+int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
+                            enum bpf_attach_type *expected_attach_type);
+
 /* Accessors of bpf_program */
 struct bpf_program;
 struct bpf_program *bpf_program__next(struct bpf_program *prog,
@@ -109,6 +118,7 @@ int bpf_program__set_priv(struct bpf_program *prog, void *priv,
                          bpf_program_clear_priv_t clear_priv);
 
 void *bpf_program__priv(struct bpf_program *prog);
+void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex);
 
 const char *bpf_program__title(struct bpf_program *prog, bool needs_copy);
 
@@ -251,6 +261,9 @@ typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *);
 int bpf_map__set_priv(struct bpf_map *map, void *priv,
                      bpf_map_clear_priv_t clear_priv);
 void *bpf_map__priv(struct bpf_map *map);
+int bpf_map__reuse_fd(struct bpf_map *map, int fd);
+bool bpf_map__is_offload_neutral(struct bpf_map *map);
+void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex);
 int bpf_map__pin(struct bpf_map *map, const char *path);
 
 long libbpf_get_error(const void *ptr);
diff --git a/tools/lib/bpf/libbpf_errno.c b/tools/lib/bpf/libbpf_errno.c
new file mode 100644 (file)
index 0000000..d9ba851
--- /dev/null
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: LGPL-2.1
+
+/*
+ * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
+ * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
+ * Copyright (C) 2015 Huawei Inc.
+ * Copyright (C) 2017 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License (not later!)
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not,  see <http://www.gnu.org/licenses>
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include "libbpf.h"
+
+#define ERRNO_OFFSET(e)                ((e) - __LIBBPF_ERRNO__START)
+#define ERRCODE_OFFSET(c)      ERRNO_OFFSET(LIBBPF_ERRNO__##c)
+#define NR_ERRNO       (__LIBBPF_ERRNO__END - __LIBBPF_ERRNO__START)
+
+static const char *libbpf_strerror_table[NR_ERRNO] = {
+       [ERRCODE_OFFSET(LIBELF)]        = "Something wrong in libelf",
+       [ERRCODE_OFFSET(FORMAT)]        = "BPF object format invalid",
+       [ERRCODE_OFFSET(KVERSION)]      = "'version' section incorrect or lost",
+       [ERRCODE_OFFSET(ENDIAN)]        = "Endian mismatch",
+       [ERRCODE_OFFSET(INTERNAL)]      = "Internal error in libbpf",
+       [ERRCODE_OFFSET(RELOC)]         = "Relocation failed",
+       [ERRCODE_OFFSET(VERIFY)]        = "Kernel verifier blocks program loading",
+       [ERRCODE_OFFSET(PROG2BIG)]      = "Program too big",
+       [ERRCODE_OFFSET(KVER)]          = "Incorrect kernel version",
+       [ERRCODE_OFFSET(PROGTYPE)]      = "Kernel doesn't support this program type",
+       [ERRCODE_OFFSET(WRNGPID)]       = "Wrong pid in netlink message",
+       [ERRCODE_OFFSET(INVSEQ)]        = "Invalid netlink sequence",
+};
+
+int libbpf_strerror(int err, char *buf, size_t size)
+{
+       if (!buf || !size)
+               return -1;
+
+       err = err > 0 ? err : -err;
+
+       if (err < __LIBBPF_ERRNO__START) {
+               int ret;
+
+               ret = strerror_r(err, buf, size);
+               buf[size - 1] = '\0';
+               return ret;
+       }
+
+       if (err < __LIBBPF_ERRNO__END) {
+               const char *msg;
+
+               msg = libbpf_strerror_table[ERRNO_OFFSET(err)];
+               snprintf(buf, size, "%s", msg);
+               buf[size - 1] = '\0';
+               return 0;
+       }
+
+       snprintf(buf, size, "Unknown libbpf error %d", err);
+       buf[size - 1] = '\0';
+       return -1;
+}
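With the error strings split into their own file, libbpf_strerror() remains the single entry point for both ordinary errnos and the private LIBBPF_ERRNO__* codes, accepting either sign. A usage sketch:

#include <stdio.h>
#include <bpf/libbpf.h>

static void report(const char *what, int err)
{
        char msg[128];

        /* handles -ENOENT as well as e.g. LIBBPF_ERRNO__VERIFY */
        libbpf_strerror(err, msg, sizeof(msg));
        fprintf(stderr, "%s: %s\n", what, msg);
}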
index a362e3d7abc633fd33db81aa3bd99da27b6edd4b..5169a97eb68b6493a37753dee498b2a09300873a 100644 (file)
@@ -61,6 +61,7 @@ $(OUTPUT)/test_dev_cgroup: cgroup_helpers.c
 $(OUTPUT)/test_sock: cgroup_helpers.c
 $(OUTPUT)/test_sock_addr: cgroup_helpers.c
 $(OUTPUT)/test_sockmap: cgroup_helpers.c
+$(OUTPUT)/test_tcpbpf_user: cgroup_helpers.c
 $(OUTPUT)/test_progs: trace_helpers.c
 $(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c
 
index c87b4e052ce961e8a9f2a639a7d1bbcae9a828eb..cf16948aad4adb1e32d33d21f0742ddf724e524e 100644 (file)
@@ -118,7 +118,7 @@ static int join_cgroup_from_top(char *cgroup_path)
  *
  * On success, it returns 0, otherwise on failure it returns 1.
  */
-int join_cgroup(char *path)
+int join_cgroup(const char *path)
 {
        char cgroup_path[PATH_MAX + 1];
 
@@ -158,7 +158,7 @@ void cleanup_cgroup_environment(void)
  * On success, it returns the file descriptor. On failure it returns 0.
  * If there is a failure, it prints the error to stderr.
  */
-int create_and_get_cgroup(char *path)
+int create_and_get_cgroup(const char *path)
 {
        char cgroup_path[PATH_MAX + 1];
        int fd;
@@ -186,7 +186,7 @@ int create_and_get_cgroup(char *path)
  * which is an invalid cgroup id.
  * If there is a failure, it prints the error to stderr.
  */
-unsigned long long get_cgroup_id(char *path)
+unsigned long long get_cgroup_id(const char *path)
 {
        int dirfd, err, flags, mount_id, fhsize;
        union {
index 20a4a5dcd469019907341562c799b42a3bd95ddf..d64bb895709078435295e7e8b1d981ff90a16e0b 100644 (file)
@@ -9,10 +9,10 @@
        __FILE__, __LINE__, clean_errno(), ##__VA_ARGS__)
 
 
-int create_and_get_cgroup(char *path);
-int join_cgroup(char *path);
+int create_and_get_cgroup(const char *path);
+int join_cgroup(const char *path);
 int setup_cgroup_environment(void);
 void cleanup_cgroup_environment(void);
-unsigned long long get_cgroup_id(char *path);
+unsigned long long get_cgroup_id(const char *path);
 
 #endif
index be800d0e7a841abfbc60545cf63fe33219db0c35..d59642e70f5625f8b7f1df2262d6c032cf520332 100755 (executable)
@@ -158,8 +158,9 @@ def tool(name, args, flags, JSON=True, ns="", fail=True, include_stderr=False):
     else:
         return ret, out
 
-def bpftool(args, JSON=True, ns="", fail=True):
-    return tool("bpftool", args, {"json":"-p"}, JSON=JSON, ns=ns, fail=fail)
+def bpftool(args, JSON=True, ns="", fail=True, include_stderr=False):
+    return tool("bpftool", args, {"json":"-p"}, JSON=JSON, ns=ns,
+                fail=fail, include_stderr=include_stderr)
 
 def bpftool_prog_list(expected=None, ns=""):
     _, progs = bpftool("prog show", JSON=True, ns=ns, fail=True)
@@ -201,6 +202,21 @@ def bpftool_map_list_wait(expected=0, n_retry=20):
         time.sleep(0.05)
     raise Exception("Time out waiting for map counts to stabilize want %d, have %d" % (expected, nmaps))
 
+def bpftool_prog_load(sample, file_name, maps=[], prog_type="xdp", dev=None,
+                      fail=True, include_stderr=False):
+    args = "prog load %s %s" % (os.path.join(bpf_test_dir, sample), file_name)
+    if prog_type is not None:
+        args += " type " + prog_type
+    if dev is not None:
+        args += " dev " + dev
+    if len(maps):
+        args += " map " + " map ".join(maps)
+
+    res = bpftool(args, fail=fail, include_stderr=include_stderr)
+    if res[0] == 0:
+        files.append(file_name)
+    return res
+
 def ip(args, force=False, JSON=True, ns="", fail=True, include_stderr=False):
     if force:
         args = "-force " + args
@@ -307,21 +323,25 @@ class NetdevSim:
     Class for netdevsim netdevice and its attributes.
     """
 
-    def __init__(self):
+    def __init__(self, link=None):
+        self.link = link
+
         self.dev = self._netdevsim_create()
         devs.append(self)
 
         self.ns = ""
 
         self.dfs_dir = '/sys/kernel/debug/netdevsim/%s' % (self.dev['ifname'])
+        self.sdev_dir = self.dfs_dir + '/sdev/'
         self.dfs_refresh()
 
     def __getitem__(self, key):
         return self.dev[key]
 
     def _netdevsim_create(self):
+        link = "" if self.link is None else "link " + self.link.dev['ifname']
         _, old  = ip("link show")
-        ip("link add sim%d type netdevsim")
+        ip("link add sim%d {link} type netdevsim".format(link=link))
         _, new  = ip("link show")
 
         for dev in new:
@@ -339,13 +359,18 @@ class NetdevSim:
         self.dfs = DebugfsDir(self.dfs_dir)
         return self.dfs
 
+    def dfs_read(self, f):
+        path = os.path.join(self.dfs_dir, f)
+        _, data = cmd('cat %s' % (path))
+        return data.strip()
+
     def dfs_num_bound_progs(self):
-        path = os.path.join(self.dfs_dir, "bpf_bound_progs")
+        path = os.path.join(self.sdev_dir, "bpf_bound_progs")
         _, progs = cmd('ls %s' % (path))
         return len(progs.split())
 
     def dfs_get_bound_progs(self, expected):
-        progs = DebugfsDir(os.path.join(self.dfs_dir, "bpf_bound_progs"))
+        progs = DebugfsDir(os.path.join(self.sdev_dir, "bpf_bound_progs"))
         if expected is not None:
             if len(progs) != expected:
                 fail(True, "%d BPF programs bound, expected %d" %
@@ -547,11 +572,11 @@ def check_extack(output, reference, args):
     if skip_extack:
         return
     lines = output.split("\n")
-    comp = len(lines) >= 2 and lines[1] == reference
+    comp = len(lines) >= 2 and lines[1] == 'Error: ' + reference
     fail(not comp, "Missing or incorrect netlink extack message")
 
 def check_extack_nsim(output, reference, args):
-    check_extack(output, "Error: netdevsim: " + reference, args)
+    check_extack(output, "netdevsim: " + reference, args)
 
 def check_no_extack(res, needle):
     fail((res[1] + res[2]).count(needle) or (res[1] + res[2]).count("Warning:"),
@@ -654,7 +679,7 @@ try:
     ret, _, err = sim.cls_bpf_add_filter(obj, skip_sw=True,
                                          fail=False, include_stderr=True)
     fail(ret == 0, "TC filter loaded without enabling TC offloads")
-    check_extack(err, "Error: TC offload is disabled on net device.", args)
+    check_extack(err, "TC offload is disabled on net device.", args)
     sim.wait_for_flush()
 
     sim.set_ethtool_tc_offloads(True)
@@ -694,7 +719,7 @@ try:
                                          skip_sw=True,
                                          fail=False, include_stderr=True)
     fail(ret == 0, "Offloaded a filter to chain other than 0")
-    check_extack(err, "Error: Driver supports only offload of chain 0.", args)
+    check_extack(err, "Driver supports only offload of chain 0.", args)
     sim.tc_flush_filters()
 
     start_test("Test TC replace...")
@@ -814,24 +839,20 @@ try:
          "Device parameters reported for non-offloaded program")
 
     start_test("Test XDP prog replace with bad flags...")
-    ret, _, err = sim.set_xdp(obj, "offload", force=True,
+    ret, _, err = sim.set_xdp(obj, "generic", force=True,
                               fail=False, include_stderr=True)
     fail(ret == 0, "Replaced XDP program with a program in different mode")
-    check_extack_nsim(err, "program loaded with different flags.", args)
+    fail(err.count("File exists") != 1, "Replaced driver XDP with generic")
     ret, _, err = sim.set_xdp(obj, "", force=True,
                               fail=False, include_stderr=True)
     fail(ret == 0, "Replaced XDP program with a program in different mode")
-    check_extack_nsim(err, "program loaded with different flags.", args)
+    check_extack(err, "program loaded with different flags.", args)
 
     start_test("Test XDP prog remove with bad flags...")
-    ret, _, err = sim.unset_xdp("offload", force=True,
-                                fail=False, include_stderr=True)
-    fail(ret == 0, "Removed program with a bad mode mode")
-    check_extack_nsim(err, "program loaded with different flags.", args)
     ret, _, err = sim.unset_xdp("", force=True,
                                 fail=False, include_stderr=True)
-    fail(ret == 0, "Removed program with a bad mode mode")
-    check_extack_nsim(err, "program loaded with different flags.", args)
+    fail(ret == 0, "Removed program with a bad mode")
+    check_extack(err, "program loaded with different flags.", args)
 
     start_test("Test MTU restrictions...")
     ret, _ = sim.set_mtu(9000, fail=False)
@@ -846,6 +867,25 @@ try:
     sim.set_mtu(1500)
 
     sim.wait_for_flush()
+    start_test("Test non-offload XDP attaching to HW...")
+    bpftool_prog_load("sample_ret0.o", "/sys/fs/bpf/nooffload")
+    nooffload = bpf_pinned("/sys/fs/bpf/nooffload")
+    ret, _, err = sim.set_xdp(nooffload, "offload",
+                              fail=False, include_stderr=True)
+    fail(ret == 0, "attached non-offloaded XDP program to HW")
+    check_extack_nsim(err, "xdpoffload of non-bound program.", args)
+    rm("/sys/fs/bpf/nooffload")
+
+    start_test("Test offload XDP attaching to drv...")
+    bpftool_prog_load("sample_ret0.o", "/sys/fs/bpf/offload",
+                      dev=sim['ifname'])
+    offload = bpf_pinned("/sys/fs/bpf/offload")
+    ret, _, err = sim.set_xdp(offload, "drv", fail=False, include_stderr=True)
+    fail(ret == 0, "attached offloaded XDP program to drv")
+    check_extack(err, "using device-bound program without HW_MODE flag is not supported.", args)
+    rm("/sys/fs/bpf/offload")
+    sim.wait_for_flush()
+
     start_test("Test XDP offload...")
     _, _, err = sim.set_xdp(obj, "offload", verbose=True, include_stderr=True)
     ipl = sim.ip_link_show(xdp=True)
@@ -891,6 +931,60 @@ try:
     rm(pin_file)
     bpftool_prog_list_wait(expected=0)
 
+    start_test("Test multi-attachment XDP - attach...")
+    sim.set_xdp(obj, "offload")
+    xdp = sim.ip_link_show(xdp=True)["xdp"]
+    offloaded = sim.dfs_read("bpf_offloaded_id")
+    fail("prog" not in xdp, "Base program not reported in single program mode")
+    fail(len(ipl["xdp"]["attached"]) != 1,
+         "Wrong attached program count with one program")
+
+    sim.set_xdp(obj, "")
+    two_xdps = sim.ip_link_show(xdp=True)["xdp"]
+    offloaded2 = sim.dfs_read("bpf_offloaded_id")
+
+    fail(two_xdps["mode"] != 4, "Bad mode reported with multiple programs")
+    fail("prog" in two_xdps, "Base program reported in multi program mode")
+    fail(xdp["attached"][0] not in two_xdps["attached"],
+         "Offload program not reported after driver activated")
+    fail(len(two_xdps["attached"]) != 2,
+         "Wrong attached program count with two programs")
+    fail(two_xdps["attached"][0]["prog"]["id"] ==
+         two_xdps["attached"][1]["prog"]["id"],
+         "offloaded and drv programs have the same id")
+    fail(offloaded != offloaded2,
+         "offload ID changed after loading driver program")
+
+    start_test("Test multi-attachment XDP - replace...")
+    ret, _, err = sim.set_xdp(obj, "offload", fail=False, include_stderr=True)
+    fail(err.count("busy") != 1, "Replaced one of programs without -force")
+
+    start_test("Test multi-attachment XDP - detach...")
+    ret, _, err = sim.unset_xdp("drv", force=True,
+                                fail=False, include_stderr=True)
+    fail(ret == 0, "Removed program with a bad mode")
+    check_extack(err, "program loaded with different flags.", args)
+
+    sim.unset_xdp("offload")
+    xdp = sim.ip_link_show(xdp=True)["xdp"]
+    offloaded = sim.dfs_read("bpf_offloaded_id")
+
+    fail(xdp["mode"] != 1, "Bad mode reported after multiple programs")
+    fail("prog" not in xdp,
+         "Base program not reported after multi program mode")
+    fail(xdp["attached"][0] not in two_xdps["attached"],
+         "Offload program not reported after driver activated")
+    fail(len(ipl["xdp"]["attached"]) != 1,
+         "Wrong attached program count with remaining programs")
+    fail(offloaded != "0", "offload ID reported with only driver program left")
+
+    start_test("Test multi-attachment XDP - device remove...")
+    sim.set_xdp(obj, "offload")
+    sim.remove()
+
+    sim = NetdevSim()
+    sim.set_ethtool_tc_offloads(True)
+
     start_test("Test mixing of TC and XDP...")
     sim.tc_add_ingress()
     sim.set_xdp(obj, "offload")
@@ -1085,6 +1179,106 @@ try:
     fail(ret == 0,
          "netdevsim didn't refuse to create a map with offload disabled")
 
+    sim.remove()
+
+    start_test("Test multi-dev ASIC program reuse...")
+    simA = NetdevSim()
+    simB1 = NetdevSim()
+    simB2 = NetdevSim(link=simB1)
+    simB3 = NetdevSim(link=simB1)
+    sims = (simA, simB1, simB2, simB3)
+    simB = (simB1, simB2, simB3)
+
+    bpftool_prog_load("sample_map_ret0.o", "/sys/fs/bpf/nsimA",
+                      dev=simA['ifname'])
+    progA = bpf_pinned("/sys/fs/bpf/nsimA")
+    bpftool_prog_load("sample_map_ret0.o", "/sys/fs/bpf/nsimB",
+                      dev=simB1['ifname'])
+    progB = bpf_pinned("/sys/fs/bpf/nsimB")
+
+    simA.set_xdp(progA, "offload", JSON=False)
+    for d in simB:
+        d.set_xdp(progB, "offload", JSON=False)
+
+    start_test("Test multi-dev ASIC cross-dev replace...")
+    ret, _ = simA.set_xdp(progB, "offload", force=True, JSON=False, fail=False)
+    fail(ret == 0, "cross-ASIC program allowed")
+    for d in simB:
+        ret, _ = d.set_xdp(progA, "offload", force=True, JSON=False, fail=False)
+        fail(ret == 0, "cross-ASIC program allowed")
+
+    start_test("Test multi-dev ASIC cross-dev install...")
+    for d in sims:
+        d.unset_xdp("offload")
+
+    ret, _, err = simA.set_xdp(progB, "offload", force=True, JSON=False,
+                               fail=False, include_stderr=True)
+    fail(ret == 0, "cross-ASIC program allowed")
+    check_extack_nsim(err, "program bound to different dev.", args)
+    for d in simB:
+        ret, _, err = d.set_xdp(progA, "offload", force=True, JSON=False,
+                                fail=False, include_stderr=True)
+        fail(ret == 0, "cross-ASIC program allowed")
+        check_extack_nsim(err, "program bound to different dev.", args)
+
+    start_test("Test multi-dev ASIC cross-dev map reuse...")
+
+    mapA = bpftool("prog show %s" % (progA))[1]["map_ids"][0]
+    mapB = bpftool("prog show %s" % (progB))[1]["map_ids"][0]
+
+    ret, _ = bpftool_prog_load("sample_map_ret0.o", "/sys/fs/bpf/nsimB_",
+                               dev=simB3['ifname'],
+                               maps=["idx 0 id %d" % (mapB)],
+                               fail=False)
+    fail(ret != 0, "couldn't reuse a map on the same ASIC")
+    rm("/sys/fs/bpf/nsimB_")
+
+    ret, _, err = bpftool_prog_load("sample_map_ret0.o", "/sys/fs/bpf/nsimA_",
+                                    dev=simA['ifname'],
+                                    maps=["idx 0 id %d" % (mapB)],
+                                    fail=False, include_stderr=True)
+    fail(ret == 0, "could reuse a map on a different ASIC")
+    fail(err.count("offload device mismatch between prog and map") == 0,
+         "error message missing for cross-ASIC map")
+
+    ret, _, err = bpftool_prog_load("sample_map_ret0.o", "/sys/fs/bpf/nsimB_",
+                                    dev=simB1['ifname'],
+                                    maps=["idx 0 id %d" % (mapA)],
+                                    fail=False, include_stderr=True)
+    fail(ret == 0, "could reuse a map on a different ASIC")
+    fail(err.count("offload device mismatch between prog and map") == 0,
+         "error message missing for cross-ASIC map")
+
+    start_test("Test multi-dev ASIC cross-dev destruction...")
+    bpftool_prog_list_wait(expected=2)
+
+    simA.remove()
+    bpftool_prog_list_wait(expected=1)
+
+    ifnameB = bpftool("prog show %s" % (progB))[1]["dev"]["ifname"]
+    fail(ifnameB != simB1['ifname'], "program not bound to original device")
+    simB1.remove()
+    bpftool_prog_list_wait(expected=1)
+
+    start_test("Test multi-dev ASIC cross-dev destruction - move...")
+    ifnameB = bpftool("prog show %s" % (progB))[1]["dev"]["ifname"]
+    fail(ifnameB not in (simB2['ifname'], simB3['ifname']),
+         "program not bound to remaining devices")
+
+    simB2.remove()
+    ifnameB = bpftool("prog show %s" % (progB))[1]["dev"]["ifname"]
+    fail(ifnameB != simB3['ifname'], "program not bound to remaining device")
+
+    simB3.remove()
+    bpftool_prog_list_wait(expected=0)
+
+    start_test("Test multi-dev ASIC cross-dev destruction - orphaned...")
+    ret, out = bpftool("prog show %s" % (progB), fail=False)
+    fail(ret == 0, "got information about orphaned program")
+    fail("error" not in out, "no error reported for get info on orphaned")
+    fail(out["error"] != "can't get prog info: No such device",
+         "wrong error for get info on orphaned")
+
     print("%s: OK" % (os.path.basename(__file__)))
 
 finally:
index a5e76b9219b9d86ab83d92d9f4964fea5d30c915..2e45c92d11111784ff0e5d5b520af48ad986d911 100644 (file)
@@ -998,8 +998,9 @@ int init_pktinfo(int domain, struct cmsghdr *cmsg)
        return 0;
 }
 
-static int sendmsg_to_server(const struct sockaddr_storage *addr,
-                            socklen_t addr_len, int set_cmsg, int *syscall_err)
+static int sendmsg_to_server(int type, const struct sockaddr_storage *addr,
+                            socklen_t addr_len, int set_cmsg, int flags,
+                            int *syscall_err)
 {
        union {
                char buf[CMSG_SPACE(sizeof(struct in6_pktinfo))];
@@ -1022,7 +1023,7 @@ static int sendmsg_to_server(const struct sockaddr_storage *addr,
                goto err;
        }
 
-       fd = socket(domain, SOCK_DGRAM, 0);
+       fd = socket(domain, type, 0);
        if (fd == -1) {
                log_err("Failed to create client socket");
                goto err;
@@ -1052,7 +1053,7 @@ static int sendmsg_to_server(const struct sockaddr_storage *addr,
                }
        }
 
-       if (sendmsg(fd, &hdr, 0) != sizeof(data)) {
+       if (sendmsg(fd, &hdr, flags) != sizeof(data)) {
                log_err("Fail to send message to server");
                *syscall_err = errno;
                goto err;
@@ -1066,6 +1067,15 @@ out:
        return fd;
 }
 
+static int fastconnect_to_server(const struct sockaddr_storage *addr,
+                                socklen_t addr_len)
+{
+       int sendmsg_err;
+
+       return sendmsg_to_server(SOCK_STREAM, addr, addr_len, /*set_cmsg*/0,
+                                MSG_FASTOPEN, &sendmsg_err);
+}
+
 static int recvmsg_from_client(int sockfd, struct sockaddr_storage *src_addr)
 {
        struct timeval tv;
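fastconnect_to_server() relies on client-side TCP Fast Open: sendmsg() with MSG_FASTOPEN on a fresh, unconnected SOCK_STREAM socket performs the connect implicitly and can carry the payload in the SYN (the client bit of net.ipv4.tcp_fastopen must be enabled). A standalone sketch of the same idiom using sendto():

#include <unistd.h>
#include <sys/socket.h>

#ifndef MSG_FASTOPEN
#define MSG_FASTOPEN 0x20000000
#endif

static int tfo_connect_send(const struct sockaddr *addr, socklen_t alen,
                            const void *buf, size_t len)
{
        int fd = socket(addr->sa_family, SOCK_STREAM, 0);

        if (fd < 0)
                return -1;
        /* no connect(): the kernel connects and queues the data, putting
         * it in the SYN once a Fast Open cookie is cached
         */
        if (sendto(fd, buf, len, MSG_FASTOPEN, addr, alen) < 0) {
                close(fd);
                return -1;
        }
        return fd;
}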
@@ -1185,6 +1195,20 @@ static int run_connect_test_case(const struct sock_addr_test *test)
        if (cmp_local_ip(clientfd, &expected_src_addr))
                goto err;
 
+       if (test->type == SOCK_STREAM) {
+               /* Test TCP Fast Open scenario */
+               clientfd = fastconnect_to_server(&requested_addr, addr_len);
+               if (clientfd == -1)
+                       goto err;
+
+               /* Make sure src and dst addrs were overridden properly */
+               if (cmp_peer_addr(clientfd, &expected_addr))
+                       goto err;
+
+               if (cmp_local_ip(clientfd, &expected_src_addr))
+                       goto err;
+       }
+
        goto out;
 err:
        err = -1;
@@ -1222,8 +1246,9 @@ static int run_sendmsg_test_case(const struct sock_addr_test *test)
                if (clientfd >= 0)
                        close(clientfd);
 
-               clientfd = sendmsg_to_server(&requested_addr, addr_len,
-                                            set_cmsg, &err);
+               clientfd = sendmsg_to_server(test->type, &requested_addr,
+                                            addr_len, set_cmsg, /*flags*/0,
+                                            &err);
                if (err)
                        goto out;
                else if (clientfd == -1)
index 2fe43289943c7fcf2fc5715c5abb4b4d10d06b1f..7bcfa62070056e0a07d6d8c03e25ccfa37e2bb0b 100644 (file)
@@ -12,5 +12,6 @@ struct tcpbpf_globals {
        __u32 good_cb_test_rv;
        __u64 bytes_received;
        __u64 bytes_acked;
+       __u32 num_listen;
 };
 #endif
index 3e645ee41ed5fcc033266645b8e893c143fb79fc..4b7fd540cea9dd89d506bfe693be0414e79b6d90 100644 (file)
@@ -96,15 +96,22 @@ int bpf_testcb(struct bpf_sock_ops *skops)
                        if (!gp)
                                break;
                        g = *gp;
-                       g.total_retrans = skops->total_retrans;
-                       g.data_segs_in = skops->data_segs_in;
-                       g.data_segs_out = skops->data_segs_out;
-                       g.bytes_received = skops->bytes_received;
-                       g.bytes_acked = skops->bytes_acked;
+                       if (skops->args[0] == BPF_TCP_LISTEN) {
+                               g.num_listen++;
+                       } else {
+                               g.total_retrans = skops->total_retrans;
+                               g.data_segs_in = skops->data_segs_in;
+                               g.data_segs_out = skops->data_segs_out;
+                               g.bytes_received = skops->bytes_received;
+                               g.bytes_acked = skops->bytes_acked;
+                       }
                        bpf_map_update_elem(&global_map, &key, &g,
                                            BPF_ANY);
                }
                break;
+       case BPF_SOCK_OPS_TCP_LISTEN_CB:
+               bpf_sock_ops_cb_flags_set(skops, BPF_SOCK_OPS_STATE_CB_FLAG);
+               break;
        default:
                rv = -1;
        }
index 84ab5163c8281211f606167cfc23ed152efa0fcb..a275c29713760b79a2e74700d14fd78931947976 100644 (file)
@@ -1,27 +1,59 @@
 // SPDX-License-Identifier: GPL-2.0
+#include <inttypes.h>
 #include <stdio.h>
 #include <stdlib.h>
-#include <stdio.h>
 #include <unistd.h>
 #include <errno.h>
-#include <signal.h>
 #include <string.h>
-#include <assert.h>
-#include <linux/perf_event.h>
-#include <linux/ptrace.h>
 #include <linux/bpf.h>
-#include <sys/ioctl.h>
-#include <sys/time.h>
 #include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
 #include <bpf/bpf.h>
 #include <bpf/libbpf.h>
-#include "bpf_util.h"
+
 #include "bpf_rlimit.h"
-#include <linux/perf_event.h>
+#include "bpf_util.h"
+#include "cgroup_helpers.h"
+
 #include "test_tcpbpf.h"
 
+#define EXPECT_EQ(expected, actual, fmt)                       \
+       do {                                                    \
+               if ((expected) != (actual)) {                   \
+                       printf("  Value of: " #actual "\n"      \
+                              "    Actual: %" fmt "\n"         \
+                              "  Expected: %" fmt "\n",        \
+                              (actual), (expected));           \
+                       goto err;                               \
+               }                                               \
+       } while (0)
+
+int verify_result(const struct tcpbpf_globals *result)
+{
+       __u32 expected_events;
+
+       expected_events = ((1 << BPF_SOCK_OPS_TIMEOUT_INIT) |
+                          (1 << BPF_SOCK_OPS_RWND_INIT) |
+                          (1 << BPF_SOCK_OPS_TCP_CONNECT_CB) |
+                          (1 << BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB) |
+                          (1 << BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB) |
+                          (1 << BPF_SOCK_OPS_NEEDS_ECN) |
+                          (1 << BPF_SOCK_OPS_STATE_CB) |
+                          (1 << BPF_SOCK_OPS_TCP_LISTEN_CB));
+
+       EXPECT_EQ(expected_events, result->event_map, "#" PRIx32);
+       EXPECT_EQ(501ULL, result->bytes_received, "llu");
+       EXPECT_EQ(1002ULL, result->bytes_acked, "llu");
+       EXPECT_EQ(1, result->data_segs_in, PRIu32);
+       EXPECT_EQ(1, result->data_segs_out, PRIu32);
+       EXPECT_EQ(0x80, result->bad_cb_test_rv, PRIu32);
+       EXPECT_EQ(0, result->good_cb_test_rv, PRIu32);
+       EXPECT_EQ(1, result->num_listen, PRIu32);
+
+       return 0;
+err:
+       return -1;
+}
+
 static int bpf_find_map(const char *test, struct bpf_object *obj,
                        const char *name)
 {
@@ -35,42 +67,28 @@ static int bpf_find_map(const char *test, struct bpf_object *obj,
        return bpf_map__fd(map);
 }
 
-#define SYSTEM(CMD)                                            \
-       do {                                                    \
-               if (system(CMD)) {                              \
-                       printf("system(%s) FAILS!\n", CMD);     \
-               }                                               \
-       } while (0)
-
 int main(int argc, char **argv)
 {
        const char *file = "test_tcpbpf_kern.o";
        struct tcpbpf_globals g = {0};
-       int cg_fd, prog_fd, map_fd;
-       bool debug_flag = false;
+       const char *cg_path = "/foo";
        int error = EXIT_FAILURE;
        struct bpf_object *obj;
-       char cmd[100], *dir;
-       struct stat buffer;
+       int prog_fd, map_fd;
+       int cg_fd = -1;
        __u32 key = 0;
-       int pid;
        int rv;
 
-       if (argc > 1 && strcmp(argv[1], "-d") == 0)
-               debug_flag = true;
+       if (setup_cgroup_environment())
+               goto err;
 
-       dir = "/tmp/cgroupv2/foo";
+       cg_fd = create_and_get_cgroup(cg_path);
+       if (!cg_fd)
+               goto err;
 
-       if (stat(dir, &buffer) != 0) {
-               SYSTEM("mkdir -p /tmp/cgroupv2");
-               SYSTEM("mount -t cgroup2 none /tmp/cgroupv2");
-               SYSTEM("mkdir -p /tmp/cgroupv2/foo");
-       }
-       pid = (int) getpid();
-       sprintf(cmd, "echo %d >> /tmp/cgroupv2/foo/cgroup.procs", pid);
-       SYSTEM(cmd);
+       if (join_cgroup(cg_path))
+               goto err;
 
-       cg_fd = open(dir, O_DIRECTORY, O_RDONLY);
        if (bpf_prog_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) {
                printf("FAILED: load_bpf_file failed for: %s\n", file);
                goto err;
@@ -83,7 +101,10 @@ int main(int argc, char **argv)
                goto err;
        }
 
-       SYSTEM("./tcp_server.py");
+       if (system("./tcp_server.py")) {
+               printf("FAILED: TCP server\n");
+               goto err;
+       }
 
        map_fd = bpf_find_map(__func__, obj, "global_map");
        if (map_fd < 0)
@@ -95,34 +116,16 @@ int main(int argc, char **argv)
                goto err;
        }
 
-       if (g.bytes_received != 501 || g.bytes_acked != 1002 ||
-           g.data_segs_in != 1 || g.data_segs_out != 1 ||
-           (g.event_map ^ 0x47e) != 0 || g.bad_cb_test_rv != 0x80 ||
-               g.good_cb_test_rv != 0) {
+       if (verify_result(&g)) {
                printf("FAILED: Wrong stats\n");
-               if (debug_flag) {
-                       printf("\n");
-                       printf("bytes_received: %d (expecting 501)\n",
-                              (int)g.bytes_received);
-                       printf("bytes_acked:    %d (expecting 1002)\n",
-                              (int)g.bytes_acked);
-                       printf("data_segs_in:   %d (expecting 1)\n",
-                              g.data_segs_in);
-                       printf("data_segs_out:  %d (expecting 1)\n",
-                              g.data_segs_out);
-                       printf("event_map:      0x%x (at least 0x47e)\n",
-                              g.event_map);
-                       printf("bad_cb_test_rv: 0x%x (expecting 0x80)\n",
-                              g.bad_cb_test_rv);
-                       printf("good_cb_test_rv:0x%x (expecting 0)\n",
-                              g.good_cb_test_rv);
-               }
                goto err;
        }
+
        printf("PASSED!\n");
        error = 0;
 err:
        bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS);
+       close(cg_fd);
+       cleanup_cgroup_environment();
        return error;
-
 }
index 3868dcb634201a1016ef5dc0bcba7450cb9eff57..cabe2a3a3b30076f3bcd282758e43774bde542ec 100644 (file)
@@ -88,7 +88,7 @@ static int page_size;
 static int page_cnt = 8;
 static struct perf_event_mmap_page *header;
 
-int perf_event_mmap(int fd)
+int perf_event_mmap_header(int fd, struct perf_event_mmap_page **header)
 {
        void *base;
        int mmap_size;
@@ -102,10 +102,15 @@ int perf_event_mmap(int fd)
                return -1;
        }
 
-       header = base;
+       *header = base;
        return 0;
 }
 
+int perf_event_mmap(int fd)
+{
+       return perf_event_mmap_header(fd, &header);
+}
+
 static int perf_event_poll(int fd)
 {
        struct pollfd pfd = { .fd = fd, .events = POLLIN };
@@ -163,3 +168,42 @@ int perf_event_poller(int fd, perf_event_print_fn output_fn)
 
        return ret;
 }
+
+int perf_event_poller_multi(int *fds, struct perf_event_mmap_page **headers,
+                           int num_fds, perf_event_print_fn output_fn)
+{
+       enum bpf_perf_event_ret ret;
+       struct pollfd *pfds;
+       void *buf = NULL;
+       size_t len = 0;
+       int i;
+
+       pfds = calloc(num_fds, sizeof(*pfds));
+       if (!pfds)
+               return LIBBPF_PERF_EVENT_ERROR;
+
+       for (i = 0; i < num_fds; i++) {
+               pfds[i].fd = fds[i];
+               pfds[i].events = POLLIN;
+       }
+
+       for (;;) {
+               poll(pfds, num_fds, 1000);
+               for (i = 0; i < num_fds; i++) {
+                       if (!pfds[i].revents)
+                               continue;
+
+                       ret = bpf_perf_event_read_simple(headers[i],
+                                                        page_cnt * page_size,
+                                                        page_size, &buf, &len,
+                                                        bpf_perf_event_print,
+                                                        output_fn);
+                       if (ret != LIBBPF_PERF_EVENT_CONT)
+                               break;
+               }
+       }
+       free(buf);
+       free(pfds);
+
+       return ret;
+}
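perf_event_poller_multi() generalizes the single-ring poller to one mmapped ring per fd (typically per CPU), draining whichever ring reports data via bpf_perf_event_read_simple(). A hedged sketch of driving it; the per-CPU perf event fds are assumed to be opened by the caller:

#include "trace_helpers.h"

#define MAX_CPUS 8                      /* assumed bound for this sketch */

/* hypothetical callback: consume one record, keep polling */
static enum bpf_perf_event_ret handle_event(void *data, int size)
{
        return LIBBPF_PERF_EVENT_CONT;
}

static int poll_all_rings(int *fds, int ncpus)
{
        struct perf_event_mmap_page *headers[MAX_CPUS];
        int i;

        if (ncpus > MAX_CPUS)
                return -1;

        /* fds[i] is a per-CPU perf event fd (e.g. from perf_event_open);
         * mmap each ring and record its header page
         */
        for (i = 0; i < ncpus; i++)
                if (perf_event_mmap_header(fds[i], &headers[i]))
                        return -1;

        return perf_event_poller_multi(fds, headers, ncpus, handle_event);
}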
index 3b4bcf7f5084ff55901e508216e24f6853f2214d..18924f23db1b99f30fbf198830868d98d7eb42b4 100644 (file)
@@ -3,6 +3,7 @@
 #define __TRACE_HELPER_H
 
 #include <libbpf.h>
+#include <linux/perf_event.h>
 
 struct ksym {
        long addr;
@@ -16,6 +17,9 @@ long ksym_get_addr(const char *name);
 typedef enum bpf_perf_event_ret (*perf_event_print_fn)(void *data, int size);
 
 int perf_event_mmap(int fd);
+int perf_event_mmap_header(int fd, struct perf_event_mmap_page **header);
 /* return LIBBPF_PERF_EVENT_DONE or LIBBPF_PERF_EVENT_ERROR */
 int perf_event_poller(int fd, perf_event_print_fn output_fn);
+int perf_event_poller_multi(int *fds, struct perf_event_mmap_page **headers,
+                           int num_fds, perf_event_print_fn output_fn);
 #endif
diff --git a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre.sh b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre.sh
new file mode 100755 (executable)
index 0000000..76f1ab4
--- /dev/null
@@ -0,0 +1,217 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses the standard topology for testing gretap. See
+# ../../../net/forwarding/mirror_gre_topo_lib.sh for more details.
+#
+# Test various features of offloading gretap mirrors that are specific to
+# mlxsw.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+NUM_NETIFS=6
+source $lib_dir/lib.sh
+source $lib_dir/mirror_lib.sh
+source $lib_dir/mirror_gre_lib.sh
+source $lib_dir/mirror_gre_topo_lib.sh
+
+setup_keyful()
+{
+       tunnel_create gt6-key ip6gretap 2001:db8:3::1 2001:db8:3::2 \
+                     ttl 100 tos inherit allow-localremote \
+                     key 1234
+
+       tunnel_create h3-gt6-key ip6gretap 2001:db8:3::2 2001:db8:3::1 \
+                     key 1234
+       ip link set h3-gt6-key vrf v$h3
+       matchall_sink_create h3-gt6-key
+
+       ip address add dev $swp3 2001:db8:3::1/64
+       ip address add dev $h3 2001:db8:3::2/64
+}
+
+cleanup_keyful()
+{
+       ip address del dev $h3 2001:db8:3::2/64
+       ip address del dev $swp3 2001:db8:3::1/64
+
+       tunnel_destroy h3-gt6-key
+       tunnel_destroy gt6-key
+}
+
+setup_soft()
+{
+       # Set up a topology for testing underlay routes that point at an
+       # unsupported soft device.
+
+       tunnel_create gt6-soft ip6gretap 2001:db8:4::1 2001:db8:4::2 \
+                     ttl 100 tos inherit allow-localremote
+
+       tunnel_create h3-gt6-soft ip6gretap 2001:db8:4::2 2001:db8:4::1
+       ip link set h3-gt6-soft vrf v$h3
+       matchall_sink_create h3-gt6-soft
+
+       ip link add name v1 type veth peer name v2
+       ip link set dev v1 up
+       ip address add dev v1 2001:db8:4::1/64
+
+       ip link set dev v2 vrf v$h3
+       ip link set dev v2 up
+       ip address add dev v2 2001:db8:4::2/64
+}
+
+cleanup_soft()
+{
+       ip link del dev v1
+
+       tunnel_destroy h3-gt6-soft
+       tunnel_destroy gt6-soft
+}
+
+setup_prepare()
+{
+       h1=${NETIFS[p1]}
+       swp1=${NETIFS[p2]}
+
+       swp2=${NETIFS[p3]}
+       h2=${NETIFS[p4]}
+
+       swp3=${NETIFS[p5]}
+       h3=${NETIFS[p6]}
+
+       vrf_prepare
+       mirror_gre_topo_create
+
+       ip address add dev $swp3 2001:db8:2::1/64
+       ip address add dev $h3 2001:db8:2::2/64
+
+       ip address add dev $swp3 192.0.2.129/28
+       ip address add dev $h3 192.0.2.130/28
+
+       setup_keyful
+       setup_soft
+}
+
+cleanup()
+{
+       pre_cleanup
+
+       cleanup_soft
+       cleanup_keyful
+
+       ip address del dev $h3 2001:db8:2::2/64
+       ip address del dev $swp3 2001:db8:2::1/64
+
+       ip address del dev $h3 192.0.2.130/28
+       ip address del dev $swp3 192.0.2.129/28
+
+       mirror_gre_topo_destroy
+       vrf_cleanup
+}
+
+test_span_gre_ttl_inherit()
+{
+       local tundev=$1; shift
+       local type=$1; shift
+       local what=$1; shift
+
+       RET=0
+
+       ip link set dev $tundev type $type ttl inherit
+       mirror_install $swp1 ingress $tundev "matchall $tcflags"
+       fail_test_span_gre_dir $tundev ingress
+
+       ip link set dev $tundev type $type ttl 100
+
+       quick_test_span_gre_dir $tundev ingress
+       mirror_uninstall $swp1 ingress
+
+       log_test "$what: no offload on TTL of inherit ($tcflags)"
+}
+
+test_span_gre_tos_fixed()
+{
+       local tundev=$1; shift
+       local type=$1; shift
+       local what=$1; shift
+
+       RET=0
+
+       ip link set dev $tundev type $type tos 0x10
+       mirror_install $swp1 ingress $tundev "matchall $tcflags"
+       fail_test_span_gre_dir $tundev ingress
+
+       ip link set dev $tundev type $type tos inherit
+       quick_test_span_gre_dir $tundev ingress
+       mirror_uninstall $swp1 ingress
+
+       log_test "$what: no offload on a fixed TOS ($tcflags)"
+}
+
+test_span_failable()
+{
+       local should_fail=$1; shift
+       local tundev=$1; shift
+       local what=$1; shift
+
+       RET=0
+
+       mirror_install $swp1 ingress $tundev "matchall $tcflags"
+       if ((should_fail)); then
+           fail_test_span_gre_dir  $tundev ingress
+       else
+           quick_test_span_gre_dir $tundev ingress
+       fi
+       mirror_uninstall $swp1 ingress
+
+       log_test "$what: should_fail=$should_fail ($tcflags)"
+}
+
+test_failable()
+{
+       local should_fail=$1; shift
+
+       test_span_failable $should_fail gt6-key "mirror to keyful gretap"
+       test_span_failable $should_fail gt6-soft "mirror to gretap w/ soft underlay"
+}
+
+test_sw()
+{
+       slow_path_trap_install $swp1 ingress
+       slow_path_trap_install $swp1 egress
+
+       test_failable 0
+
+       slow_path_trap_uninstall $swp1 egress
+       slow_path_trap_uninstall $swp1 ingress
+}
+
+test_hw()
+{
+       test_failable 1
+
+       test_span_gre_tos_fixed gt4 gretap "mirror to gretap"
+       test_span_gre_tos_fixed gt6 ip6gretap "mirror to ip6gretap"
+
+       test_span_gre_ttl_inherit gt4 gretap "mirror to gretap"
+       test_span_gre_ttl_inherit gt6 ip6gretap "mirror to ip6gretap"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+if ! tc_offload_check; then
+    check_err 1 "Could not test offloaded functionality"
+    log_test "mlxsw-specific tests for mirror to gretap"
+    exit
+fi
+
+tcflags="skip_hw"
+test_sw
+
+tcflags="skip_sw"
+test_hw
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh
new file mode 100644 (file)
index 0000000..6f3a70d
--- /dev/null
@@ -0,0 +1,197 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# Test offloading a number of mirrors-to-gretap. The test creates a number of
+# tunnels. Then it adds one flower mirror for each of the tunnels, matching a
+# given host IP. Then it generates traffic at each of the host IPs and checks
+# that the traffic has been mirrored at the appropriate tunnel.
+#
+#   +--------------------------+                   +--------------------------+
+#   | H1                       |                   |                       H2 |
+#   |     + $h1                |                   |                $h2 +     |
+#   |     | 2001:db8:1:X::1/64 |                   | 2001:db8:1:X::2/64 |     |
+#   +-----|--------------------+                   +--------------------|-----+
+#         |                                                             |
+#   +-----|-------------------------------------------------------------|-----+
+#   | SW  o--> mirrors                                                  |     |
+#   | +---|-------------------------------------------------------------|---+ |
+#   | |   + $swp1                    BR                           $swp2 +   | |
+#   | +---------------------------------------------------------------------+ |
+#   |                                                                         |
+#   |     + $swp3                          + gt6-<X> (ip6gretap)              |
+#   |     | 2001:db8:2:X::1/64             : loc=2001:db8:2:X::1              |
+#   |     |                                : rem=2001:db8:2:X::2              |
+#   |     |                                : ttl=100                          |
+#   |     |                                : tos=inherit                      |
+#   |     |                                :                                  |
+#   +-----|--------------------------------:----------------------------------+
+#         |                                :
+#   +-----|--------------------------------:----------------------------------+
+#   | H3  + $h3                            + h3-gt6-<X> (ip6gretap)           |
+#   |       2001:db8:2:X::2/64               loc=2001:db8:2:X::2              |
+#   |                                        rem=2001:db8:2:X::1              |
+#   |                                        ttl=100                          |
+#   |                                        tos=inherit                      |
+#   |                                                                         |
+#   +-------------------------------------------------------------------------+
+
+source ../../../../net/forwarding/mirror_lib.sh
+
+MIRROR_NUM_NETIFS=6
+
+mirror_gre_ipv6_addr()
+{
+       local net=$1; shift
+       local num=$1; shift
+
+       printf "2001:db8:%x:%x" $net $num
+}
+
+mirror_gre_tunnels_create()
+{
+       local count=$1; shift
+       local should_fail=$1; shift
+
+       MIRROR_GRE_BATCH_FILE="$(mktemp)"
+       for ((i=0; i < count; ++i)); do
+               local match_dip=$(mirror_gre_ipv6_addr 1 $i)::2
+               local htun=h3-gt6-$i
+               local tun=gt6-$i
+
+               ((mirror_gre_tunnels++))
+
+               ip address add dev $h1 $(mirror_gre_ipv6_addr 1 $i)::1/64
+               ip address add dev $h2 $(mirror_gre_ipv6_addr 1 $i)::2/64
+
+               ip address add dev $swp3 $(mirror_gre_ipv6_addr 2 $i)::1/64
+               ip address add dev $h3 $(mirror_gre_ipv6_addr 2 $i)::2/64
+
+               tunnel_create $tun ip6gretap \
+                             $(mirror_gre_ipv6_addr 2 $i)::1 \
+                             $(mirror_gre_ipv6_addr 2 $i)::2 \
+                             ttl 100 tos inherit allow-localremote
+
+               tunnel_create $htun ip6gretap \
+                             $(mirror_gre_ipv6_addr 2 $i)::2 \
+                             $(mirror_gre_ipv6_addr 2 $i)::1
+               ip link set $htun vrf v$h3
+               matchall_sink_create $htun
+
+               cat >> $MIRROR_GRE_BATCH_FILE <<-EOF
+                       filter add dev $swp1 ingress pref 1000 \
+                               protocol ipv6 \
+                               flower $tcflags dst_ip $match_dip \
+                               action mirred egress mirror dev $tun
+               EOF
+       done
+
+       tc -b $MIRROR_GRE_BATCH_FILE
+       check_err_fail $should_fail $? "Mirror rule insertion"
+}
+
+mirror_gre_tunnels_destroy()
+{
+       local count=$1; shift
+
+       for ((i=0; i < count; ++i)); do
+               local htun=h3-gt6-$i
+               local tun=gt6-$i
+
+               ip address del dev $h3 $(mirror_gre_ipv6_addr 2 $i)::2/64
+               ip address del dev $swp3 $(mirror_gre_ipv6_addr 2 $i)::1/64
+
+               ip address del dev $h2 $(mirror_gre_ipv6_addr 1 $i)::2/64
+               ip address del dev $h1 $(mirror_gre_ipv6_addr 1 $i)::1/64
+
+               tunnel_destroy $htun
+               tunnel_destroy $tun
+       done
+}
+
+__mirror_gre_test()
+{
+       local count=$1; shift
+       local should_fail=$1; shift
+
+       mirror_gre_tunnels_create $count $should_fail
+       if ((should_fail)); then
+           return
+       fi
+
+       sleep 5
+
+       for ((i = 0; i < count; ++i)); do
+               local dip=$(mirror_gre_ipv6_addr 1 $i)::2
+               local htun=h3-gt6-$i
+               local message
+
+               icmp6_capture_install $htun
+               mirror_test v$h1 "" $dip $htun 100 10
+               icmp6_capture_uninstall $htun
+       done
+}
+
+mirror_gre_test()
+{
+       local count=$1; shift
+       local should_fail=$1; shift
+
+       if ! tc_offload_check $TC_FLOWER_NUM_NETIFS; then
+               check_err 1 "Could not test offloaded functionality"
+               return
+       fi
+
+       tcflags="skip_sw"
+       __mirror_gre_test $count $should_fail
+}
+
+mirror_gre_setup_prepare()
+{
+       h1=${NETIFS[p1]}
+       swp1=${NETIFS[p2]}
+
+       swp2=${NETIFS[p3]}
+       h2=${NETIFS[p4]}
+
+       swp3=${NETIFS[p5]}
+       h3=${NETIFS[p6]}
+
+       mirror_gre_tunnels=0
+
+       vrf_prepare
+
+       simple_if_init $h1
+       simple_if_init $h2
+       simple_if_init $h3
+
+       ip link add name br1 type bridge vlan_filtering 1
+       ip link set dev br1 up
+
+       ip link set dev $swp1 master br1
+       ip link set dev $swp1 up
+       tc qdisc add dev $swp1 clsact
+
+       ip link set dev $swp2 master br1
+       ip link set dev $swp2 up
+
+       ip link set dev $swp3 up
+}
+
+mirror_gre_cleanup()
+{
+       mirror_gre_tunnels_destroy $mirror_gre_tunnels
+
+       ip link set dev $swp3 down
+
+       ip link set dev $swp2 down
+
+       tc qdisc del dev $swp1 clsact
+       ip link set dev $swp1 down
+
+       ip link del dev br1
+
+       simple_if_fini $h3
+       simple_if_fini $h2
+       simple_if_fini $h1
+
+       vrf_cleanup
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh
new file mode 100755 (executable)
index 0000000..1ca631d
--- /dev/null
@@ -0,0 +1,189 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for DSCP prioritization and rewrite. Packets ingress $swp1 with a DSCP
+# tag and are prioritized according to the map at $swp1. They egress $swp2 and
+# the DSCP value is updated to match the map at that interface. The updated DSCP
+# tag is verified at $h2.
+#
+# ICMP responses are produced with the same DSCP tag that arrived at $h2. They
+# go through prioritization at $swp2 and DSCP retagging at $swp1. The tag is
+# verified at $h1; it should match the original tag.
+#
+# +----------------------+                             +----------------------+
+# | H1                   |                             |                   H2 |
+# |    + $h1             |                             |            $h2 +     |
+# |    | 192.0.2.1/28    |                             |   192.0.2.2/28 |     |
+# +----|-----------------+                             +----------------|-----+
+#      |                                                                |
+# +----|----------------------------------------------------------------|-----+
+# | SW |                                                                |     |
+# |  +-|----------------------------------------------------------------|-+   |
+# |  | + $swp1                       BR                           $swp2 + |   |
+# |  |   APP=0,5,10 .. 7,5,17                      APP=0,5,20 .. 7,5,27   |   |
+# |  +--------------------------------------------------------------------+   |
+# +---------------------------------------------------------------------------+
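+#
+# The APP entries use the lldptool triplet notation "app=<prio>,<sel>,<pid>";
+# selector 5 means the protocol ID is a DSCP value, so e.g. APP=0,5,10 binds
+# priority 0 to DSCP 10.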
+
+ALL_TESTS="
+       ping_ipv4
+       test_dscp
+"
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+NUM_NETIFS=4
+source $lib_dir/lib.sh
+
+h1_create()
+{
+       local dscp;
+
+       simple_if_init $h1 192.0.2.1/28
+       tc qdisc add dev $h1 clsact
+       dscp_capture_install $h1 10
+}
+
+h1_destroy()
+{
+       dscp_capture_uninstall $h1 10
+       tc qdisc del dev $h1 clsact
+       simple_if_fini $h1 192.0.2.1/28
+}
+
+h2_create()
+{
+       simple_if_init $h2 192.0.2.2/28
+       tc qdisc add dev $h2 clsact
+       dscp_capture_install $h2 20
+}
+
+h2_destroy()
+{
+       dscp_capture_uninstall $h2 20
+       tc qdisc del dev $h2 clsact
+       simple_if_fini $h2 192.0.2.2/28
+}
+
+dscp_map()
+{
+       local base=$1; shift
+
+       for prio in {0..7}; do
+               echo app=$prio,5,$((base + prio))
+       done
+}
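+
+# For example, "dscp_map 10" emits "app=0,5,10" through "app=7,5,17", one
+# triplet per line; the unquoted $(dscp_map 10) below therefore passes them
+# to lldptool as separate arguments.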
+
+switch_create()
+{
+       ip link add name br1 type bridge vlan_filtering 1
+       ip link set dev br1 up
+       ip link set dev $swp1 master br1
+       ip link set dev $swp1 up
+       ip link set dev $swp2 master br1
+       ip link set dev $swp2 up
+
+       lldptool -T -i $swp1 -V APP $(dscp_map 10) >/dev/null
+       lldptool -T -i $swp2 -V APP $(dscp_map 20) >/dev/null
+       lldpad_app_wait_set $swp1
+       lldpad_app_wait_set $swp2
+}
+
+switch_destroy()
+{
+       lldptool -T -i $swp2 -V APP -d $(dscp_map 20) >/dev/null
+       lldptool -T -i $swp1 -V APP -d $(dscp_map 10) >/dev/null
+       lldpad_app_wait_del
+
+       ip link set dev $swp2 nomaster
+       ip link set dev $swp1 nomaster
+       ip link del dev br1
+}
+
+setup_prepare()
+{
+       h1=${NETIFS[p1]}
+       swp1=${NETIFS[p2]}
+
+       swp2=${NETIFS[p3]}
+       h2=${NETIFS[p4]}
+
+       vrf_prepare
+
+       h1_create
+       h2_create
+       switch_create
+}
+
+cleanup()
+{
+       pre_cleanup
+
+       switch_destroy
+       h2_destroy
+       h1_destroy
+
+       vrf_cleanup
+}
+
+ping_ipv4()
+{
+       ping_test $h1 192.0.2.2
+}
+
+dscp_ping_test()
+{
+       local vrf_name=$1; shift
+       local sip=$1; shift
+       local dip=$1; shift
+       local prio=$1; shift
+       local dev_10=$1; shift
+       local dev_20=$1; shift
+
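+       # DSCP occupies the upper six bits of the TOS byte, so shifting the
+       # priority-derived DSCP left by two yields the TOS value passed to
+       # ${PING} -Q below.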
+       local dscp_10=$(((prio + 10) << 2))
+       local dscp_20=$(((prio + 20) << 2))
+
+       RET=0
+
+       local -A t0s
+       eval "t0s=($(dscp_fetch_stats $dev_10 10)
+                  $(dscp_fetch_stats $dev_20 20))"
+
+       ip vrf exec $vrf_name \
+          ${PING} -Q $dscp_10 ${sip:+-I $sip} $dip \
+                  -c 10 -i 0.1 -w 2 &> /dev/null
+
+       local -A t1s
+       eval "t1s=($(dscp_fetch_stats $dev_10 10)
+                  $(dscp_fetch_stats $dev_20 20))"
+
+       for key in ${!t0s[@]}; do
+               local expect
+               if ((key == prio+10 || key == prio+20)); then
+                       expect=10
+               else
+                       expect=0
+               fi
+
+               local delta=$((t1s[$key] - t0s[$key]))
+               ((expect == delta))
+               check_err $? "DSCP $key: Expected to capture $expect packets, got $delta."
+       done
+
+       log_test "DSCP rewrite: $dscp_10-(prio $prio)-$dscp_20"
+}
+
+test_dscp()
+{
+       for prio in {0..7}; do
+               dscp_ping_test v$h1 192.0.2.1 192.0.2.2 $prio $h1 $h2
+       done
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh
new file mode 100755 (executable)
index 0000000..281d907
--- /dev/null
@@ -0,0 +1,233 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for DSCP prioritization in the router.
+#
+# With ip_forward_update_priority disabled, the packets are expected to keep
+# their DSCP (only values 0..7 are used in this test) intact as they are
+# forwarded by the switch. That is verified at $h2. ICMP responses are formed
+# with the same DSCP as the requests, and likewise pass through the switch
+# intact, which is verified at $h1.
+#
+# With ip_forward_update_priority enabled, the router reprioritizes the packets
+# according to the table in reprioritize(). Thus, say, DSCP 7 maps to priority
+# 4, which on egress maps back to DSCP 4. The response packet then gets
+# reprioritized to 6, getting DSCP 6 on egress.
+#
+# +----------------------+                             +----------------------+
+# | H1                   |                             |                   H2 |
+# |    + $h1             |                             |            $h2 +     |
+# |    | 192.0.2.1/28    |                             |  192.0.2.18/28 |     |
+# +----|-----------------+                             +----------------|-----+
+#      |                                                                |
+# +----|----------------------------------------------------------------|-----+
+# | SW |                                                                |     |
+# |    + $swp1                                                    $swp2 +     |
+# |      192.0.2.2/28                                     192.0.2.17/28       |
+# |      APP=0,5,0 .. 7,5,7                          APP=0,5,0 .. 7,5,7       |
+# +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+       ping_ipv4
+       test_update
+       test_no_update
+"
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+NUM_NETIFS=4
+source $lib_dir/lib.sh
+
+reprioritize()
+{
+       local in=$1; shift
+
+       # This is based on rt_tos2priority in include/net/route.h. Assuming 1:1
+       # mapping between priorities and TOS, it yields a new priority for a
+       # packet with ingress priority of $in.
+       local -a reprio=(0 0 2 2 6 6 4 4)
+
+       echo ${reprio[$in]}
+}
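+
+# For example, "reprioritize 7" yields 4: a packet ingressing with DSCP 7 gets
+# priority 4 and egresses with DSCP 4; the response, arriving with DSCP 4, is
+# reprioritized to 6 in turn.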
+
+h1_create()
+{
+       local dscp;
+
+       simple_if_init $h1 192.0.2.1/28
+       tc qdisc add dev $h1 clsact
+       dscp_capture_install $h1 0
+       ip route add vrf v$h1 192.0.2.16/28 via 192.0.2.2
+}
+
+h1_destroy()
+{
+       ip route del vrf v$h1 192.0.2.16/28 via 192.0.2.2
+       dscp_capture_uninstall $h1 0
+       tc qdisc del dev $h1 clsact
+       simple_if_fini $h1 192.0.2.1/28
+}
+
+h2_create()
+{
+       simple_if_init $h2 192.0.2.18/28
+       tc qdisc add dev $h2 clsact
+       dscp_capture_install $h2 0
+       ip route add vrf v$h2 192.0.2.0/28 via 192.0.2.17
+}
+
+h2_destroy()
+{
+       ip route del vrf v$h2 192.0.2.0/28 via 192.0.2.17
+       dscp_capture_uninstall $h2 0
+       tc qdisc del dev $h2 clsact
+       simple_if_fini $h2 192.0.2.18/28
+}
+
+dscp_map()
+{
+       local base=$1; shift
+
+       for prio in {0..7}; do
+               echo app=$prio,5,$((base + prio))
+       done
+}
+
+switch_create()
+{
+       simple_if_init $swp1 192.0.2.2/28
+       __simple_if_init $swp2 v$swp1 192.0.2.17/28
+
+       lldptool -T -i $swp1 -V APP $(dscp_map 0) >/dev/null
+       lldptool -T -i $swp2 -V APP $(dscp_map 0) >/dev/null
+       lldpad_app_wait_set $swp1
+       lldpad_app_wait_set $swp2
+}
+
+switch_destroy()
+{
+       lldptool -T -i $swp2 -V APP -d $(dscp_map 0) >/dev/null
+       lldptool -T -i $swp1 -V APP -d $(dscp_map 0) >/dev/null
+       lldpad_app_wait_del
+
+       __simple_if_fini $swp2 192.0.2.17/28
+       simple_if_fini $swp1 192.0.2.2/28
+}
+
+setup_prepare()
+{
+       h1=${NETIFS[p1]}
+       swp1=${NETIFS[p2]}
+
+       swp2=${NETIFS[p3]}
+       h2=${NETIFS[p4]}
+
+       vrf_prepare
+
+       sysctl_set net.ipv4.ip_forward_update_priority 1
+       h1_create
+       h2_create
+       switch_create
+}
+
+cleanup()
+{
+       pre_cleanup
+
+       switch_destroy
+       h2_destroy
+       h1_destroy
+       sysctl_restore net.ipv4.ip_forward_update_priority
+
+       vrf_cleanup
+}
+
+ping_ipv4()
+{
+       ping_test $h1 192.0.2.18
+}
+
+dscp_ping_test()
+{
+       local vrf_name=$1; shift
+       local sip=$1; shift
+       local dip=$1; shift
+       local prio=$1; shift
+       local reprio=$1; shift
+       local dev1=$1; shift
+       local dev2=$1; shift
+
+       local prio2=$($reprio $prio)   # ICMP Request egress prio
+       local prio3=$($reprio $prio2)  # ICMP Response egress prio
+
+       local dscp=$((prio << 2))     # ICMP Request ingress DSCP
+       local dscp2=$((prio2 << 2))   # ICMP Request egress DSCP
+       local dscp3=$((prio3 << 2))   # ICMP Response egress DSCP
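+       # E.g. for prio 7 with reprioritize: prio2=4 and prio3=6, i.e. the
+       # request egresses the router with DSCP 16 and the response with
+       # DSCP 24.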
+
+       RET=0
+
+       eval "local -A dev1_t0s=($(dscp_fetch_stats $dev1 0))"
+       eval "local -A dev2_t0s=($(dscp_fetch_stats $dev2 0))"
+
+       ip vrf exec $vrf_name \
+          ${PING} -Q $dscp ${sip:+-I $sip} $dip \
+                  -c 10 -i 0.1 -w 2 &> /dev/null
+
+       eval "local -A dev1_t1s=($(dscp_fetch_stats $dev1 0))"
+       eval "local -A dev2_t1s=($(dscp_fetch_stats $dev2 0))"
+
+       for i in {0..7}; do
+               local dscpi=$((i << 2))
+               local expect2=0
+               local expect3=0
+
+               if ((i == prio2)); then
+                       expect2=10
+               fi
+               if ((i == prio3)); then
+                       expect3=10
+               fi
+
+               local delta=$((dev2_t1s[$i] - dev2_t0s[$i]))
+               ((expect2 == delta))
+               check_err $? "DSCP $dscpi@$dev2: Expected to capture $expect2 packets, got $delta."
+
+               delta=$((dev1_t1s[$i] - dev1_t0s[$i]))
+               ((expect3 == delta))
+               check_err $? "DSCP $dscpi@$dev1: Expected to capture $expect3 packets, got $delta."
+       done
+
+       log_test "DSCP rewrite: $dscp-(prio $prio2)-$dscp2-(prio $prio3)-$dscp3"
+}
+
+__test_update()
+{
+       local update=$1; shift
+       local reprio=$1; shift
+
+       sysctl_restore net.ipv4.ip_forward_update_priority
+       sysctl_set net.ipv4.ip_forward_update_priority $update
+
+       for prio in {0..7}; do
+               dscp_ping_test v$h1 192.0.2.1 192.0.2.18 $prio $reprio $h1 $h2
+       done
+}
+
+test_update()
+{
+       __test_update 1 reprioritize
+}
+
+test_no_update()
+{
+       __test_update 0 echo
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh
new file mode 100644 (file)
index 0000000..d231649
--- /dev/null
@@ -0,0 +1,167 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ROUTER_NUM_NETIFS=4
+
+router_h1_create()
+{
+       simple_if_init $h1 192.0.1.1/24
+       ip route add 193.0.0.0/8 via 192.0.1.2 dev $h1
+}
+
+router_h1_destroy()
+{
+       ip route del 193.0.0.0/8 via 192.0.1.2 dev $h1
+       simple_if_fini $h1 192.0.1.1/24
+}
+
+router_h2_create()
+{
+       simple_if_init $h2 192.0.2.1/24
+       tc qdisc add dev $h2 handle ffff: ingress
+}
+
+router_h2_destroy()
+{
+       tc qdisc del dev $h2 handle ffff: ingress
+       simple_if_fini $h2 192.0.2.1/24
+}
+
+router_create()
+{
+       ip link set dev $rp1 up
+       ip link set dev $rp2 up
+
+       ip address add 192.0.1.2/24 dev $rp1
+       ip address add 192.0.2.2/24 dev $rp2
+}
+
+router_destroy()
+{
+       ip address del 192.0.2.2/24 dev $rp2
+       ip address del 192.0.1.2/24 dev $rp1
+
+       ip link set dev $rp2 down
+       ip link set dev $rp1 down
+}
+
+router_setup_prepare()
+{
+       h1=${NETIFS[p1]}
+       rp1=${NETIFS[p2]}
+
+       rp2=${NETIFS[p3]}
+       h2=${NETIFS[p4]}
+
+       h1mac=$(mac_get $h1)
+       rp1mac=$(mac_get $rp1)
+
+       vrf_prepare
+
+       router_h1_create
+       router_h2_create
+
+       router_create
+}
+
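+# Passes when at least $route_count routes are marked "offload" in the output
+# of "ip route".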
+router_offload_validate()
+{
+       local route_count=$1
+       local offloaded_count
+
+       offloaded_count=$(ip route | grep -o 'offload' | wc -l)
+       [[ $offloaded_count -ge $route_count ]]
+}
+
+router_routes_create()
+{
+       local route_count=$1
+       local count=0
+
+       ROUTE_FILE="$(mktemp)"
+
+       for i in {0..255}
+       do
+               for j in {0..255}
+               do
+                       for k in {0..255}
+                       do
+                               if [[ $count -eq $route_count ]]; then
+                                       break 3
+                               fi
+
+                               echo route add 193.${i}.${j}.${k}/32 via \
+                                      192.0.2.1 dev $rp2  >> $ROUTE_FILE
+                               ((count++))
+                       done
+               done
+       done
+
+       ip -b $ROUTE_FILE &> /dev/null
+}
+
+router_routes_destroy()
+{
+       if [[ -v ROUTE_FILE ]]; then
+               rm -f $ROUTE_FILE
+       fi
+}
+
+router_test()
+{
+       local route_count=$1
+       local should_fail=$2
+       local count=0
+
+       RET=0
+
+       router_routes_create $route_count
+
+       router_offload_validate $route_count
+       check_err_fail $should_fail $? "Offload of $route_count routes"
+       if [[ $RET -ne 0 ]] || [[ $should_fail -eq 1 ]]; then
+               return
+       fi
+
+       tc filter add dev $h2 ingress protocol ip pref 1 flower \
+               skip_sw dst_ip 193.0.0.0/8 action drop
+
+       for i in {0..255}
+       do
+               for j in {0..255}
+               do
+                       for k in {0..255}
+                       do
+                               if [[ $count -eq $route_count ]]; then
+                                       break 3
+                               fi
+
+                               $MZ $h1 -c 1 -p 64 -a $h1mac -b $rp1mac \
+                                       -A 192.0.1.1 -B 193.${i}.${j}.${k} \
+                                       -t ip -q
+                               ((count++))
+                       done
+               done
+       done
+
+       tc_check_packets "dev $h2 ingress" 1 $route_count
+       check_err $? "Offload mismatch"
+
+       tc filter del dev $h2 ingress protocol ip pref 1 flower \
+               skip_sw dst_ip 193.0.0.0/8 action drop
+
+       router_routes_destroy
+}
+
+router_cleanup()
+{
+       pre_cleanup
+
+       router_routes_destroy
+       router_destroy
+
+       router_h2_destroy
+       router_h1_destroy
+
+       vrf_cleanup
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_lib_spectrum.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_lib_spectrum.sh
new file mode 100644 (file)
index 0000000..73035e2
--- /dev/null
@@ -0,0 +1,119 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source "../../../../net/forwarding/devlink_lib.sh"
+
+if [ "$DEVLINK_VIDDID" != "15b3:cb84" ]; then
+       echo "SKIP: test is tailored for Mellanox Spectrum"
+       exit 1
+fi
+
+# Needed for restoring the defaults
+declare -A KVD_DEFAULTS
+
+KVD_CHILDREN="linear hash_single hash_double"
+KVDL_CHILDREN="singles chunks large_chunks"
+
+devlink_sp_resource_minimize()
+{
+       local size
+       local i
+
+       for i in $KVD_CHILDREN; do
+               size=$(devlink_resource_get kvd "$i" | jq '.["size_min"]')
+               devlink_resource_size_set "$size" kvd "$i"
+       done
+
+       for i in $KVDL_CHILDREN; do
+               size=$(devlink_resource_get kvd linear "$i" | \
+                      jq '.["size_min"]')
+               devlink_resource_size_set "$size" kvd linear "$i"
+       done
+}
+
+devlink_sp_size_kvd_to_default()
+{
+       local need_reload=0
+       local i
+
+       for i in $KVD_CHILDREN; do
+               local size=$(echo "${KVD_DEFAULTS[kvd_$i]}" | jq '.["size"]')
+               current_size=$(devlink_resource_size_get kvd "$i")
+
+               if [ "$size" -ne "$current_size" ]; then
+                       devlink_resource_size_set "$size" kvd "$i"
+                       need_reload=1
+               fi
+       done
+
+       for i in $KVDL_CHILDREN; do
+               local size=$(echo "${KVD_DEFAULTS[kvd_linear_$i]}" | \
+                            jq '.["size"]')
+               current_size=$(devlink_resource_size_get kvd linear "$i")
+
+               if [ "$size" -ne "$current_size" ]; then
+                       devlink_resource_size_set "$size" kvd linear "$i"
+                       need_reload=1
+               fi
+       done
+
+       if [ "$need_reload" -ne "0" ]; then
+               devlink_reload
+       fi
+}
+
+devlink_sp_read_kvd_defaults()
+{
+       local key
+       local i
+
+       KVD_DEFAULTS[kvd]=$(devlink_resource_get "kvd")
+       for i in $KVD_CHILDREN; do
+               key=kvd_$i
+               KVD_DEFAULTS[$key]=$(devlink_resource_get kvd "$i")
+       done
+
+       for i in $KVDL_CHILDREN; do
+               key=kvd_linear_$i
+               KVD_DEFAULTS[$key]=$(devlink_resource_get kvd linear "$i")
+       done
+}
+
+KVD_PROFILES="default scale ipv4_max"
+
+devlink_sp_resource_kvd_profile_set()
+{
+       local profile=$1
+
+       case "$profile" in
+       scale)
+               devlink_resource_size_set 64000 kvd linear
+               devlink_resource_size_set 15616 kvd linear singles
+               devlink_resource_size_set 32000 kvd linear chunks
+               devlink_resource_size_set 16384 kvd linear large_chunks
+               devlink_resource_size_set 128000 kvd hash_single
+               devlink_resource_size_set 48000 kvd hash_double
+               devlink_reload
+               ;;
+       ipv4_max)
+               devlink_resource_size_set 64000 kvd linear
+               devlink_resource_size_set 15616 kvd linear singles
+               devlink_resource_size_set 32000 kvd linear chunks
+               devlink_resource_size_set 16384 kvd linear large_chunks
+               devlink_resource_size_set 144000 kvd hash_single
+               devlink_resource_size_set 32768 kvd hash_double
+               devlink_reload
+               ;;
+       default)
+               devlink_resource_size_set 98304 kvd linear
+               devlink_resource_size_set 16384 kvd linear singles
+               devlink_resource_size_set 49152 kvd linear chunks
+               devlink_resource_size_set 32768 kvd linear large_chunks
+               devlink_resource_size_set 87040 kvd hash_single
+               devlink_resource_size_set 60416 kvd hash_double
+               devlink_reload
+               ;;
+       *)
+               check_err 1 "Unknown profile $profile"
+       esac
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_resources.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_resources.sh
new file mode 100755 (executable)
index 0000000..b1fe960
--- /dev/null
@@ -0,0 +1,117 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=1
+source devlink_lib_spectrum.sh
+
+setup_prepare()
+{
+       devlink_sp_read_kvd_defaults
+}
+
+cleanup()
+{
+       pre_cleanup
+       devlink_sp_size_kvd_to_default
+}
+
+trap cleanup EXIT
+
+setup_prepare
+
+profiles_test()
+{
+       local i
+
+       log_info "Running profile tests"
+
+       for i in $KVD_PROFILES; do
+               RET=0
+               devlink_sp_resource_kvd_profile_set $i
+               log_test "'$i' profile"
+       done
+
+       # Default is explicitly tested at the end to ensure it's actually applied
+       RET=0
+       devlink_sp_resource_kvd_profile_set "default"
+       log_test "'default' profile"
+}
+
+resources_min_test()
+{
+       local size
+       local i
+       local j
+
+       log_info "Running KVD-minimum tests"
+
+       for i in $KVD_CHILDREN; do
+               RET=0
+               size=$(devlink_resource_get kvd "$i" | jq '.["size_min"]')
+               devlink_resource_size_set "$size" kvd "$i"
+
+               # In the case of linear, the sub-resources need to be minimized as well
+               if [[ "$i" == "linear" ]]; then
+                       for j in $KVDL_CHILDREN; do
+                               devlink_resource_size_set 0 kvd linear "$j"
+                       done
+               fi
+
+               devlink_reload
+               devlink_sp_size_kvd_to_default
+               log_test "'$i' minimize [$size]"
+       done
+}
+
+resources_max_test()
+{
+       local min_size
+       local size
+       local i
+       local j
+
+       log_info "Running KVD-maximum tests"
+       for i in $KVD_CHILDREN; do
+               RET=0
+               devlink_sp_resource_minimize
+
+               # Calculate the maximum possible size for the given partition
+               size=$(devlink_resource_size_get kvd)
+               for j in $KVD_CHILDREN; do
+                       if [ "$i" != "$j" ]; then
+                               min_size=$(devlink_resource_get kvd "$j" | \
+                                          jq '.["size_min"]')
+                               size=$((size - min_size))
+                       fi
+               done
+
+               # Test almost maximum size
+               devlink_resource_size_set "$((size - 128))" kvd "$i"
+               devlink_reload
+               log_test "'$i' almost maximize [$((size - 128))]"
+
+               # Test above maximum size
+               devlink resource set "$DEVLINK_DEV" \
+                       path "kvd/$i" size $((size + 128)) &> /dev/null
+               check_fail $? "Set kvd/$i to size $((size + 128)) should fail"
+               log_test "'$i' Overflow rejection [$((size + 128))]"
+
+               # Test maximum size
+               if [ "$i" == "hash_single" ] || [ "$i" == "hash_double" ]; then
+                       echo "SKIP: Observed problem with exact max $i"
+                       continue
+               fi
+
+               devlink_resource_size_set "$size" kvd "$i"
+               devlink_reload
+               log_test "'$i' maximize [$size]"
+
+               devlink_sp_size_kvd_to_default
+       done
+}
+
+profiles_test
+resources_min_test
+resources_max_test
+
+exit "$RET"
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/mirror_gre_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/mirror_gre_scale.sh
new file mode 100644 (file)
index 0000000..8d2186c
--- /dev/null
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../mirror_gre_scale.sh
+
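+# The Spectrum ASIC supports at most three mirroring (SPAN) agents at once,
+# hence three gretap mirror sessions are expected to succeed and a fourth one
+# to fail.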
+mirror_gre_get_target()
+{
+       local should_fail=$1; shift
+
+       if ((! should_fail)); then
+               echo 3
+       else
+               echo 4
+       fi
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
new file mode 100755 (executable)
index 0000000..a0a80e1
--- /dev/null
@@ -0,0 +1,55 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=6
+source ../../../../net/forwarding/lib.sh
+source ../../../../net/forwarding/tc_common.sh
+source devlink_lib_spectrum.sh
+
+current_test=""
+
+cleanup()
+{
+       pre_cleanup
+       if [ -n "$current_test" ]; then
+               ${current_test}_cleanup
+       fi
+       devlink_sp_size_kvd_to_default
+}
+
+devlink_sp_read_kvd_defaults
+trap cleanup EXIT
+
+ALL_TESTS="router tc_flower mirror_gre"
+for current_test in ${TESTS:-$ALL_TESTS}; do
+       source ${current_test}_scale.sh
+
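+       # E.g. for the "router" test, use $ROUTER_NUM_NETIFS if set, falling
+       # back to $NUM_NETIFS (${!var} is bash indirect expansion).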
+       num_netifs_var=${current_test^^}_NUM_NETIFS
+       num_netifs=${!num_netifs_var:-$NUM_NETIFS}
+
+       for profile in $KVD_PROFILES; do
+               RET=0
+               devlink_sp_resource_kvd_profile_set $profile
+               if [[ $RET -gt 0 ]]; then
+                       log_test "'$current_test' [$profile] setting"
+                       continue
+               fi
+
+               for should_fail in 0 1; do
+                       RET=0
+                       target=$(${current_test}_get_target "$should_fail")
+                       ${current_test}_setup_prepare
+                       setup_wait $num_netifs
+                       ${current_test}_test "$target" "$should_fail"
+                       ${current_test}_cleanup
+                       if [[ "$should_fail" -eq 0 ]]; then
+                               log_test "'$current_test' [$profile] $target"
+                       else
+                               log_test "'$current_test' [$profile] overflow $target"
+                       fi
+               done
+       done
+done
+current_test=""
+
+exit "$RET"
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/router_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/router_scale.sh
new file mode 100644 (file)
index 0000000..21c4697
--- /dev/null
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../router_scale.sh
+
+router_get_target()
+{
+       local should_fail=$1
+       local target
+
+       target=$(devlink_resource_size_get kvd hash_single)
+
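+       # For the positive test, aim at 85% of the partition size to leave
+       # some headroom; for the expected failure, one route more than the
+       # partition size.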
+       if [[ $should_fail -eq 0 ]]; then
+               target=$((target * 85 / 100))
+       else
+               target=$((target + 1))
+       fi
+
+       echo $target
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/tc_flower_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/tc_flower_scale.sh
new file mode 100644 (file)
index 0000000..f9bfd89
--- /dev/null
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../tc_flower_scale.sh
+
+tc_flower_get_target()
+{
+       local should_fail=$1; shift
+
+       # 6144 (6x1024) is the theoretical maximum.
+       # One bank of 512 rules is taken by the 18-byte MC router rule.
+       # One rule is the ACL catch-all.
+       # 6144 - 512 - 1 = 5631
+       local target=5631
+
+       if ((! should_fail)); then
+               echo $target
+       else
+               echo $((target + 1))
+       fi
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh
new file mode 100644 (file)
index 0000000..a6d733d
--- /dev/null
@@ -0,0 +1,134 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for the resource limit on offloaded flower rules. The test adds a given
+# number of flower matches for different IPv6 addresses, then generates traffic,
+# and ensures each rule was hit exactly once. This file contains functions to
+# set up a testing topology and run the test, and is meant to be sourced from a
+# script that calls the testing routine with a given number of rules.
+
+TC_FLOWER_NUM_NETIFS=2
+
+tc_flower_h1_create()
+{
+       simple_if_init $h1
+       tc qdisc add dev $h1 clsact
+}
+
+tc_flower_h1_destroy()
+{
+       tc qdisc del dev $h1 clsact
+       simple_if_fini $h1
+}
+
+tc_flower_h2_create()
+{
+       simple_if_init $h2
+       tc qdisc add dev $h2 clsact
+}
+
+tc_flower_h2_destroy()
+{
+       tc qdisc del dev $h2 clsact
+       simple_if_fini $h2
+}
+
+tc_flower_setup_prepare()
+{
+       h1=${NETIFS[p1]}
+       h2=${NETIFS[p2]}
+
+       vrf_prepare
+
+       tc_flower_h1_create
+       tc_flower_h2_create
+}
+
+tc_flower_cleanup()
+{
+       pre_cleanup
+
+       tc_flower_h2_destroy
+       tc_flower_h1_destroy
+
+       vrf_cleanup
+
+       if [[ -v TC_FLOWER_BATCH_FILE ]]; then
+               rm -f $TC_FLOWER_BATCH_FILE
+       fi
+}
+
+tc_flower_addr()
+{
+       local num=$1; shift
+
+       printf "2001:db8:1::%x" $num
+}
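+
+# E.g. "tc_flower_addr 255" prints "2001:db8:1::ff". With at most 65536 rules,
+# the number always fits into the last hextet.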
+
+tc_flower_rules_create()
+{
+       local count=$1; shift
+       local should_fail=$1; shift
+
+       TC_FLOWER_BATCH_FILE="$(mktemp)"
+
+       for ((i = 0; i < count; ++i)); do
+               cat >> $TC_FLOWER_BATCH_FILE <<-EOF
+                       filter add dev $h2 ingress \
+                               prot ipv6 \
+                               pref 1000 \
+                               flower $tcflags dst_ip $(tc_flower_addr $i) \
+                               action drop
+               EOF
+       done
+
+       tc -b $TC_FLOWER_BATCH_FILE
+       check_err_fail $should_fail $? "Rule insertion"
+}
+
+__tc_flower_test()
+{
+       local count=$1; shift
+       local should_fail=$1; shift
+       local last=$((count - 1))
+
+       tc_flower_rules_create $count $should_fail
+
+       for ((i = 0; i < count; ++i)); do
+               $MZ $h1 -q -c 1 -t ip -p 20 -b bc -6 \
+                       -A 2001:db8:2::1 \
+                       -B $(tc_flower_addr $i)
+       done
+
+       MISMATCHES=$(
+               tc -j -s filter show dev $h2 ingress |
+               jq -r '[ .[] | select(.kind == "flower") | .options |
+                        values as $rule | .actions[].stats.packets |
+                        select(. != 1) | "\(.) on \($rule.keys.dst_ip)" ] |
+                      join(", ")'
+       )
+
+       test -z "$MISMATCHES"
+       check_err $? "Expected to capture 1 packet for each IP, but got $MISMATCHES"
+}
+
+tc_flower_test()
+{
+       local count=$1; shift
+       local should_fail=$1; shift
+
+       # We use the lower 16 bits of the IPv6 address for the match, and there
+       # are only 16 bits of rule priority space.
+       if ((count > 65536)); then
+               check_err 1 "Invalid count of $count. At most 65536 rules supported"
+               return
+       fi
+
+       if ! tc_offload_check $TC_FLOWER_NUM_NETIFS; then
+               check_err 1 "Could not test offloaded functionality"
+               return
+       fi
+
+       tcflags="skip_sw"
+       __tc_flower_test $count $should_fail
+}
index 1a0ac3a29ec5f8c9f0052e47e074f40089c634c5..78b24cf76f40167d0e17e372a92587465d86757c 100644 (file)
@@ -13,3 +13,4 @@ udpgso
 udpgso_bench_rx
 udpgso_bench_tx
 tcp_inq
+tls
index 663e11e85727416791c1cf4a3ac8ae7a4c64a646..9cca68e440a0b7107c10b1859e6fc8fb2ed2fe4f 100644 (file)
@@ -13,7 +13,7 @@ TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy
 TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd
 TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx
 TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
-TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict
+TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls
 
 include ../lib.mk
 
index 4a0964c42860cba4cd8f7a097c376c065cfaf5eb..b8a2af8fcfb796c729ba183c4af729d53e9c482f 100644 (file)
@@ -46,6 +46,8 @@ Guidelines for Writing Tests
 
 o Where possible, reuse an existing topology for different tests instead
   of recreating the same topology.
+o Tests that use anything but the most trivial topologies should include
+  ASCII art showing the topology.
 o Where possible, IPv6 and IPv4 addresses shall conform to RFC 3849 and
   RFC 5737, respectively.
 o Where possible, tests shall be written so that they can be reused by
diff --git a/tools/testing/selftests/net/forwarding/bridge_port_isolation.sh b/tools/testing/selftests/net/forwarding/bridge_port_isolation.sh
new file mode 100755 (executable)
index 0000000..a43b464
--- /dev/null
@@ -0,0 +1,151 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="ping_ipv4 ping_ipv6 flooding"
+NUM_NETIFS=6
+CHECK_TC="yes"
+source lib.sh
+
+h1_create()
+{
+       simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+       simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h2_create()
+{
+       simple_if_init $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+h2_destroy()
+{
+       simple_if_fini $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+h3_create()
+{
+       simple_if_init $h3 192.0.2.3/24 2001:db8:1::3/64
+}
+
+h3_destroy()
+{
+       simple_if_fini $h3 192.0.2.3/24 2001:db8:1::3/64
+}
+
+switch_create()
+{
+       ip link add dev br0 type bridge
+
+       ip link set dev $swp1 master br0
+       ip link set dev $swp2 master br0
+       ip link set dev $swp3 master br0
+
+       ip link set dev $swp1 type bridge_slave isolated on
+       check_err $? "Can't set isolation on port $swp1"
+       ip link set dev $swp2 type bridge_slave isolated on
+       check_err $? "Can't set isolation on port $swp2"
+       ip link set dev $swp3 type bridge_slave isolated off
+       check_err $? "Can't disable isolation on port $swp3"
+
+       ip link set dev br0 up
+       ip link set dev $swp1 up
+       ip link set dev $swp2 up
+       ip link set dev $swp3 up
+}
+
+switch_destroy()
+{
+       ip link set dev $swp3 down
+       ip link set dev $swp2 down
+       ip link set dev $swp1 down
+
+       ip link del dev br0
+}
+
+setup_prepare()
+{
+       h1=${NETIFS[p1]}
+       swp1=${NETIFS[p2]}
+
+       swp2=${NETIFS[p3]}
+       h2=${NETIFS[p4]}
+
+       swp3=${NETIFS[p5]}
+       h3=${NETIFS[p6]}
+
+       vrf_prepare
+
+       h1_create
+       h2_create
+       h3_create
+
+       switch_create
+}
+
+cleanup()
+{
+       pre_cleanup
+
+       switch_destroy
+
+       h3_destroy
+       h2_destroy
+       h1_destroy
+
+       vrf_cleanup
+}
+
+ping_ipv4()
+{
+       RET=0
+       ping_do $h1 192.0.2.2
+       check_fail $? "Ping worked when it should not have"
+
+       RET=0
+       ping_do $h3 192.0.2.2
+       check_err $? "Ping didn't work when it should have"
+
+       log_test "Isolated port ping"
+}
+
+ping_ipv6()
+{
+       RET=0
+       ping6_do $h1 2001:db8:1::2
+       check_fail $? "Ping6 worked when it should not have"
+
+       RET=0
+       ping6_do $h3 2001:db8:1::2
+       check_err $? "Ping6 didn't work when it should have"
+
+       log_test "Isolated port ping6"
+}
+
+flooding()
+{
+       local mac=de:ad:be:ef:13:37
+       local ip=192.0.2.100
+
+       RET=0
+       flood_test_do false $mac $ip $h1 $h2
+       check_err $? "Packet was flooded when it should not have been"
+
+       RET=0
+       flood_test_do true $mac $ip $h3 $h2
+       check_err $? "Packet was not flooded when it should have been"
+
+       log_test "Isolated port flooding"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh
new file mode 100644 (file)
index 0000000..5ab1e5f
--- /dev/null
@@ -0,0 +1,108 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+##############################################################################
+# Source library
+
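+# ${BASH_SOURCE%/*} strips the file name from the path to this script; when
+# the script is invoked without a path component, the expansion is unchanged,
+# hence the "." fallback.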
+relative_path="${BASH_SOURCE%/*}"
+if [[ "$relative_path" == "${BASH_SOURCE}" ]]; then
+       relative_path="."
+fi
+
+source "$relative_path/lib.sh"
+
+##############################################################################
+# Defines
+
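+# Derive the devlink handle (e.g. "pci/0000:03:00.0") of the device behind
+# ${NETIFS[p1]} by stripping the port index off its "devlink port show" line.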
+DEVLINK_DEV=$(devlink port show | grep "${NETIFS[p1]}" | \
+             grep -v "${NETIFS[p1]}[0-9]" | cut -d" " -f1 | \
+             rev | cut -d"/" -f2- | rev)
+if [ -z "$DEVLINK_DEV" ]; then
+       echo "SKIP: ${NETIFS[p1]} has no devlink device registered for it"
+       exit 1
+fi
+if [[ "$(echo $DEVLINK_DEV | grep -c pci)" -eq 0 ]]; then
+       echo "SKIP: devlink device's bus is not PCI"
+       exit 1
+fi
+
+DEVLINK_VIDDID=$(lspci -s $(echo $DEVLINK_DEV | cut -d"/" -f2) \
+                -n | cut -d" " -f3)
+
+##############################################################################
+# Sanity checks
+
+devlink -j resource show "$DEVLINK_DEV" &> /dev/null
+if [ $? -ne 0 ]; then
+       echo "SKIP: iproute2 too old, missing devlink resource support"
+       exit 1
+fi
+
+##############################################################################
+# Devlink helpers
+
+devlink_resource_names_to_path()
+{
+       local resource
+       local path=""
+
+       for resource in "${@}"; do
+               if [ "$path" == "" ]; then
+                       path="$resource"
+               else
+                       path="${path}/$resource"
+               fi
+       done
+
+       echo "$path"
+}
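+
+# E.g. "devlink_resource_names_to_path kvd linear singles" prints
+# "kvd/linear/singles".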
+
+devlink_resource_get()
+{
+       local name=$1
+       local resource_name=.[][\"$DEVLINK_DEV\"]
+
+       resource_name="$resource_name | .[] | select (.name == \"$name\")"
+
+       shift
+       for resource in "${@}"; do
+               resource_name="${resource_name} | .[\"resources\"][] | \
+                              select (.name == \"$resource\")"
+       done
+
+       devlink -j resource show "$DEVLINK_DEV" | jq "$resource_name"
+}
+
+devlink_resource_size_get()
+{
+       local size=$(devlink_resource_get "$@" | jq '.["size_new"]')
+
+       if [ "$size" == "null" ]; then
+               devlink_resource_get "$@" | jq '.["size"]'
+       else
+               echo "$size"
+       fi
+}
+
+devlink_resource_size_set()
+{
+       local new_size=$1
+       local path
+
+       shift
+       path=$(devlink_resource_names_to_path "$@")
+       devlink resource set "$DEVLINK_DEV" path "$path" size "$new_size"
+       check_err $? "Failed setting path $path to size $new_size"
+}
+
+devlink_reload()
+{
+       local still_pending
+
+       devlink dev reload "$DEVLINK_DEV" &> /dev/null
+       check_err $? "Failed reload"
+
+       still_pending=$(devlink resource show "$DEVLINK_DEV" | \
+                       grep -c "size_new")
+       check_err $still_pending "Failed reload - There are still unset sizes"
+}
diff --git a/tools/testing/selftests/net/forwarding/gre_multipath.sh b/tools/testing/selftests/net/forwarding/gre_multipath.sh
new file mode 100755 (executable)
index 0000000..3b1e047
--- /dev/null
@@ -0,0 +1,253 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test traffic distribution when a wECMP route forwards traffic to two GRE
+# tunnels.
+#
+# +-------------------------+
+# | H1                      |
+# |               $h1 +     |
+# |      192.0.2.1/28 |     |
+# +-------------------|-----+
+#                     |
+# +-------------------|------------------------+
+# | SW1               |                        |
+# |              $ol1 +                        |
+# |      192.0.2.2/28                          |
+# |                                            |
+# |  + g1a (gre)          + g1b (gre)          |
+# |    loc=192.0.2.65       loc=192.0.2.81     |
+# |    rem=192.0.2.66 --.   rem=192.0.2.82 --. |
+# |    tos=inherit      |   tos=inherit      | |
+# |  .------------------'                    | |
+# |  |                    .------------------' |
+# |  v                    v                    |
+# |  + $ul1.111 (vlan)    + $ul1.222 (vlan)    |
+# |  | 192.0.2.129/28     | 192.0.2.145/28     |
+# |   \                  /                     |
+# |    \________________/                      |
+# |            |                               |
+# |            + $ul1                          |
+# +------------|-------------------------------+
+#              |
+# +------------|-------------------------------+
+# | SW2        + $ul2                          |
+# |     _______|________                       |
+# |    /                \                      |
+# |   /                  \                     |
+# |  + $ul2.111 (vlan)    + $ul2.222 (vlan)    |
+# |  ^ 192.0.2.130/28     ^ 192.0.2.146/28     |
+# |  |                    |                    |
+# |  |                    '------------------. |
+# |  '------------------.                    | |
+# |  + g2a (gre)        | + g2b (gre)        | |
+# |    loc=192.0.2.66   |   loc=192.0.2.82   | |
+# |    rem=192.0.2.65 --'   rem=192.0.2.81 --' |
+# |    tos=inherit          tos=inherit        |
+# |                                            |
+# |              $ol2 +                        |
+# |     192.0.2.17/28 |                        |
+# +-------------------|------------------------+
+#                     |
+# +-------------------|-----+
+# | H2                |     |
+# |               $h2 +     |
+# |     192.0.2.18/28       |
+# +-------------------------+
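+#
+# The distribution is measured by counting, at $ul1 egress, the packets tagged
+# with VLAN 111 vs. those tagged with VLAN 222: one VLAN per tunnel underlay.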
+
+ALL_TESTS="
+       ping_ipv4
+       multipath_ipv4
+"
+
+NUM_NETIFS=6
+source lib.sh
+
+h1_create()
+{
+       simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64
+       ip route add vrf v$h1 192.0.2.16/28 via 192.0.2.2
+}
+
+h1_destroy()
+{
+       ip route del vrf v$h1 192.0.2.16/28 via 192.0.2.2
+       simple_if_fini $h1 192.0.2.1/28
+}
+
+sw1_create()
+{
+       simple_if_init $ol1 192.0.2.2/28
+       __simple_if_init $ul1 v$ol1
+       vlan_create $ul1 111 v$ol1 192.0.2.129/28
+       vlan_create $ul1 222 v$ol1 192.0.2.145/28
+
+       tunnel_create g1a gre 192.0.2.65 192.0.2.66 tos inherit dev v$ol1
+       __simple_if_init g1a v$ol1 192.0.2.65/32
+       ip route add vrf v$ol1 192.0.2.66/32 via 192.0.2.130
+
+       tunnel_create g1b gre 192.0.2.81 192.0.2.82 tos inherit dev v$ol1
+       __simple_if_init g1b v$ol1 192.0.2.81/32
+       ip route add vrf v$ol1 192.0.2.82/32 via 192.0.2.146
+
+       ip route add vrf v$ol1 192.0.2.16/28 \
+          nexthop dev g1a \
+          nexthop dev g1b
+
+       tc qdisc add dev $ul1 clsact
+       tc filter add dev $ul1 egress pref 111 prot 802.1q \
+          flower vlan_id 111 action pass
+       tc filter add dev $ul1 egress pref 222 prot 802.1q \
+          flower vlan_id 222 action pass
+}
+
+sw1_destroy()
+{
+       tc qdisc del dev $ul1 clsact
+
+       ip route del vrf v$ol1 192.0.2.16/28
+
+       ip route del vrf v$ol1 192.0.2.82/32 via 192.0.2.146
+       __simple_if_fini g1b 192.0.2.81/32
+       tunnel_destroy g1b
+
+       ip route del vrf v$ol1 192.0.2.66/32 via 192.0.2.130
+       __simple_if_fini g1a 192.0.2.65/32
+       tunnel_destroy g1a
+
+       vlan_destroy $ul1 222
+       vlan_destroy $ul1 111
+       __simple_if_fini $ul1
+       simple_if_fini $ol1 192.0.2.2/28
+}
+
+sw2_create()
+{
+       simple_if_init $ol2 192.0.2.17/28
+       __simple_if_init $ul2 v$ol2
+       vlan_create $ul2 111 v$ol2 192.0.2.130/28
+       vlan_create $ul2 222 v$ol2 192.0.2.146/28
+
+       tunnel_create g2a gre 192.0.2.66 192.0.2.65 tos inherit dev v$ol2
+       __simple_if_init g2a v$ol2 192.0.2.66/32
+       ip route add vrf v$ol2 192.0.2.65/32 via 192.0.2.129
+
+       tunnel_create g2b gre 192.0.2.82 192.0.2.81 tos inherit dev v$ol2
+       __simple_if_init g2b v$ol2 192.0.2.82/32
+       ip route add vrf v$ol2 192.0.2.81/32 via 192.0.2.145
+
+       ip route add vrf v$ol2 192.0.2.0/28 \
+          nexthop dev g2a \
+          nexthop dev g2b
+}
+
+sw2_destroy()
+{
+       ip route del vrf v$ol2 192.0.2.0/28
+
+       ip route del vrf v$ol2 192.0.2.81/32 via 192.0.2.145
+       __simple_if_fini g2b 192.0.2.82/32
+       tunnel_destroy g2b
+
+       ip route del vrf v$ol2 192.0.2.65/32 via 192.0.2.129
+       __simple_if_fini g2a 192.0.2.66/32
+       tunnel_destroy g2a
+
+       vlan_destroy $ul2 222
+       vlan_destroy $ul2 111
+       __simple_if_fini $ul2
+       simple_if_fini $ol2 192.0.2.17/28
+}
+
+h2_create()
+{
+       simple_if_init $h2 192.0.2.18/28
+       ip route add vrf v$h2 192.0.2.0/28 via 192.0.2.17
+}
+
+h2_destroy()
+{
+       ip route del vrf v$h2 192.0.2.0/28 via 192.0.2.17
+       simple_if_fini $h2 192.0.2.18/28
+}
+
+setup_prepare()
+{
+       h1=${NETIFS[p1]}
+       ol1=${NETIFS[p2]}
+
+       ul1=${NETIFS[p3]}
+       ul2=${NETIFS[p4]}
+
+       ol2=${NETIFS[p5]}
+       h2=${NETIFS[p6]}
+
+       vrf_prepare
+       h1_create
+       sw1_create
+       sw2_create
+       h2_create
+}
+
+cleanup()
+{
+       pre_cleanup
+
+       h2_destroy
+       sw2_destroy
+       sw1_destroy
+       h1_destroy
+       vrf_cleanup
+}
+
+multipath4_test()
+{
+       local what=$1; shift
+       local weight1=$1; shift
+       local weight2=$1; shift
+
+       sysctl_set net.ipv4.fib_multipath_hash_policy 1
+       ip route replace vrf v$ol1 192.0.2.16/28 \
+          nexthop dev g1a weight $weight1 \
+          nexthop dev g1b weight $weight2
+
+       local t0_111=$(tc_rule_stats_get $ul1 111 egress)
+       local t0_222=$(tc_rule_stats_get $ul1 222 egress)
+
+       ip vrf exec v$h1 \
+          $MZ $h1 -q -p 64 -A 192.0.2.1 -B 192.0.2.18 \
+              -d 1msec -t udp "sp=1024,dp=0-32768"
+
+       local t1_111=$(tc_rule_stats_get $ul1 111 egress)
+       local t1_222=$(tc_rule_stats_get $ul1 222 egress)
+
+       local d111=$((t1_111 - t0_111))
+       local d222=$((t1_222 - t0_222))
+       multipath_eval "$what" $weight1 $weight2 $d111 $d222
+
+       ip route replace vrf v$ol1 192.0.2.16/28 \
+          nexthop dev g1a \
+          nexthop dev g1b
+       sysctl_restore net.ipv4.fib_multipath_hash_policy
+}
+
+ping_ipv4()
+{
+       ping_test $h1 192.0.2.18
+}
+
+multipath_ipv4()
+{
+       log_info "Running IPv4 multipath tests"
+       multipath4_test "ECMP" 1 1
+       multipath4_test "Weighted MP 2:1" 2 1
+       multipath4_test "Weighted MP 11:45" 11 45
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
index 7b18a53aa55665a2a979ee895cc03c968107f5eb..ca53b539aa2d18affad9fd63399837c7f5a7fe14 100644 (file)
@@ -8,14 +8,21 @@
 PING=${PING:=ping}
 PING6=${PING6:=ping6}
 MZ=${MZ:=mausezahn}
+ARPING=${ARPING:=arping}
+TEAMD=${TEAMD:=teamd}
 WAIT_TIME=${WAIT_TIME:=5}
 PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
 PAUSE_ON_CLEANUP=${PAUSE_ON_CLEANUP:=no}
 NETIF_TYPE=${NETIF_TYPE:=veth}
 NETIF_CREATE=${NETIF_CREATE:=yes}
 
-if [[ -f forwarding.config ]]; then
-       source forwarding.config
+relative_path="${BASH_SOURCE%/*}"
+if [[ "$relative_path" == "${BASH_SOURCE}" ]]; then
+       relative_path="."
+fi
+
+if [[ -f $relative_path/forwarding.config ]]; then
+       source "$relative_path/forwarding.config"
 fi
 
 ##############################################################################
@@ -28,7 +35,10 @@ check_tc_version()
                echo "SKIP: iproute2 too old; tc is missing JSON support"
                exit 1
        fi
+}
 
+check_tc_shblock_support()
+{
        tc filter help 2>&1 | grep block &> /dev/null
        if [[ $? -ne 0 ]]; then
                echo "SKIP: iproute2 too old; tc is missing shared block support"
@@ -36,6 +46,15 @@ check_tc_version()
        fi
 }
 
+check_tc_chain_support()
+{
+       tc help 2>&1 | grep chain &> /dev/null
+       if [[ $? -ne 0 ]]; then
+               echo "SKIP: iproute2 too old; tc is missing chain support"
+               exit 1
+       fi
+}
+
 if [[ "$(id -u)" -ne 0 ]]; then
        echo "SKIP: need root privileges"
        exit 0
@@ -45,15 +64,18 @@ if [[ "$CHECK_TC" = "yes" ]]; then
        check_tc_version
 fi
 
-if [[ ! -x "$(command -v jq)" ]]; then
-       echo "SKIP: jq not installed"
-       exit 1
-fi
+require_command()
+{
+       local cmd=$1; shift
 
-if [[ ! -x "$(command -v $MZ)" ]]; then
-       echo "SKIP: $MZ not installed"
-       exit 1
-fi
+       if [[ ! -x "$(command -v "$cmd")" ]]; then
+               echo "SKIP: $cmd not installed"
+               exit 1
+       fi
+}
+
+require_command jq
+require_command $MZ
 
 if [[ ! -v NUM_NETIFS ]]; then
        echo "SKIP: importer does not define \"NUM_NETIFS\""
@@ -151,6 +173,19 @@ check_fail()
        fi
 }
 
+check_err_fail()
+{
+       local should_fail=$1; shift
+       local err=$1; shift
+       local what=$1; shift
+
+       if ((should_fail)); then
+               check_fail $err "$what succeeded, but should have failed"
+       else
+               check_err $err "$what failed"
+       fi
+}
+
 log_test()
 {
        local test_name=$1
@@ -185,24 +220,54 @@ log_info()
        echo "INFO: $msg"
 }
 
+setup_wait_dev()
+{
+       local dev=$1; shift
+
+       while true; do
+               ip link show dev $dev up \
+                       | grep 'state UP' &> /dev/null
+               if [[ $? -ne 0 ]]; then
+                       sleep 1
+               else
+                       break
+               fi
+       done
+}
+
 setup_wait()
 {
-       for i in $(eval echo {1..$NUM_NETIFS}); do
-               while true; do
-                       ip link show dev ${NETIFS[p$i]} up \
-                               | grep 'state UP' &> /dev/null
-                       if [[ $? -ne 0 ]]; then
-                               sleep 1
-                       else
-                               break
-                       fi
-               done
+       local num_netifs=${1:-$NUM_NETIFS}
+
+       for ((i = 1; i <= num_netifs; ++i)); do
+               setup_wait_dev ${NETIFS[p$i]}
        done
 
        # Make sure links are ready.
        sleep $WAIT_TIME
 }
 
+lldpad_app_wait_set()
+{
+       local dev=$1; shift
+
+       while lldptool -t -i $dev -V APP -c app | grep -q pending; do
+               echo "$dev: waiting for lldpad to push pending APP updates"
+               sleep 5
+       done
+}
+
+lldpad_app_wait_del()
+{
+       # Give lldpad a chance to push down the changes. If the device is downed
+       # too soon, the updates will be left pending. However, they will have
+       # been struck off lldpad's DB already, so we won't be able to tell they
+       # are pending. On the next test iteration this causes weirdness, as
+       # newly-added APP rules conflict with the old ones and sometimes get
+       # stuck in an "unknown" state.
+       sleep 5
+}
+
 pre_cleanup()
 {
        if [ "${PAUSE_ON_CLEANUP}" = "yes" ]; then
@@ -287,6 +352,29 @@ __addr_add_del()
        done
 }
 
+__simple_if_init()
+{
+       local if_name=$1; shift
+       local vrf_name=$1; shift
+       local addrs=("${@}")
+
+       ip link set dev $if_name master $vrf_name
+       ip link set dev $if_name up
+
+       __addr_add_del $if_name add "${addrs[@]}"
+}
+
+__simple_if_fini()
+{
+       local if_name=$1; shift
+       local addrs=("${@}")
+
+       __addr_add_del $if_name del "${addrs[@]}"
+
+       ip link set dev $if_name down
+       ip link set dev $if_name nomaster
+}
+
 simple_if_init()
 {
        local if_name=$1
@@ -298,11 +386,8 @@ simple_if_init()
        array=("${@}")
 
        vrf_create $vrf_name
-       ip link set dev $if_name master $vrf_name
        ip link set dev $vrf_name up
-       ip link set dev $if_name up
-
-       __addr_add_del $if_name add "${array[@]}"
+       __simple_if_init $if_name $vrf_name "${array[@]}"
 }
 
 simple_if_fini()
@@ -315,9 +400,7 @@ simple_if_fini()
        vrf_name=v$if_name
        array=("${@}")
 
-       __addr_add_del $if_name del "${array[@]}"
-
-       ip link set dev $if_name down
+       __simple_if_fini $if_name "${array[@]}"
        vrf_destroy $vrf_name
 }
 
@@ -365,6 +448,28 @@ vlan_destroy()
        ip link del dev $name
 }
 
+team_create()
+{
+       local if_name=$1; shift
+       local mode=$1; shift
+
+       require_command $TEAMD
+       $TEAMD -t $if_name -d -c '{"runner": {"name": "'$mode'"}}'
+       for slave in "$@"; do
+               ip link set dev $slave down
+               ip link set dev $slave master $if_name
+               ip link set dev $slave up
+       done
+       ip link set dev $if_name up
+}
+
+team_destroy()
+{
+       local if_name=$1; shift
+
+       $TEAMD -t $if_name -k
+}
+
 master_name_get()
 {
        local if_name=$1
@@ -383,9 +488,10 @@ tc_rule_stats_get()
 {
        local dev=$1; shift
        local pref=$1; shift
+       local dir=$1; shift
 
-       tc -j -s filter show dev $dev ingress pref $pref |
-       jq '.[1].options.actions[].stats.packets'
+       tc -j -s filter show dev $dev ${dir:-ingress} pref $pref \
+           | jq '.[1].options.actions[].stats.packets'
 }
 
 mac_get()
@@ -437,7 +543,9 @@ forwarding_restore()
 
 tc_offload_check()
 {
-       for i in $(eval echo {1..$NUM_NETIFS}); do
+       local num_netifs=${1:-$NUM_NETIFS}
+
+       for ((i = 1; i <= num_netifs; ++i)); do
                ethtool -k ${NETIFS[p$i]} \
                        | grep "hw-tc-offload: on" &> /dev/null
                if [[ $? -ne 0 ]]; then
@@ -453,9 +561,15 @@ trap_install()
        local dev=$1; shift
        local direction=$1; shift
 
-       # For slow-path testing, we need to install a trap to get to
-       # slow path the packets that would otherwise be switched in HW.
-       tc filter add dev $dev $direction pref 1 flower skip_sw action trap
+       # Some devices may not support or need in-hardware trapping of traffic
+       # (e.g. the veth pairs that this library creates for non-existent
+       # loopbacks). In that case fall back to "action continue", so that a
+       # filter is still installed (some tests check its counters) and other
+       # filters are still processed.
+       tc filter add dev $dev $direction pref 1 \
+               flower skip_sw action trap 2>/dev/null \
+           || tc filter add dev $dev $direction pref 1 \
+                      flower action continue
 }
 
 trap_uninstall()
@@ -463,11 +577,13 @@ trap_uninstall()
        local dev=$1; shift
        local direction=$1; shift
 
-       tc filter del dev $dev $direction pref 1 flower skip_sw
+       tc filter del dev $dev $direction pref 1 flower
 }
 
 slow_path_trap_install()
 {
+       # For slow-path testing, we need to install a trap to get to
+       # slow path the packets that would otherwise be switched in HW.
        if [ "${tcflags/skip_hw}" != "$tcflags" ]; then
                trap_install "$@"
        fi
@@ -537,6 +653,48 @@ vlan_capture_uninstall()
        __vlan_capture_add_del del 100 "$@"
 }
 
+__dscp_capture_add_del()
+{
+       local add_del=$1; shift
+       local dev=$1; shift
+       local base=$1; shift
+       local dscp;
+
+       for prio in {0..7}; do
+               dscp=$((base + prio))
+               __icmp_capture_add_del $add_del $((dscp + 100)) "" $dev \
+                                      "skip_hw ip_tos $((dscp << 2))"
+       done
+}
+
+dscp_capture_install()
+{
+       local dev=$1; shift
+       local base=$1; shift
+
+       __dscp_capture_add_del add $dev $base
+}
+
+dscp_capture_uninstall()
+{
+       local dev=$1; shift
+       local base=$1; shift
+
+       __dscp_capture_add_del del $dev $base
+}
+
+dscp_fetch_stats()
+{
+       local dev=$1; shift
+       local base=$1; shift
+
+       for prio in {0..7}; do
+               local dscp=$((base + prio))
+               local t=$(tc_rule_stats_get $dev $((dscp + 100)))
+               echo "[$dscp]=$t "
+       done
+}
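+
+# dscp_fetch_stats emits "[<dscp>]=<packets>" pairs suitable for splicing into
+# an associative array, e.g.:
+#
+#      eval "local -A stats=($(dscp_fetch_stats $h1 10))"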
+
 matchall_sink_create()
 {
        local dev=$1; shift
@@ -557,33 +715,86 @@ tests_run()
        done
 }
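+
+# Compare the measured packet distribution between two next hops against the
+# configured weights. E.g. with weights 2:1, capturing 660 vs. 330 packets
+# yields a measured ratio of 2.00, an exact match; a relative discrepancy of
+# more than 15% between the two ratios fails the test.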
 
+multipath_eval()
+{
+       local desc="$1"
+       local weight_rp12=$2
+       local weight_rp13=$3
+       local packets_rp12=$4
+       local packets_rp13=$5
+       local weights_ratio packets_ratio diff
+
+       RET=0
+
+       if [[ "$weight_rp12" -gt "$weight_rp13" ]]; then
+               weights_ratio=$(echo "scale=2; $weight_rp12 / $weight_rp13" \
+                               | bc -l)
+       else
+               weights_ratio=$(echo "scale=2; $weight_rp13 / $weight_rp12" \
+                               | bc -l)
+       fi
+
+       if [[ "$packets_rp12" -eq "0" || "$packets_rp13" -eq "0" ]]; then
+               check_err 1 "Packet difference is 0"
+               log_test "Multipath"
+               log_info "Expected ratio $weights_ratio"
+               return
+       fi
+
+       if [[ "$weight_rp12" -gt "$weight_rp13" ]]; then
+               packets_ratio=$(echo "scale=2; $packets_rp12 / $packets_rp13" \
+                               | bc -l)
+       else
+               packets_ratio=$(echo "scale=2; $packets_rp13 / $packets_rp12" \
+                               | bc -l)
+       fi
+
+       diff=$(echo $weights_ratio - $packets_ratio | bc -l)
+       diff=${diff#-}
+
+       test "$(echo "$diff / $weights_ratio > 0.15" | bc -l)" -eq 0
+       check_err $? "Too large discrepancy between expected and measured ratios"
+       log_test "$desc"
+       log_info "Expected ratio $weights_ratio Measured ratio $packets_ratio"
+}
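+
+# A usage sketch with made-up numbers: for nexthop weights 2 and 1 and a
+# measured split of 670/330 packets, the expected ratio is 2.00 and the
+# measured ratio 2.03; the relative difference (about 1.5%) is well within
+# the 15% tolerance, so the test passes.
+#
+#   multipath_eval "ECMP weight 2:1" 2 1 670 330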
+
 ##############################################################################
 # Tests
 
-ping_test()
+ping_do()
 {
        local if_name=$1
        local dip=$2
        local vrf_name
 
-       RET=0
-
        vrf_name=$(master_name_get $if_name)
        ip vrf exec $vrf_name $PING $dip -c 10 -i 0.1 -w 2 &> /dev/null
+}
+
+ping_test()
+{
+       RET=0
+
+       ping_do $1 $2
        check_err $?
        log_test "ping"
 }
 
-ping6_test()
+ping6_do()
 {
        local if_name=$1
        local dip=$2
        local vrf_name
 
-       RET=0
-
        vrf_name=$(master_name_get $if_name)
        ip vrf exec $vrf_name $PING6 $dip -c 10 -i 0.1 -w 2 &> /dev/null
+}
+
+ping6_test()
+{
+       RET=0
+
+       ping6_do $1 $2
        check_err $?
        log_test "ping6"
 }
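+
+# Splitting the pingers into *_do and *_test lets callers reuse the probe
+# without logging a "ping" result, e.g. to assert that connectivity is
+# absent (sketch):
+#
+#   ping_do $h1 192.0.2.2
+#   check_fail $? "Ping passed unexpectedly"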
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh
new file mode 100755 (executable)
index 0000000..c5095da
--- /dev/null
@@ -0,0 +1,132 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for "tc action mirred egress mirror" when the underlay route points at a
+# bridge device without vlan filtering (802.1d).
+#
+# This test uses the standard topology for testing mirror-to-gretap. See
+# mirror_gre_topo_lib.sh for more details. The full topology is as follows:
+#
+#  +---------------------+                             +---------------------+
+#  | H1                  |                             |                  H2 |
+#  |     + $h1           |                             |           $h2 +     |
+#  |     | 192.0.2.1/28  |                             |  192.0.2.2/28 |     |
+#  +-----|---------------+                             +---------------|-----+
+#        |                                                             |
+#  +-----|-------------------------------------------------------------|-----+
+#  | SW  o---> mirror                                                  |     |
+#  | +---|-------------------------------------------------------------|---+ |
+#  | |   + $swp1            + br1 (802.1q bridge)                $swp2 +   | |
+#  | +---------------------------------------------------------------------+ |
+#  |                                                                         |
+#  | +---------------------------------------------------------------------+ |
+#  | |                      + br2 (802.1d bridge)                          | |
+#  | |                        192.0.2.129/28                               | |
+#  | |   + $swp3              2001:db8:2::1/64                             | |
+#  | +---|-----------------------------------------------------------------+ |
+#  |     |                                          ^                    ^   |
+#  |     |                     + gt6 (ip6gretap)    | + gt4 (gretap)     |   |
+#  |     |                     : loc=2001:db8:2::1  | : loc=192.0.2.129  |   |
+#  |     |                     : rem=2001:db8:2::2 -+ : rem=192.0.2.130 -+   |
+#  |     |                     : ttl=100              : ttl=100              |
+#  |     |                     : tos=inherit          : tos=inherit          |
+#  +-----|---------------------:----------------------:----------------------+
+#        |                     :                      :
+#  +-----|---------------------:----------------------:----------------------+
+#  | H3  + $h3                 + h3-gt6(ip6gretap)    + h3-gt4 (gretap)      |
+#  |       192.0.2.130/28        loc=2001:db8:2::2      loc=192.0.2.130      |
+#  |       2001:db8:2::2/64      rem=2001:db8:2::1      rem=192.0.2.129      |
+#  |                             ttl=100                ttl=100              |
+#  |                             tos=inherit            tos=inherit          |
+#  +-------------------------------------------------------------------------+
+
+ALL_TESTS="
+       test_gretap
+       test_ip6gretap
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+       h1=${NETIFS[p1]}
+       swp1=${NETIFS[p2]}
+
+       swp2=${NETIFS[p3]}
+       h2=${NETIFS[p4]}
+
+       swp3=${NETIFS[p5]}
+       h3=${NETIFS[p6]}
+
+       vrf_prepare
+       mirror_gre_topo_create
+
+       ip link add name br2 type bridge vlan_filtering 0
+       ip link set dev br2 up
+
+       ip link set dev $swp3 master br2
+       ip route add 192.0.2.130/32 dev br2
+       ip -6 route add 2001:db8:2::2/128 dev br2
+
+       ip address add dev br2 192.0.2.129/28
+       ip address add dev br2 2001:db8:2::1/64
+
+       ip address add dev $h3 192.0.2.130/28
+       ip address add dev $h3 2001:db8:2::2/64
+}
+
+cleanup()
+{
+       pre_cleanup
+
+       ip address del dev $h3 2001:db8:2::2/64
+       ip address del dev $h3 192.0.2.130/28
+       ip link del dev br2
+
+       mirror_gre_topo_destroy
+       vrf_cleanup
+}
+
+test_gretap()
+{
+       full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap"
+       full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap"
+}
+
+test_ip6gretap()
+{
+       full_test_span_gre_dir gt6 ingress 8 0 "mirror to ip6gretap"
+       full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap"
+}
+
+test_all()
+{
+       slow_path_trap_install $swp1 ingress
+       slow_path_trap_install $swp1 egress
+
+       tests_run
+
+       slow_path_trap_uninstall $swp1 egress
+       slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+       echo "WARN: Could not test offloaded functionality"
+else
+       tcflags="skip_sw"
+       test_all
+fi
+
+exit $EXIT_STATUS
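+
+# Note on the tcflags pattern above: flower's skip_hw keeps the filter in
+# software only, while skip_sw offloads it to hardware only. Running the
+# suite once with each flag exercises both the slow path and the offloaded
+# path. For example, a software-only variant of a drop rule might look like:
+#
+#   tc filter add dev $swp1 ingress protocol ip pref 1000 \
+#           flower skip_hw ip_proto icmp action drop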
index 3bb4c2ba7b14281340b4560ce21be3a9bd79bd48..197e769c2ed16a65826c3bd67fb560725a28697b 100755 (executable)
@@ -74,12 +74,14 @@ test_vlan_match()
 
 test_gretap()
 {
-       test_vlan_match gt4 'vlan_id 555 vlan_ethtype ip' "mirror to gretap"
+       test_vlan_match gt4 'skip_hw vlan_id 555 vlan_ethtype ip' \
+                       "mirror to gretap"
 }
 
 test_ip6gretap()
 {
-       test_vlan_match gt6 'vlan_id 555 vlan_ethtype ipv6' "mirror to ip6gretap"
+       test_vlan_match gt6 'skip_hw vlan_id 555 vlan_ethtype ipv6' \
+                       "mirror to ip6gretap"
 }
 
 test_gretap_stp()
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh
new file mode 100755 (executable)
index 0000000..a3402cd
--- /dev/null
@@ -0,0 +1,126 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for "tc action mirred egress mirror" when the underlay route points at a
+# bridge device with vlan filtering (802.1q).
+#
+# This test uses the standard topology for testing mirror-to-gretap. See
+# mirror_gre_topo_lib.sh for more details. The full topology is as follows:
+#
+#  +---------------------+                               +---------------------+
+#  | H1                  |                               |                  H2 |
+#  |     + $h1           |                               |           $h2 +     |
+#  |     | 192.0.2.1/28  |                               |  192.0.2.2/28 |     |
+#  +-----|---------------+                               +---------------|-----+
+#        |                                                               |
+#  +-----|---------------------------------------------------------------|-----+
+#  | SW  o---> mirror                                                    |     |
+#  | +---|---------------------------------------------------------------|---+ |
+#  | |   + $swp1                  + br1 (802.1q bridge)            $swp2 +   | |
+#  | |                              192.0.2.129/28                           | |
+#  | |   + $swp3                    2001:db8:2::1/64                         | |
+#  | |   | vid555                   vid555[pvid,untagged]                    | |
+#  | +---|-------------------------------------------------------------------+ |
+#  |     |                                          ^                      ^   |
+#  |     |                     + gt6 (ip6gretap)    |   + gt4 (gretap)     |   |
+#  |     |                     : loc=2001:db8:2::1  |   : loc=192.0.2.129  |   |
+#  |     |                     : rem=2001:db8:2::2 -+   : rem=192.0.2.130 -+   |
+#  |     |                     : ttl=100                : ttl=100              |
+#  |     |                     : tos=inherit            : tos=inherit          |
+#  +-----|---------------------:------------------------:----------------------+
+#        |                     :                        :
+#  +-----|---------------------:------------------------:----------------------+
+#  | H3  + $h3                 + h3-gt6(ip6gretap)      + h3-gt4 (gretap)      |
+#  |     |                       loc=2001:db8:2::2        loc=192.0.2.130      |
+#  |     + $h3.555               rem=2001:db8:2::1        rem=192.0.2.129      |
+#  |       192.0.2.130/28        ttl=100                  ttl=100              |
+#  |       2001:db8:2::2/64      tos=inherit              tos=inherit          |
+#  +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+       test_gretap
+       test_ip6gretap
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+       h1=${NETIFS[p1]}
+       swp1=${NETIFS[p2]}
+
+       swp2=${NETIFS[p3]}
+       h2=${NETIFS[p4]}
+
+       swp3=${NETIFS[p5]}
+       h3=${NETIFS[p6]}
+
+       vrf_prepare
+       mirror_gre_topo_create
+
+       ip link set dev $swp3 master br1
+       bridge vlan add dev br1 vid 555 pvid untagged self
+       ip address add dev br1 192.0.2.129/28
+       ip address add dev br1 2001:db8:2::1/64
+
+       ip -4 route add 192.0.2.130/32 dev br1
+       ip -6 route add 2001:db8:2::2/128 dev br1
+
+       vlan_create $h3 555 v$h3 192.0.2.130/28 2001:db8:2::2/64
+       bridge vlan add dev $swp3 vid 555
+}
+
+cleanup()
+{
+       pre_cleanup
+
+       ip link set dev $swp3 nomaster
+       vlan_destroy $h3 555
+
+       mirror_gre_topo_destroy
+       vrf_cleanup
+}
+
+test_gretap()
+{
+       full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap"
+       full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap"
+}
+
+test_ip6gretap()
+{
+       full_test_span_gre_dir gt6 ingress 8 0 "mirror to ip6gretap"
+       full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap"
+}
+
+tests()
+{
+       slow_path_trap_install $swp1 ingress
+       slow_path_trap_install $swp1 egress
+
+       tests_run
+
+       slow_path_trap_uninstall $swp1 egress
+       slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+tests
+
+if ! tc_offload_check; then
+       echo "WARN: Could not test offloaded functionality"
+else
+       tcflags="skip_sw"
+       tests
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh
new file mode 100755 (executable)
index 0000000..61844ca
--- /dev/null
@@ -0,0 +1,283 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for "tc action mirred egress mirror" when the underlay route points at a
+# bridge device with vlan filtering (802.1q), and the egress device is a team
+# device.
+#
+# +----------------------+                             +----------------------+
+# | H1                   |                             |                   H2 |
+# |     + $h1.333        |                             |        $h1.555 +     |
+# |     | 192.0.2.1/28   |                             |  192.0.2.18/28 |     |
+# +-----|----------------+                             +----------------|-----+
+#       |                               $h1                             |
+#       +--------------------------------+------------------------------+
+#                                        |
+# +--------------------------------------|------------------------------------+
+# | SW                                   o---> mirror                         |
+# |                                      |                                    |
+# |     +--------------------------------+------------------------------+     |
+# |     |                              $swp1                            |     |
+# |     + $swp1.333                                           $swp1.555 +     |
+# |       192.0.2.2/28                                    192.0.2.17/28       |
+# |                                                                           |
+# | +-----------------------------------------------------------------------+ |
+# | |                        BR1 (802.1q)                                   | |
+# | |     + lag (team)       192.0.2.129/28                                 | |
+# | |    / \                 2001:db8:2::1/64                               | |
+# | +---/---\---------------------------------------------------------------+ |
+# |    /     \                                                            ^   |
+# |   |       \                                        + gt4 (gretap)     |   |
+# |   |        \                                         loc=192.0.2.129  |   |
+# |   |         \                                        rem=192.0.2.130 -+   |
+# |   |          \                                       ttl=100              |
+# |   |           \                                      tos=inherit          |
+# |   |            \                                                          |
+# |   |             \_________________________________                        |
+# |   |                                               \                       |
+# |   + $swp3                                          + $swp4                |
+# +---|------------------------------------------------|----------------------+
+#     |                                                |
+# +---|----------------------+                     +---|----------------------+
+# |   + $h3               H3 |                     |   + $h4               H4 |
+# |     192.0.2.130/28       |                     |     192.0.2.130/28       |
+# |     2001:db8:2::2/64     |                     |     2001:db8:2::2/64     |
+# +--------------------------+                     +--------------------------+
+
+ALL_TESTS="
+       test_mirror_gretap_first
+       test_mirror_gretap_second
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+
+require_command $ARPING
+
+vlan_host_create()
+{
+       local if_name=$1; shift
+       local vid=$1; shift
+       local vrf_name=$1; shift
+       local ips=("${@}")
+
+       vrf_create $vrf_name
+       ip link set dev $vrf_name up
+       vlan_create $if_name $vid $vrf_name "${ips[@]}"
+}
+
+vlan_host_destroy()
+{
+       local if_name=$1; shift
+       local vid=$1; shift
+       local vrf_name=$1; shift
+
+       vlan_destroy $if_name $vid
+       ip link set dev $vrf_name down
+       vrf_destroy $vrf_name
+}
+
+h1_create()
+{
+       vlan_host_create $h1 333 vrf-h1 192.0.2.1/28
+       ip -4 route add 192.0.2.16/28 vrf vrf-h1 nexthop via 192.0.2.2
+}
+
+h1_destroy()
+{
+       ip -4 route del 192.0.2.16/28 vrf vrf-h1
+       vlan_host_destroy $h1 333 vrf-h1
+}
+
+h2_create()
+{
+       vlan_host_create $h1 555 vrf-h2 192.0.2.18/28
+       ip -4 route add 192.0.2.0/28 vrf vrf-h2 nexthop via 192.0.2.17
+}
+
+h2_destroy()
+{
+       ip -4 route del 192.0.2.0/28 vrf vrf-h2
+       vlan_host_destroy $h1 555 vrf-h2
+}
+
+h3_create()
+{
+       simple_if_init $h3 192.0.2.130/28
+       tc qdisc add dev $h3 clsact
+}
+
+h3_destroy()
+{
+       tc qdisc del dev $h3 clsact
+       simple_if_fini $h3 192.0.2.130/28
+}
+
+h4_create()
+{
+       simple_if_init $h4 192.0.2.130/28
+       tc qdisc add dev $h4 clsact
+}
+
+h4_destroy()
+{
+       tc qdisc del dev $h4 clsact
+       simple_if_fini $h4 192.0.2.130/28
+}
+
+switch_create()
+{
+       ip link set dev $swp1 up
+       tc qdisc add dev $swp1 clsact
+       vlan_create $swp1 333 "" 192.0.2.2/28
+       vlan_create $swp1 555 "" 192.0.2.17/28
+
+       tunnel_create gt4 gretap 192.0.2.129 192.0.2.130 \
+                     ttl 100 tos inherit
+
+       ip link set dev $swp3 up
+       ip link set dev $swp4 up
+
+       ip link add name br1 type bridge vlan_filtering 1
+       ip link set dev br1 up
+       __addr_add_del br1 add 192.0.2.129/32
+       ip -4 route add 192.0.2.130/32 dev br1
+
+       team_create lag loadbalance $swp3 $swp4
+       ip link set dev lag master br1
+}
+
+switch_destroy()
+{
+       ip link set dev lag nomaster
+       team_destroy lag
+
+       ip -4 route del 192.0.2.130/32 dev br1
+       __addr_add_del br1 del 192.0.2.129/32
+       ip link set dev br1 down
+       ip link del dev br1
+
+       ip link set dev $swp4 down
+       ip link set dev $swp3 down
+
+       tunnel_destroy gt4
+
+       vlan_destroy $swp1 555
+       vlan_destroy $swp1 333
+       tc qdisc del dev $swp1 clsact
+       ip link set dev $swp1 down
+}
+
+setup_prepare()
+{
+       h1=${NETIFS[p1]}
+       swp1=${NETIFS[p2]}
+
+       swp3=${NETIFS[p3]}
+       h3=${NETIFS[p4]}
+
+       swp4=${NETIFS[p5]}
+       h4=${NETIFS[p6]}
+
+       vrf_prepare
+
+       ip link set dev $h1 up
+       h1_create
+       h2_create
+       h3_create
+       h4_create
+       switch_create
+
+       trap_install $h3 ingress
+       trap_install $h4 ingress
+}
+
+cleanup()
+{
+       pre_cleanup
+
+       trap_uninstall $h4 ingress
+       trap_uninstall $h3 ingress
+
+       switch_destroy
+       h4_destroy
+       h3_destroy
+       h2_destroy
+       h1_destroy
+       ip link set dev $h1 down
+
+       vrf_cleanup
+}
+
+test_lag_slave()
+{
+       local host_dev=$1; shift
+       local up_dev=$1; shift
+       local down_dev=$1; shift
+       local what=$1; shift
+
+       RET=0
+
+       mirror_install $swp1 ingress gt4 \
+                      "proto 802.1q flower vlan_id 333 $tcflags"
+
+       # Test connectivity through $up_dev when $down_dev is set down.
+       ip link set dev $down_dev down
+       setup_wait_dev $up_dev
+       setup_wait_dev $host_dev
+       $ARPING -I br1 192.0.2.130 -qfc 1
+       sleep 2
+       mirror_test vrf-h1 192.0.2.1 192.0.2.18 $host_dev 1 10
+
+       # Test lack of connectivity when both slaves are down.
+       ip link set dev $up_dev down
+       sleep 2
+       mirror_test vrf-h1 192.0.2.1 192.0.2.18 $h3 1 0
+       mirror_test vrf-h1 192.0.2.1 192.0.2.18 $h4 1 0
+
+       ip link set dev $up_dev up
+       ip link set dev $down_dev up
+       mirror_uninstall $swp1 ingress
+
+       log_test "$what ($tcflags)"
+}
+
+test_mirror_gretap_first()
+{
+       test_lag_slave $h3 $swp3 $swp4 "mirror to gretap: LAG first slave"
+}
+
+test_mirror_gretap_second()
+{
+       test_lag_slave $h4 $swp4 $swp3 "mirror to gretap: LAG second slave"
+}
+
+test_all()
+{
+       slow_path_trap_install $swp1 ingress
+       slow_path_trap_install $swp1 egress
+
+       tests_run
+
+       slow_path_trap_uninstall $swp1 egress
+       slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+       echo "WARN: Could not test offloaded functionality"
+else
+       tcflags="skip_sw"
+       test_all
+fi
+
+exit $EXIT_STATUS
index aa29d46186a837d53fa8fb76fa4b0161563417e0..135902aa8b11498bfbf99428a77084b4ee3de7fd 100755 (executable)
@@ -122,15 +122,8 @@ test_span_gre_egress_up()
        # After setting the device up, wait for neighbor to get resolved so that
        # we can expect mirroring to work.
        ip link set dev $swp3 up
-       while true; do
-               ip neigh sh dev $swp3 $remote_ip nud reachable |
-                   grep -q ^
-               if [[ $? -ne 0 ]]; then
-                       sleep 1
-               else
-                       break
-               fi
-       done
+       setup_wait_dev $swp3
+       ping -c 1 -I $swp3 $remote_ip &>/dev/null
 
        quick_test_span_gre_dir $tundev ingress
        mirror_uninstall $swp1 ingress
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh b/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh
new file mode 100755 (executable)
index 0000000..9edf4cb
--- /dev/null
@@ -0,0 +1,285 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for "tc action mirred egress mirror" when the underlay route points at a
+# team device.
+#
+# +----------------------+                             +----------------------+
+# | H1                   |                             |                   H2 |
+# |    + $h1.333         |                             |        $h1.555 +     |
+# |    | 192.0.2.1/28    |                             |  192.0.2.18/28 |     |
+# +----|-----------------+                             +----------------|-----+
+#      |                                $h1                             |
+#      +---------------------------------+------------------------------+
+#                                        |
+# +--------------------------------------|------------------------------------+
+# | SW                                   o---> mirror                         |
+# |                                      |                                    |
+# |   +----------------------------------+------------------------------+     |
+# |   |                                $swp1                            |     |
+# |   + $swp1.333                                             $swp1.555 +     |
+# |     192.0.2.2/28                                      192.0.2.17/28       |
+# |                                                                           |
+# |                                                                           |
+# |   + gt4 (gretap)      ,-> + lag1 (team)                                   |
+# |     loc=192.0.2.129   |   | 192.0.2.129/28                                |
+# |     rem=192.0.2.130 --'   |                                               |
+# |     ttl=100               |                                               |
+# |     tos=inherit           |                                               |
+# |      _____________________|______________________                         |
+# |     /                                            \                        |
+# |    /                                              \                       |
+# |   + $swp3                                          + $swp4                |
+# +---|------------------------------------------------|----------------------+
+#     |                                                |
+# +---|------------------------------------------------|----------------------+
+# |   + $h3                                            + $h4               H3 |
+# |    \                                              /                       |
+# |     \____________________________________________/                        |
+# |                           |                                               |
+# |                           + lag2 (team)                                   |
+# |                             192.0.2.130/28                                |
+# |                                                                           |
+# +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+       test_mirror_gretap_first
+       test_mirror_gretap_second
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+
+require_command $ARPING
+
+vlan_host_create()
+{
+       local if_name=$1; shift
+       local vid=$1; shift
+       local vrf_name=$1; shift
+       local ips=("${@}")
+
+       vrf_create $vrf_name
+       ip link set dev $vrf_name up
+       vlan_create $if_name $vid $vrf_name "${ips[@]}"
+}
+
+vlan_host_destroy()
+{
+       local if_name=$1; shift
+       local vid=$1; shift
+       local vrf_name=$1; shift
+
+       vlan_destroy $if_name $vid
+       ip link set dev $vrf_name down
+       vrf_destroy $vrf_name
+}
+
+h1_create()
+{
+       vlan_host_create $h1 333 vrf-h1 192.0.2.1/28
+       ip -4 route add 192.0.2.16/28 vrf vrf-h1 nexthop via 192.0.2.2
+}
+
+h1_destroy()
+{
+       ip -4 route del 192.0.2.16/28 vrf vrf-h1
+       vlan_host_destroy $h1 333 vrf-h1
+}
+
+h2_create()
+{
+       vlan_host_create $h1 555 vrf-h2 192.0.2.18/28
+       ip -4 route add 192.0.2.0/28 vrf vrf-h2 nexthop via 192.0.2.17
+}
+
+h2_destroy()
+{
+       ip -4 route del 192.0.2.0/28 vrf vrf-h2
+       vlan_host_destroy $h1 555 vrf-h2
+}
+
+h3_create_team()
+{
+       team_create lag2 lacp $h3 $h4
+       __simple_if_init lag2 vrf-h3 192.0.2.130/32
+       ip -4 route add vrf vrf-h3 192.0.2.129/32 dev lag2
+}
+
+h3_destroy_team()
+{
+       ip -4 route del vrf vrf-h3 192.0.2.129/32 dev lag2
+       __simple_if_fini lag2 192.0.2.130/32
+       team_destroy lag2
+
+       ip link set dev $h3 down
+       ip link set dev $h4 down
+}
+
+h3_create()
+{
+       vrf_create vrf-h3
+       ip link set dev vrf-h3 up
+       tc qdisc add dev $h3 clsact
+       tc qdisc add dev $h4 clsact
+       h3_create_team
+}
+
+h3_destroy()
+{
+       h3_destroy_team
+       tc qdisc del dev $h4 clsact
+       tc qdisc del dev $h3 clsact
+       ip link set dev vrf-h3 down
+       vrf_destroy vrf-h3
+}
+
+switch_create()
+{
+       ip link set dev $swp1 up
+       tc qdisc add dev $swp1 clsact
+       vlan_create $swp1 333 "" 192.0.2.2/28
+       vlan_create $swp1 555 "" 192.0.2.17/28
+
+       tunnel_create gt4 gretap 192.0.2.129 192.0.2.130 \
+                     ttl 100 tos inherit
+
+       ip link set dev $swp3 up
+       ip link set dev $swp4 up
+       team_create lag1 lacp $swp3 $swp4
+       __addr_add_del lag1 add 192.0.2.129/32
+       ip -4 route add 192.0.2.130/32 dev lag1
+}
+
+switch_destroy()
+{
+       ip -4 route del 192.0.2.130/32 dev lag1
+       __addr_add_del lag1 del 192.0.2.129/32
+       team_destroy lag1
+
+       ip link set dev $swp4 down
+       ip link set dev $swp3 down
+
+       tunnel_destroy gt4
+
+       vlan_destroy $swp1 555
+       vlan_destroy $swp1 333
+       tc qdisc del dev $swp1 clsact
+       ip link set dev $swp1 down
+}
+
+setup_prepare()
+{
+       h1=${NETIFS[p1]}
+       swp1=${NETIFS[p2]}
+
+       swp3=${NETIFS[p3]}
+       h3=${NETIFS[p4]}
+
+       swp4=${NETIFS[p5]}
+       h4=${NETIFS[p6]}
+
+       vrf_prepare
+
+       ip link set dev $h1 up
+       h1_create
+       h2_create
+       h3_create
+       switch_create
+
+       trap_install $h3 ingress
+       trap_install $h4 ingress
+}
+
+cleanup()
+{
+       pre_cleanup
+
+       trap_uninstall $h4 ingress
+       trap_uninstall $h3 ingress
+
+       switch_destroy
+       h3_destroy
+       h2_destroy
+       h1_destroy
+       ip link set dev $h1 down
+
+       vrf_cleanup
+}
+
+test_lag_slave()
+{
+       local up_dev=$1; shift
+       local down_dev=$1; shift
+       local what=$1; shift
+
+       RET=0
+
+       mirror_install $swp1 ingress gt4 \
+                      "proto 802.1q flower vlan_id 333 $tcflags"
+
+       # Move $down_dev away from the team. That will prompt a change in the
+       # txability of the connected device, without changing its upness. The
+       # driver should notice the txability change and move the traffic to the
+       # other slave.
+       ip link set dev $down_dev nomaster
+       sleep 2
+       mirror_test vrf-h1 192.0.2.1 192.0.2.18 $up_dev 1 10
+
+       # Test lack of connectivity when neither slave is txable.
+       ip link set dev $up_dev nomaster
+       sleep 2
+       mirror_test vrf-h1 192.0.2.1 192.0.2.18 $h3 1 0
+       mirror_test vrf-h1 192.0.2.1 192.0.2.18 $h4 1 0
+       mirror_uninstall $swp1 ingress
+
+       # Recreate H3's team device, because mlxsw, which this test is
+       # predominantly meant to test, requires a bottom-up construction and
+       # doesn't allow enslavement to a device that already has an upper.
+       h3_destroy_team
+       h3_create_team
+       # Wait for ${h,swp}{3,4}.
+       setup_wait
+
+       log_test "$what ($tcflags)"
+}
+
+test_mirror_gretap_first()
+{
+       test_lag_slave $h3 $h4 "mirror to gretap: LAG first slave"
+}
+
+test_mirror_gretap_second()
+{
+       test_lag_slave $h4 $h3 "mirror to gretap: LAG second slave"
+}
+
+test_all()
+{
+       slow_path_trap_install $swp1 ingress
+       slow_path_trap_install $swp1 egress
+
+       tests_run
+
+       slow_path_trap_uninstall $swp1 egress
+       slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+       echo "WARN: Could not test offloaded functionality"
+else
+       tcflags="skip_sw"
+       test_all
+fi
+
+exit $EXIT_STATUS
index 619b469365be171c7a0ba39fbc84693214eb9e65..fac486178ef727aa387450042050f02c65c0ff32 100644 (file)
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 
-source mirror_lib.sh
+source "$relative_path/mirror_lib.sh"
 
 quick_test_span_gre_dir_ips()
 {
@@ -62,7 +62,7 @@ full_test_span_gre_dir_vlan_ips()
                          "$backward_type" "$ip1" "$ip2"
 
        tc filter add dev $h3 ingress pref 77 prot 802.1q \
-               flower $vlan_match ip_proto 0x2f \
+               flower $vlan_match \
                action pass
        mirror_test v$h1 $ip1 $ip2 $h3 77 10
        tc filter del dev $h3 ingress pref 77
index 8fa681eb90e72dee85fa5fc9c44f7b9d0f45a622..6f9ef1820e9370c58ce096d7bd2e2e766f8d0df0 100755 (executable)
@@ -35,6 +35,8 @@ setup_prepare()
        vrf_prepare
        mirror_gre_topo_create
 
+       sysctl_set net.ipv4.conf.v$h3.rp_filter 0
+
        ip address add dev $swp3 192.0.2.161/28
        ip address add dev $h3 192.0.2.162/28
        ip address add dev gt4 192.0.2.129/32
@@ -61,6 +63,8 @@ cleanup()
        ip address del dev $h3 192.0.2.162/28
        ip address del dev $swp3 192.0.2.161/28
 
+       sysctl_restore net.ipv4.conf.v$h3.rp_filter
+
        mirror_gre_topo_destroy
        vrf_cleanup
 
index 25341956470840c9f36dd424164ffd10132235a0..39c03e2867f479560a25a622f1bdaa2f3fbbd289 100644 (file)
@@ -33,7 +33,7 @@
 #   |                                                                         |
 #   +-------------------------------------------------------------------------+
 
-source mirror_topo_lib.sh
+source "$relative_path/mirror_topo_lib.sh"
 
 mirror_gre_topo_h3_create()
 {
index 5dbc7a08f4bd5489f46e28522f4de40713c6ace7..204b25f1393494251ad731fd1a8cb9baf98e4002 100755 (executable)
@@ -28,6 +28,8 @@ source mirror_lib.sh
 source mirror_gre_lib.sh
 source mirror_gre_topo_lib.sh
 
+require_command $ARPING
+
 setup_prepare()
 {
        h1=${NETIFS[p1]}
@@ -39,6 +41,12 @@ setup_prepare()
        swp3=${NETIFS[p5]}
        h3=${NETIFS[p6]}
 
+       # gt4's remote address is at $h3.555, not $h3. Thus packets arriving
+       # directly at $h3 for test_gretap_untagged_egress() are rejected by
+       # rp_filter and the test spuriously fails.
+       sysctl_set net.ipv4.conf.all.rp_filter 0
+       sysctl_set net.ipv4.conf.$h3.rp_filter 0
+
        vrf_prepare
        mirror_gre_topo_create
 
@@ -65,6 +73,9 @@ cleanup()
 
        mirror_gre_topo_destroy
        vrf_cleanup
+
+       sysctl_restore net.ipv4.conf.$h3.rp_filter
+       sysctl_restore net.ipv4.conf.all.rp_filter
 }
 
 test_vlan_match()
@@ -79,12 +90,14 @@ test_vlan_match()
 
 test_gretap()
 {
-       test_vlan_match gt4 'vlan_id 555 vlan_ethtype ip' "mirror to gretap"
+       test_vlan_match gt4 'skip_hw vlan_id 555 vlan_ethtype ip' \
+                       "mirror to gretap"
 }
 
 test_ip6gretap()
 {
-       test_vlan_match gt6 'vlan_id 555 vlan_ethtype ipv6' "mirror to ip6gretap"
+       test_vlan_match gt6 'skip_hw vlan_id 555 vlan_ethtype ipv6' \
+                       "mirror to ip6gretap"
 }
 
 test_span_gre_forbidden_cpu()
@@ -138,7 +151,7 @@ test_span_gre_forbidden_egress()
 
        bridge vlan add dev $swp3 vid 555
        # Re-prime FDB
-       arping -I br1.555 192.0.2.130 -fqc 1
+       $ARPING -I br1.555 192.0.2.130 -fqc 1
        sleep 1
        quick_test_span_gre_dir $tundev ingress
 
@@ -212,7 +225,7 @@ test_span_gre_fdb_roaming()
 
        bridge fdb del dev $swp2 $h3mac vlan 555 master
        # Re-prime FDB
-       arping -I br1.555 192.0.2.130 -fqc 1
+       $ARPING -I br1.555 192.0.2.130 -fqc 1
        sleep 1
        quick_test_span_gre_dir $tundev ingress
 
index d36dc26c6c516364779e12db4a6532e54cf3f433..07991e1025c70e0e1f5e084fbcef94b7bbaed173 100644 (file)
@@ -105,7 +105,7 @@ do_test_span_vlan_dir_ips()
        # Install the capture as skip_hw to avoid double-counting of packets.
        # The traffic is meant for local box anyway, so will be trapped to
        # kernel.
-       vlan_capture_install $dev "skip_hw vlan_id $vid"
+       vlan_capture_install $dev "skip_hw vlan_id $vid vlan_ethtype ip"
        mirror_test v$h1 $ip1 $ip2 $dev 100 $expect
        mirror_test v$h2 $ip2 $ip1 $dev 100 $expect
        vlan_capture_uninstall $dev
diff --git a/tools/testing/selftests/net/forwarding/router_bridge.sh b/tools/testing/selftests/net/forwarding/router_bridge.sh
new file mode 100755 (executable)
index 0000000..ebc596a
--- /dev/null
@@ -0,0 +1,113 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+       ping_ipv4
+       ping_ipv6
+"
+NUM_NETIFS=4
+source lib.sh
+
+h1_create()
+{
+       simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64
+       ip -4 route add 192.0.2.128/28 vrf v$h1 nexthop via 192.0.2.2
+       ip -6 route add 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2
+}
+
+h1_destroy()
+{
+       ip -6 route del 2001:db8:2::/64 vrf v$h1
+       ip -4 route del 192.0.2.128/28 vrf v$h1
+       simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64
+}
+
+h2_create()
+{
+       simple_if_init $h2 192.0.2.130/28 2001:db8:2::2/64
+       ip -4 route add 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.129
+       ip -6 route add 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+       ip -6 route del 2001:db8:1::/64 vrf v$h2
+       ip -4 route del 192.0.2.0/28 vrf v$h2
+       simple_if_fini $h2 192.0.2.130/28 2001:db8:2::2/64
+}
+
+router_create()
+{
+       ip link add name br1 type bridge vlan_filtering 1
+       ip link set dev br1 up
+
+       ip link set dev $swp1 master br1
+       ip link set dev $swp1 up
+       __addr_add_del br1 add 192.0.2.2/28 2001:db8:1::2/64
+
+       ip link set dev $swp2 up
+       __addr_add_del $swp2 add 192.0.2.129/28 2001:db8:2::1/64
+}
+
+router_destroy()
+{
+       __addr_add_del $swp2 del 192.0.2.129/28 2001:db8:2::1/64
+       ip link set dev $swp2 down
+
+       __addr_add_del br1 del 192.0.2.2/28 2001:db8:1::2/64
+       ip link set dev $swp1 down
+       ip link set dev $swp1 nomaster
+
+       ip link del dev br1
+}
+
+setup_prepare()
+{
+       h1=${NETIFS[p1]}
+       swp1=${NETIFS[p2]}
+
+       swp2=${NETIFS[p3]}
+       h2=${NETIFS[p4]}
+
+       vrf_prepare
+
+       h1_create
+       h2_create
+
+       router_create
+
+       forwarding_enable
+}
+
+cleanup()
+{
+       pre_cleanup
+
+       forwarding_restore
+
+       router_destroy
+
+       h2_destroy
+       h1_destroy
+
+       vrf_cleanup
+}
+
+ping_ipv4()
+{
+       ping_test $h1 192.0.2.130
+}
+
+ping_ipv6()
+{
+       ping6_test $h1 2001:db8:2::2
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router_bridge_vlan.sh b/tools/testing/selftests/net/forwarding/router_bridge_vlan.sh
new file mode 100755 (executable)
index 0000000..fef88eb
--- /dev/null
@@ -0,0 +1,132 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+       ping_ipv4
+       ping_ipv6
+       vlan
+"
+NUM_NETIFS=4
+source lib.sh
+
+h1_create()
+{
+       simple_if_init $h1
+       vlan_create $h1 555 v$h1 192.0.2.1/28 2001:db8:1::1/64
+       ip -4 route add 192.0.2.128/28 vrf v$h1 nexthop via 192.0.2.2
+       ip -6 route add 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2
+}
+
+h1_destroy()
+{
+       ip -6 route del 2001:db8:2::/64 vrf v$h1
+       ip -4 route del 192.0.2.128/28 vrf v$h1
+       vlan_destroy $h1 555
+       simple_if_fini $h1
+}
+
+h2_create()
+{
+       simple_if_init $h2 192.0.2.130/28 2001:db8:2::2/64
+       ip -4 route add 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.129
+       ip -6 route add 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+       ip -6 route del 2001:db8:1::/64 vrf v$h2
+       ip -4 route del 192.0.2.0/28 vrf v$h2
+       simple_if_fini $h2 192.0.2.130/28 2001:db8:2::2/64
+}
+
+router_create()
+{
+       ip link add name br1 type bridge vlan_filtering 1
+       ip link set dev br1 up
+
+       ip link set dev $swp1 master br1
+       ip link set dev $swp1 up
+
+       bridge vlan add dev br1 vid 555 self pvid untagged
+       bridge vlan add dev $swp1 vid 555
+
+       __addr_add_del br1 add 192.0.2.2/28 2001:db8:1::2/64
+
+       ip link set dev $swp2 up
+       __addr_add_del $swp2 add 192.0.2.129/28 2001:db8:2::1/64
+}
+
+router_destroy()
+{
+       __addr_add_del $swp2 del 192.0.2.129/28 2001:db8:2::1/64
+       ip link set dev $swp2 down
+
+       __addr_add_del br1 del 192.0.2.2/28 2001:db8:1::2/64
+       ip link set dev $swp1 down
+       ip link set dev $swp1 nomaster
+
+       ip link del dev br1
+}
+
+setup_prepare()
+{
+       h1=${NETIFS[p1]}
+       swp1=${NETIFS[p2]}
+
+       swp2=${NETIFS[p3]}
+       h2=${NETIFS[p4]}
+
+       vrf_prepare
+
+       h1_create
+       h2_create
+
+       router_create
+
+       forwarding_enable
+}
+
+cleanup()
+{
+       pre_cleanup
+
+       forwarding_restore
+
+       router_destroy
+
+       h2_destroy
+       h1_destroy
+
+       vrf_cleanup
+}
+
+vlan()
+{
+       RET=0
+
+       bridge vlan add dev br1 vid 333 self
+       check_err $? "Can't add a non-PVID VLAN"
+       bridge vlan del dev br1 vid 333 self
+       check_err $? "Can't remove a non-PVID VLAN"
+
+       log_test "vlan"
+}
+
+ping_ipv4()
+{
+       ping_test $h1 192.0.2.130
+}
+
+ping_ipv6()
+{
+       ping6_test $h1 2001:db8:2::2
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router_broadcast.sh b/tools/testing/selftests/net/forwarding/router_broadcast.sh
new file mode 100755 (executable)
index 0000000..7bd2ebb
--- /dev/null
@@ -0,0 +1,233 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="ping_ipv4"
+NUM_NETIFS=6
+source lib.sh
+
+h1_create()
+{
+       vrf_create "vrf-h1"
+       ip link set dev $h1 master vrf-h1
+
+       ip link set dev vrf-h1 up
+       ip link set dev $h1 up
+
+       ip address add 192.0.2.2/24 dev $h1
+
+       ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1
+       ip route add 198.51.200.0/24 vrf vrf-h1 nexthop via 192.0.2.1
+}
+
+h1_destroy()
+{
+       ip route del 198.51.200.0/24 vrf vrf-h1
+       ip route del 198.51.100.0/24 vrf vrf-h1
+
+       ip address del 192.0.2.2/24 dev $h1
+
+       ip link set dev $h1 down
+       vrf_destroy "vrf-h1"
+}
+
+h2_create()
+{
+       vrf_create "vrf-h2"
+       ip link set dev $h2 master vrf-h2
+
+       ip link set dev vrf-h2 up
+       ip link set dev $h2 up
+
+       ip address add 198.51.100.2/24 dev $h2
+
+       ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1
+       ip route add 198.51.200.0/24 vrf vrf-h2 nexthop via 198.51.100.1
+}
+
+h2_destroy()
+{
+       ip route del 198.51.200.0/24 vrf vrf-h2
+       ip route del 192.0.2.0/24 vrf vrf-h2
+
+       ip address del 198.51.100.2/24 dev $h2
+
+       ip link set dev $h2 down
+       vrf_destroy "vrf-h2"
+}
+
+h3_create()
+{
+       vrf_create "vrf-h3"
+       ip link set dev $h3 master vrf-h3
+
+       ip link set dev vrf-h3 up
+       ip link set dev $h3 up
+
+       ip address add 198.51.200.2/24 dev $h3
+
+       ip route add 192.0.2.0/24 vrf vrf-h3 nexthop via 198.51.200.1
+       ip route add 198.51.100.0/24 vrf vrf-h3 nexthop via 198.51.200.1
+}
+
+h3_destroy()
+{
+       ip route del 198.51.100.0/24 vrf vrf-h3
+       ip route del 192.0.2.0/24 vrf vrf-h3
+
+       ip address del 198.51.200.2/24 dev $h3
+
+       ip link set dev $h3 down
+       vrf_destroy "vrf-h3"
+}
+
+router_create()
+{
+       ip link set dev $rp1 up
+       ip link set dev $rp2 up
+       ip link set dev $rp3 up
+
+       ip address add 192.0.2.1/24 dev $rp1
+
+       ip address add 198.51.100.1/24 dev $rp2
+       ip address add 198.51.200.1/24 dev $rp3
+}
+
+router_destroy()
+{
+       ip address del 198.51.200.1/24 dev $rp3
+       ip address del 198.51.100.1/24 dev $rp2
+
+       ip address del 192.0.2.1/24 dev $rp1
+
+       ip link set dev $rp3 down
+       ip link set dev $rp2 down
+       ip link set dev $rp1 down
+}
+
+setup_prepare()
+{
+       h1=${NETIFS[p1]}
+       rp1=${NETIFS[p2]}
+
+       rp2=${NETIFS[p3]}
+       h2=${NETIFS[p4]}
+
+       rp3=${NETIFS[p5]}
+       h3=${NETIFS[p6]}
+
+       vrf_prepare
+
+       h1_create
+       h2_create
+       h3_create
+
+       router_create
+
+       forwarding_enable
+}
+
+cleanup()
+{
+       pre_cleanup
+
+       forwarding_restore
+
+       router_destroy
+
+       h3_destroy
+       h2_destroy
+       h1_destroy
+
+       vrf_cleanup
+}
+
+bc_forwarding_disable()
+{
+       sysctl_set net.ipv4.conf.all.bc_forwarding 0
+       sysctl_set net.ipv4.conf.$rp1.bc_forwarding 0
+}
+
+bc_forwarding_enable()
+{
+       sysctl_set net.ipv4.conf.all.bc_forwarding 1
+       sysctl_set net.ipv4.conf.$rp1.bc_forwarding 1
+}
+
+bc_forwarding_restore()
+{
+       sysctl_restore net.ipv4.conf.$rp1.bc_forwarding
+       sysctl_restore net.ipv4.conf.all.bc_forwarding
+}
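+
+# The sysctl_set/sysctl_restore helpers from lib.sh record the original value
+# before overwriting it, which is what lets bc_forwarding_restore revert both
+# knobs. A rough standalone equivalent, without the lib.sh bookkeeping:
+#
+#   orig=$(sysctl -n net.ipv4.conf.all.bc_forwarding)
+#   sysctl -qw net.ipv4.conf.all.bc_forwarding=1
+#   # ... run the test ...
+#   sysctl -qw net.ipv4.conf.all.bc_forwarding=$orig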
+
+ping_test_from()
+{
+       local oif=$1
+       local dip=$2
+       local from=$3
+       local fail=${4:-0}
+
+       RET=0
+
+       log_info "ping $dip, expected reply from $from"
+       ip vrf exec $(master_name_get $oif) \
+       $PING -I $oif $dip -c 10 -i 0.1 -w 2 -b 2>&1 | grep $from &> /dev/null
+       check_err_fail $fail $?
+}
+
+ping_ipv4()
+{
+       sysctl_set net.ipv4.icmp_echo_ignore_broadcasts 0
+
+       bc_forwarding_disable
+       log_info "bc_forwarding disabled on r1 =>"
+       ping_test_from $h1 198.51.100.255 192.0.2.1
+       log_test "h1 -> net2: reply from r1 (not forwarding)"
+       ping_test_from $h1 198.51.200.255 192.0.2.1
+       log_test "h1 -> net3: reply from r1 (not forwarding)"
+       ping_test_from $h1 192.0.2.255 192.0.2.1
+       log_test "h1 -> net1: reply from r1 (not dropping)"
+       ping_test_from $h1 255.255.255.255 192.0.2.1
+       log_test "h1 -> 255.255.255.255: reply from r1 (not forwarding)"
+
+       ping_test_from $h2 192.0.2.255 198.51.100.1
+       log_test "h2 -> net1: reply from r1 (not forwarding)"
+       ping_test_from $h2 198.51.200.255 198.51.100.1
+       log_test "h2 -> net3: reply from r1 (not forwarding)"
+       ping_test_from $h2 198.51.100.255 198.51.100.1
+       log_test "h2 -> net2: reply from r1 (not dropping)"
+       ping_test_from $h2 255.255.255.255 198.51.100.1
+       log_test "h2 -> 255.255.255.255: reply from r1 (not forwarding)"
+       bc_forwarding_restore
+
+       bc_forwarding_enable
+       log_info "bc_forwarding enabled on r1 =>"
+       ping_test_from $h1 198.51.100.255 198.51.100.2
+       log_test "h1 -> net2: reply from h2 (forwarding)"
+       ping_test_from $h1 198.51.200.255 198.51.200.2
+       log_test "h1 -> net3: reply from h3 (forwarding)"
+       ping_test_from $h1 192.0.2.255 192.0.2.1 1
+       log_test "h1 -> net1: no reply (dropping)"
+       ping_test_from $h1 255.255.255.255 192.0.2.1
+       log_test "h1 -> 255.255.255.255: reply from r1 (not forwarding)"
+
+       ping_test_from $h2 192.0.2.255 192.0.2.2
+       log_test "h2 -> net1: reply from h1 (forwarding)"
+       ping_test_from $h2 198.51.200.255 198.51.200.2
+       log_test "h2 -> net3: reply from h3 (forwarding)"
+       ping_test_from $h2 198.51.100.255 198.51.100.1 1
+       log_test "h2 -> net2: no reply (dropping)"
+       ping_test_from $h2 255.255.255.255 198.51.100.1
+       log_test "h2 -> 255.255.255.255: reply from r1 (not forwarding)"
+       bc_forwarding_restore
+
+       sysctl_restore net.ipv4.icmp_echo_ignore_broadcasts
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
index 8b6d0fb6d604d4b632edb8150c62f447ba176aec..79a2099279621a7810b2718f76f2964012423fa0 100755 (executable)
@@ -159,45 +159,6 @@ router2_destroy()
        vrf_destroy "vrf-r2"
 }
 
-multipath_eval()
-{
-       local desc="$1"
-       local weight_rp12=$2
-       local weight_rp13=$3
-       local packets_rp12=$4
-       local packets_rp13=$5
-       local weights_ratio packets_ratio diff
-
-       RET=0
-
-       if [[ "$packets_rp12" -eq "0" || "$packets_rp13" -eq "0" ]]; then
-              check_err 1 "Packet difference is 0"
-              log_test "Multipath"
-              log_info "Expected ratio $weights_ratio"
-              return
-       fi
-
-       if [[ "$weight_rp12" -gt "$weight_rp13" ]]; then
-               weights_ratio=$(echo "scale=2; $weight_rp12 / $weight_rp13" \
-                      | bc -l)
-               packets_ratio=$(echo "scale=2; $packets_rp12 / $packets_rp13" \
-                      | bc -l)
-       else
-               weights_ratio=$(echo "scale=2; $weight_rp13 / $weight_rp12" | \
-                      bc -l)
-               packets_ratio=$(echo "scale=2; $packets_rp13 / $packets_rp12" | \
-                      bc -l)
-       fi
-
-       diff=$(echo $weights_ratio - $packets_ratio | bc -l)
-       diff=${diff#-}
-
-       test "$(echo "$diff / $weights_ratio > 0.15" | bc -l)" -eq 0
-       check_err $? "Too large discrepancy between expected and measured ratios"
-       log_test "$desc"
-       log_info "Expected ratio $weights_ratio Measured ratio $packets_ratio"
-}
-
 multipath4_test()
 {
        local desc="$1"
index d2c783e94df3d6a5f160215fc11777b6b22b283e..2934fb5ed2a2c776c6c9eedeb47eb6d3266344c1 100755 (executable)
@@ -1,7 +1,8 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
-ALL_TESTS="unreachable_chain_test gact_goto_chain_test"
+ALL_TESTS="unreachable_chain_test gact_goto_chain_test create_destroy_chain \
+          template_filter_fits"
 NUM_NETIFS=2
 source tc_common.sh
 source lib.sh
@@ -80,6 +81,87 @@ gact_goto_chain_test()
        log_test "gact goto chain ($tcflags)"
 }
 
+create_destroy_chain()
+{
+       RET=0
+
+       tc chain add dev $h2 ingress
+       check_err $? "Failed to create default chain"
+
+       output="$(tc -j chain get dev $h2 ingress)"
+       check_err $? "Failed to get default chain"
+
+       echo $output | jq -e ".[] | select(.chain == 0)" &> /dev/null
+       check_err $? "Unexpected output for default chain"
+
+       tc chain add dev $h2 ingress chain 1
+       check_err $? "Failed to create chain 1"
+
+       output="$(tc -j chain get dev $h2 ingress chain 1)"
+       check_err $? "Failed to get chain 1"
+
+       echo $output | jq -e ".[] | select(.chain == 1)" &> /dev/null
+       check_err $? "Unexpected output for chain 1"
+
+       output="$(tc -j chain show dev $h2 ingress)"
+       check_err $? "Failed to dump chains"
+
+       echo $output | jq -e ".[] | select(.chain == 0)" &> /dev/null
+       check_err $? "Can't find default chain in dump"
+
+       echo $output | jq -e ".[] | select(.chain == 1)" &> /dev/null
+       check_err $? "Can't find chain 1 in dump"
+
+       tc chain del dev $h2 ingress
+       check_err $? "Failed to destroy default chain"
+
+       tc chain del dev $h2 ingress chain 1
+       check_err $? "Failed to destroy chain 1"
+
+       log_test "create destroy chain"
+}
+
+template_filter_fits()
+{
+       RET=0
+
+       tc chain add dev $h2 ingress protocol ip \
+               flower dst_mac 00:00:00:00:00:00/FF:FF:FF:FF:FF:FF &> /dev/null
+       tc chain add dev $h2 ingress chain 1 protocol ip \
+               flower src_mac 00:00:00:00:00:00/FF:FF:FF:FF:FF:FF &> /dev/null
+
+       tc filter add dev $h2 ingress protocol ip pref 1 handle 1101 \
+               flower dst_mac $h2mac action drop
+       check_err $? "Failed to insert filter which fits template"
+
+       tc filter add dev $h2 ingress protocol ip pref 1 handle 1102 \
+               flower src_mac $h2mac action drop &> /dev/null
+       check_fail $? "Incorrectly succeeded to insert filter which does not match template"
+
+       tc filter add dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+               flower src_mac $h2mac action drop
+       check_err $? "Failed to insert filter which fits template"
+
+       tc filter add dev $h2 ingress chain 1 protocol ip pref 1 handle 1102 \
+               flower dst_mac $h2mac action drop &> /dev/null
+       check_fail $? "Incorrectly succeeded to insert filter which does not match template"
+
+       tc filter del dev $h2 ingress chain 1 protocol ip pref 1 handle 1102 \
+               flower &> /dev/null
+       tc filter del dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+               flower &> /dev/null
+
+       tc filter del dev $h2 ingress protocol ip pref 1 handle 1102 \
+               flower &> /dev/null
+       tc filter del dev $h2 ingress protocol ip pref 1 handle 1101 \
+               flower &> /dev/null
+
+       tc chain del dev $h2 ingress chain 1
+       tc chain del dev $h2 ingress
+
+       log_test "template filter fits"
+}
+
 setup_prepare()
 {
        h1=${NETIFS[p1]}
@@ -103,6 +185,8 @@ cleanup()
        vrf_cleanup
 }
 
+check_tc_chain_support
+
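+# check_tc_chain_support (like check_tc_shblock_support further down) is a
+# lib.sh guard that skips the test when the installed iproute2 lacks the
+# required feature. A plausible sketch of such a guard -- the actual lib.sh
+# implementation may differ:
+#
+#   check_tc_chain_support()
+#   {
+#           tc help 2>&1 | grep -q chain || \
+#                   { echo "SKIP: tc is missing chain support"; exit 1; }
+#   }
+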
 trap cleanup EXIT
 
 setup_prepare
index b5b91720381561efa48d2f664afc3af7b84671c1..9826a446e2c01597d99a96bd749ad1eb89cae908 100755 (executable)
@@ -105,6 +105,8 @@ cleanup()
        ip link set $swp2 address $swp2origmac
 }
 
+check_tc_shblock_support
+
 trap cleanup EXIT
 
 setup_prepare
diff --git a/tools/testing/selftests/net/ip6_gre_headroom.sh b/tools/testing/selftests/net/ip6_gre_headroom.sh
new file mode 100755 (executable)
index 0000000..5b41e8b
--- /dev/null
@@ -0,0 +1,65 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test that enough headroom is reserved for the first packet passing through an
+# IPv6 GRE-like netdevice.
+
+setup_prepare()
+{
+       ip link add h1 type veth peer name swp1
+       ip link add h3 type veth peer name swp3
+
+       ip link set dev h1 up
+       ip address add 192.0.2.1/28 dev h1
+
+       ip link add dev vh3 type vrf table 20
+       ip link set dev h3 master vh3
+       ip link set dev vh3 up
+       ip link set dev h3 up
+
+       ip link set dev swp3 up
+       ip address add dev swp3 2001:db8:2::1/64
+       ip address add dev swp3 2001:db8:2::3/64
+
+       ip link set dev swp1 up
+       tc qdisc add dev swp1 clsact
+
+       ip link add name er6 type ip6erspan \
+          local 2001:db8:2::1 remote 2001:db8:2::2 oseq okey 123
+       ip link set dev er6 up
+
+       ip link add name gt6 type ip6gretap \
+          local 2001:db8:2::3 remote 2001:db8:2::4
+       ip link set dev gt6 up
+
+       sleep 1
+}
+
+cleanup()
+{
+       ip link del dev gt6
+       ip link del dev er6
+       ip link del dev swp1
+       ip link del dev swp3
+       ip link del dev vh3
+}
+
+test_headroom()
+{
+       local type=$1; shift
+       local tundev=$1; shift
+
+       tc filter add dev swp1 ingress pref 1000 matchall skip_hw \
+               action mirred egress mirror dev $tundev
+       ping -I h1 192.0.2.2 -c 1 -w 2 &> /dev/null
+       tc filter del dev swp1 ingress pref 1000
+
+       # If it doesn't panic, it passes.
+       printf "TEST: %-60s  [PASS]\n" "$type headroom"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+test_headroom ip6gretap gt6
+test_headroom ip6erspan er6
index 0d7a44fa30af2580da73a5e68aeeb8a40c3a229a..08c341b49760f002723c5622ef2ab38436d1f5bb 100755 (executable)
@@ -525,18 +525,21 @@ kci_test_macsec()
 #-------------------------------------------------------------------
 kci_test_ipsec()
 {
-       srcip="14.0.0.52"
-       dstip="14.0.0.70"
+       ret=0
        algo="aead rfc4106(gcm(aes)) 0x3132333435363738393031323334353664636261 128"
+       srcip=192.168.123.1
+       dstip=192.168.123.2
+       spi=7
+
+       ip addr add $srcip dev $devdummy
 
        # flush to be sure there's nothing configured
        ip x s flush ; ip x p flush
        check_err $?
 
        # start the monitor in the background
-       tmpfile=`mktemp ipsectestXXX`
-       ip x m > $tmpfile &
-       mpid=$!
+       tmpfile=`mktemp /var/run/ipsectestXXX`
+       mpid=`(ip x m > $tmpfile & echo $!) 2>/dev/null`
        sleep 0.2
 
        ipsecid="proto esp src $srcip dst $dstip spi 0x07"
@@ -599,6 +602,7 @@ kci_test_ipsec()
        check_err $?
        ip x p flush
        check_err $?
+       ip addr del $srcip/32 dev $devdummy
 
        if [ $ret -ne 0 ]; then
                echo "FAIL: ipsec"
@@ -607,6 +611,119 @@ kci_test_ipsec()
        echo "PASS: ipsec"
 }
 
+#-------------------------------------------------------------------
+# Example commands
+#   ip x s add proto esp src 14.0.0.52 dst 14.0.0.70 \
+#            spi 0x07 mode transport reqid 0x07 replay-window 32 \
+#            aead 'rfc4106(gcm(aes))' 1234567890123456dcba 128 \
+#            sel src 14.0.0.52/24 dst 14.0.0.70/24 \
+#            offload dev sim1 dir out
+#   ip x p add dir out src 14.0.0.52/24 dst 14.0.0.70/24 \
+#            tmpl proto esp src 14.0.0.52 dst 14.0.0.70 \
+#            spi 0x07 mode transport reqid 0x07
+#
+#-------------------------------------------------------------------
+kci_test_ipsec_offload()
+{
+       ret=0
+       algo="aead rfc4106(gcm(aes)) 0x3132333435363738393031323334353664636261 128"
+       srcip=192.168.123.3
+       dstip=192.168.123.4
+       dev=simx1
+       sysfsd=/sys/kernel/debug/netdevsim/$dev
+       sysfsf=$sysfsd/ipsec
+
+       # setup netdevsim since dummydev doesn't have offload support
+       modprobe netdevsim
+       check_err $?
+       if [ $ret -ne 0 ]; then
+               echo "FAIL: ipsec_offload can't load netdevsim"
+               return 1
+       fi
+
+       ip link add $dev type netdevsim
+       ip addr add $srcip dev $dev
+       ip link set $dev up
+       if [ ! -d $sysfsd ] ; then
+               echo "FAIL: ipsec_offload can't create device $dev"
+               return 1
+       fi
+       if [ ! -f $sysfsf ] ; then
+               echo "FAIL: ipsec_offload netdevsim doesn't support IPsec offload"
+               return 1
+       fi
+
+       # flush to be sure there's nothing configured
+       ip x s flush ; ip x p flush
+
+       # create offloaded SAs, both in and out
+       ip x p add dir out src $srcip/24 dst $dstip/24 \
+           tmpl proto esp src $srcip dst $dstip spi 9 \
+           mode transport reqid 42
+       check_err $?
+       ip x p add dir out src $dstip/24 dst $srcip/24 \
+           tmpl proto esp src $dstip dst $srcip spi 9 \
+           mode transport reqid 42
+       check_err $?
+
+       ip x s add proto esp src $srcip dst $dstip spi 9 \
+           mode transport reqid 42 $algo sel src $srcip/24 dst $dstip/24 \
+           offload dev $dev dir out
+       check_err $?
+       ip x s add proto esp src $dstip dst $srcip spi 9 \
+           mode transport reqid 42 $algo sel src $dstip/24 dst $srcip/24 \
+           offload dev $dev dir in
+       check_err $?
+       if [ $ret -ne 0 ]; then
+               echo "FAIL: ipsec_offload can't create SA"
+               return 1
+       fi
+
+       # does offload show up in ip output
+       lines=`ip x s list | grep -c "crypto offload parameters: dev $dev dir"`
+       if [ $lines -ne 2 ] ; then
+               echo "FAIL: ipsec_offload SA offload missing from list output"
+               check_err 1
+       fi
+
+       # use ping to exercise the Tx path
+       ping -I $dev -c 3 -W 1 -i 0 $dstip >/dev/null
+
+       # does driver have correct offload info
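+       # (the expected dump below shows each 32-bit key word byte-swapped,
+       # so the trailing four key bytes 0x64636261 appear as salt=0x61626364)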
+       diff $sysfsf - << EOF
+SA count=2 tx=3
+sa[0] tx ipaddr=0x00000000 00000000 00000000 00000000
+sa[0]    spi=0x00000009 proto=0x32 salt=0x61626364 crypt=1
+sa[0]    key=0x34333231 38373635 32313039 36353433
+sa[1] rx ipaddr=0x00000000 00000000 00000000 037ba8c0
+sa[1]    spi=0x00000009 proto=0x32 salt=0x61626364 crypt=1
+sa[1]    key=0x34333231 38373635 32313039 36353433
+EOF
+       if [ $? -ne 0 ] ; then
+               echo "FAIL: ipsec_offload incorrect driver data"
+               check_err 1
+       fi
+
+       # does offload get removed from driver
+       ip x s flush
+       ip x p flush
+       lines=`grep -c "SA count=0" $sysfsf`
+       if [ $lines -ne 1 ] ; then
+               echo "FAIL: ipsec_offload SA not removed from driver"
+               check_err 1
+       fi
+
+       # clean up any leftovers
+       ip link del $dev
+       rmmod netdevsim
+
+       if [ $ret -ne 0 ]; then
+               echo "FAIL: ipsec_offload"
+               return 1
+       fi
+       echo "PASS: ipsec_offload"
+}
+
 kci_test_gretap()
 {
        testns="testns"
@@ -861,6 +978,7 @@ kci_test_rtnl()
        kci_test_encap
        kci_test_macsec
        kci_test_ipsec
+       kci_test_ipsec_offload
 
        kci_del_dummy
 }
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
new file mode 100644 (file)
index 0000000..b3ebf26
--- /dev/null
@@ -0,0 +1,692 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <linux/tls.h>
+#include <linux/tcp.h>
+#include <linux/socket.h>
+
+#include <sys/types.h>
+#include <sys/sendfile.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+
+#include "../kselftest_harness.h"
+
+#define TLS_PAYLOAD_MAX_LEN 16384
+#define SOL_TLS 282
+
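+/* Fixture: self->fd is the connecting (client) socket with TLS_TX set,
+ * self->cfd is the accepted (server) socket with TLS_RX set; notls is
+ * true when the kernel lacks the TLS ULP and crypto setup is skipped.
+ */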
+FIXTURE(tls)
+{
+       int fd, cfd;
+       bool notls;
+};
+
+FIXTURE_SETUP(tls)
+{
+       struct tls12_crypto_info_aes_gcm_128 tls12;
+       struct sockaddr_in addr;
+       socklen_t len;
+       int sfd, ret;
+
+       self->notls = false;
+       len = sizeof(addr);
+
+       memset(&tls12, 0, sizeof(tls12));
+       tls12.info.version = TLS_1_2_VERSION;
+       tls12.info.cipher_type = TLS_CIPHER_AES_GCM_128;
+
+       addr.sin_family = AF_INET;
+       addr.sin_addr.s_addr = htonl(INADDR_ANY);
+       addr.sin_port = 0;
+
+       self->fd = socket(AF_INET, SOCK_STREAM, 0);
+       sfd = socket(AF_INET, SOCK_STREAM, 0);
+
+       ret = bind(sfd, &addr, sizeof(addr));
+       ASSERT_EQ(ret, 0);
+       ret = listen(sfd, 10);
+       ASSERT_EQ(ret, 0);
+
+       ret = getsockname(sfd, &addr, &len);
+       ASSERT_EQ(ret, 0);
+
+       ret = connect(self->fd, &addr, sizeof(addr));
+       ASSERT_EQ(ret, 0);
+
+       ret = setsockopt(self->fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
+       if (ret != 0) {
+               self->notls = true;
+               printf("Failure setting TCP_ULP, testing without tls\n");
+       }
+
+       if (!self->notls) {
+               ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12,
+                                sizeof(tls12));
+               ASSERT_EQ(ret, 0);
+       }
+
+       self->cfd = accept(sfd, &addr, &len);
+       ASSERT_GE(self->cfd, 0);
+
+       if (!self->notls) {
+               ret = setsockopt(self->cfd, IPPROTO_TCP, TCP_ULP, "tls",
+                                sizeof("tls"));
+               ASSERT_EQ(ret, 0);
+
+               ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12,
+                                sizeof(tls12));
+               ASSERT_EQ(ret, 0);
+       }
+
+       close(sfd);
+}
+
+FIXTURE_TEARDOWN(tls)
+{
+       close(self->fd);
+       close(self->cfd);
+}
+
+TEST_F(tls, sendfile)
+{
+       int filefd = open("/proc/self/exe", O_RDONLY);
+       struct stat st;
+
+       EXPECT_GE(filefd, 0);
+       fstat(filefd, &st);
+       EXPECT_GE(sendfile(self->fd, filefd, 0, st.st_size), 0);
+}
+
+TEST_F(tls, send_then_sendfile)
+{
+       int filefd = open("/proc/self/exe", O_RDONLY);
+       char const *test_str = "test_send";
+       int to_send = strlen(test_str) + 1;
+       char recv_buf[10];
+       struct stat st;
+       char *buf;
+
+       EXPECT_GE(filefd, 0);
+       fstat(filefd, &st);
+       buf = (char *)malloc(st.st_size);
+
+       EXPECT_EQ(send(self->fd, test_str, to_send, 0), to_send);
+       EXPECT_EQ(recv(self->cfd, recv_buf, to_send, 0), to_send);
+       EXPECT_EQ(memcmp(test_str, recv_buf, to_send), 0);
+
+       EXPECT_GE(sendfile(self->fd, filefd, 0, st.st_size), 0);
+       EXPECT_EQ(recv(self->cfd, buf, st.st_size, 0), st.st_size);
+}
+
+TEST_F(tls, recv_max)
+{
+       unsigned int send_len = TLS_PAYLOAD_MAX_LEN;
+       char recv_mem[TLS_PAYLOAD_MAX_LEN];
+       char buf[TLS_PAYLOAD_MAX_LEN];
+
+       EXPECT_GE(send(self->fd, buf, send_len, 0), 0);
+       EXPECT_NE(recv(self->cfd, recv_mem, send_len, 0), -1);
+       EXPECT_EQ(memcmp(buf, recv_mem, send_len), 0);
+}
+
+TEST_F(tls, recv_small)
+{
+       char const *test_str = "test_read";
+       int send_len = 10;
+       char buf[10];
+
+       send_len = strlen(test_str) + 1;
+       EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+       EXPECT_NE(recv(self->cfd, buf, send_len, 0), -1);
+       EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
+}
+
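+/* MSG_MORE holds the TLS record open: the first payload is not readable
+ * (recv with MSG_DONTWAIT fails) until a send without MSG_MORE closes
+ * and pushes the record.
+ */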
+TEST_F(tls, msg_more)
+{
+       char const *test_str = "test_read";
+       int send_len = 10;
+       char buf[10 * 2];
+
+       EXPECT_EQ(send(self->fd, test_str, send_len, MSG_MORE), send_len);
+       EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_DONTWAIT), -1);
+       EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+       EXPECT_EQ(recv(self->cfd, buf, send_len * 2, MSG_DONTWAIT),
+                 send_len * 2);
+       EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
+}
+
+TEST_F(tls, sendmsg_single)
+{
+       struct msghdr msg;
+
+       char const *test_str = "test_sendmsg";
+       size_t send_len = 13;
+       struct iovec vec;
+       char buf[13];
+
+       vec.iov_base = (char *)test_str;
+       vec.iov_len = send_len;
+       memset(&msg, 0, sizeof(struct msghdr));
+       msg.msg_iov = &vec;
+       msg.msg_iovlen = 1;
+       EXPECT_EQ(sendmsg(self->fd, &msg, 0), send_len);
+       EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len);
+       EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
+}
+
+TEST_F(tls, sendmsg_large)
+{
+       void *mem = malloc(16384);
+       size_t send_len = 16384;
+       size_t sends = 128;
+       struct msghdr msg;
+       size_t recvs = 0;
+       size_t sent = 0;
+
+       memset(&msg, 0, sizeof(struct msghdr));
+       while (sent++ < sends) {
+               struct iovec vec = { (void *)mem, send_len };
+
+               msg.msg_iov = &vec;
+               msg.msg_iovlen = 1;
+               EXPECT_EQ(sendmsg(self->cfd, &msg, 0), send_len);
+       }
+
+       while (recvs++ < sends)
+               EXPECT_NE(recv(self->fd, mem, send_len, 0), -1);
+
+       free(mem);
+}
+
+TEST_F(tls, sendmsg_multiple)
+{
+       char const *test_str = "test_sendmsg_multiple";
+       struct iovec vec[5];
+       char *test_strs[5];
+       struct msghdr msg;
+       int total_len = 0;
+       int len_cmp = 0;
+       int iov_len = 5;
+       char *buf;
+       int i;
+
+       memset(&msg, 0, sizeof(struct msghdr));
+       for (i = 0; i < iov_len; i++) {
+               test_strs[i] = (char *)malloc(strlen(test_str) + 1);
+               snprintf(test_strs[i], strlen(test_str) + 1, "%s", test_str);
+               vec[i].iov_base = (void *)test_strs[i];
+               vec[i].iov_len = strlen(test_strs[i]) + 1;
+               total_len += vec[i].iov_len;
+       }
+       msg.msg_iov = vec;
+       msg.msg_iovlen = iov_len;
+
+       EXPECT_EQ(sendmsg(self->cfd, &msg, 0), total_len);
+       buf = malloc(total_len);
+       EXPECT_NE(recv(self->fd, buf, total_len, 0), -1);
+       for (i = 0; i < iov_len; i++) {
+               EXPECT_EQ(memcmp(test_strs[i], buf + len_cmp,
+                                strlen(test_strs[i])),
+                         0);
+               len_cmp += strlen(buf + len_cmp) + 1;
+       }
+       for (i = 0; i < iov_len; i++)
+               free(test_strs[i]);
+       free(buf);
+}
+
+TEST_F(tls, sendmsg_multiple_stress)
+{
+       char const *test_str = "abcdefghijklmno";
+       struct iovec vec[1024];
+       char *test_strs[1024];
+       int iov_len = 1024;
+       int total_len = 0;
+       char buf[1 << 14];
+       struct msghdr msg;
+       int len_cmp = 0;
+       int i;
+
+       memset(&msg, 0, sizeof(struct msghdr));
+       for (i = 0; i < iov_len; i++) {
+               test_strs[i] = (char *)malloc(strlen(test_str) + 1);
+               snprintf(test_strs[i], strlen(test_str) + 1, "%s", test_str);
+               vec[i].iov_base = (void *)test_strs[i];
+               vec[i].iov_len = strlen(test_strs[i]) + 1;
+               total_len += vec[i].iov_len;
+       }
+       msg.msg_iov = vec;
+       msg.msg_iovlen = iov_len;
+
+       EXPECT_EQ(sendmsg(self->fd, &msg, 0), total_len);
+       EXPECT_NE(recv(self->cfd, buf, total_len, 0), -1);
+
+       for (i = 0; i < iov_len; i++)
+               len_cmp += strlen(buf + len_cmp) + 1;
+
+       for (i = 0; i < iov_len; i++)
+               free(test_strs[i]);
+}
+
+TEST_F(tls, splice_from_pipe)
+{
+       int send_len = TLS_PAYLOAD_MAX_LEN;
+       char mem_send[TLS_PAYLOAD_MAX_LEN];
+       char mem_recv[TLS_PAYLOAD_MAX_LEN];
+       int p[2];
+
+       ASSERT_GE(pipe(p), 0);
+       EXPECT_GE(write(p[1], mem_send, send_len), 0);
+       EXPECT_GE(splice(p[0], NULL, self->fd, NULL, send_len, 0), 0);
+       EXPECT_GE(recv(self->cfd, mem_recv, send_len, 0), 0);
+       EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
+}
+
+TEST_F(tls, splice_from_pipe2)
+{
+       int send_len = 16000;
+       char mem_send[16000];
+       char mem_recv[16000];
+       int p2[2];
+       int p[2];
+
+       ASSERT_GE(pipe(p), 0);
+       ASSERT_GE(pipe(p2), 0);
+       EXPECT_GE(write(p[1], mem_send, 8000), 0);
+       EXPECT_GE(splice(p[0], NULL, self->fd, NULL, 8000, 0), 0);
+       EXPECT_GE(write(p2[1], mem_send + 8000, 8000), 0);
+       EXPECT_GE(splice(p2[0], NULL, self->fd, NULL, 8000, 0), 0);
+       EXPECT_GE(recv(self->cfd, mem_recv, send_len, 0), 0);
+       EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
+}
+
+TEST_F(tls, send_and_splice)
+{
+       int send_len = TLS_PAYLOAD_MAX_LEN;
+       char mem_send[TLS_PAYLOAD_MAX_LEN];
+       char mem_recv[TLS_PAYLOAD_MAX_LEN];
+       char const *test_str = "test_read";
+       int send_len2 = 10;
+       char buf[10];
+       int p[2];
+
+       ASSERT_GE(pipe(p), 0);
+       EXPECT_EQ(send(self->fd, test_str, send_len2, 0), send_len2);
+       EXPECT_NE(recv(self->cfd, buf, send_len2, 0), -1);
+       EXPECT_EQ(memcmp(test_str, buf, send_len2), 0);
+
+       EXPECT_GE(write(p[1], mem_send, send_len), send_len);
+       EXPECT_GE(splice(p[0], NULL, self->fd, NULL, send_len, 0), send_len);
+
+       EXPECT_GE(recv(self->cfd, mem_recv, send_len, 0), 0);
+       EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
+}
+
+TEST_F(tls, splice_to_pipe)
+{
+       int send_len = TLS_PAYLOAD_MAX_LEN;
+       char mem_send[TLS_PAYLOAD_MAX_LEN];
+       char mem_recv[TLS_PAYLOAD_MAX_LEN];
+       int p[2];
+
+       ASSERT_GE(pipe(p), 0);
+       EXPECT_GE(send(self->fd, mem_send, send_len, 0), 0);
+       EXPECT_GE(splice(self->cfd, NULL, p[1], NULL, send_len, 0), 0);
+       EXPECT_GE(read(p[0], mem_recv, send_len), 0);
+       EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
+}
+
+TEST_F(tls, recvmsg_single)
+{
+       char const *test_str = "test_recvmsg_single";
+       int send_len = strlen(test_str) + 1;
+       char buf[20];
+       struct msghdr hdr;
+       struct iovec vec;
+
+       memset(&hdr, 0, sizeof(hdr));
+       EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+       vec.iov_base = (char *)buf;
+       vec.iov_len = send_len;
+       hdr.msg_iovlen = 1;
+       hdr.msg_iov = &vec;
+       EXPECT_NE(recvmsg(self->cfd, &hdr, 0), -1);
+       EXPECT_EQ(memcmp(test_str, buf, send_len), 0);
+}
+
+TEST_F(tls, recvmsg_single_max)
+{
+       int send_len = TLS_PAYLOAD_MAX_LEN;
+       char send_mem[TLS_PAYLOAD_MAX_LEN];
+       char recv_mem[TLS_PAYLOAD_MAX_LEN];
+       struct iovec vec;
+       struct msghdr hdr;
+
+       EXPECT_EQ(send(self->fd, send_mem, send_len, 0), send_len);
+       vec.iov_base = (char *)recv_mem;
+       vec.iov_len = TLS_PAYLOAD_MAX_LEN;
+
+       memset(&hdr, 0, sizeof(hdr));
+       hdr.msg_iovlen = 1;
+       hdr.msg_iov = &vec;
+       EXPECT_NE(recvmsg(self->cfd, &hdr, 0), -1);
+       EXPECT_EQ(memcmp(send_mem, recv_mem, send_len), 0);
+}
+
+TEST_F(tls, recvmsg_multiple)
+{
+       unsigned int msg_iovlen = 1024;
+       unsigned int len_compared = 0;
+       struct iovec vec[1024];
+       char *iov_base[1024];
+       unsigned int iov_len = 16;
+       int send_len = 1 << 14;
+       char buf[1 << 14];
+       struct msghdr hdr;
+       int i;
+
+       EXPECT_EQ(send(self->fd, buf, send_len, 0), send_len);
+       for (i = 0; i < msg_iovlen; i++) {
+               iov_base[i] = (char *)malloc(iov_len);
+               vec[i].iov_base = iov_base[i];
+               vec[i].iov_len = iov_len;
+       }
+
+       memset(&hdr, 0, sizeof(hdr));
+       hdr.msg_iovlen = msg_iovlen;
+       hdr.msg_iov = vec;
+       EXPECT_NE(recvmsg(self->cfd, &hdr, 0), -1);
+       for (i = 0; i < msg_iovlen; i++)
+               len_compared += iov_len;
+
+       for (i = 0; i < msg_iovlen; i++)
+               free(iov_base[i]);
+}
+
+TEST_F(tls, single_send_multiple_recv)
+{
+       unsigned int total_len = TLS_PAYLOAD_MAX_LEN * 2;
+       unsigned int send_len = TLS_PAYLOAD_MAX_LEN;
+       char send_mem[TLS_PAYLOAD_MAX_LEN * 2];
+       char recv_mem[TLS_PAYLOAD_MAX_LEN * 2];
+
+       EXPECT_GE(send(self->fd, send_mem, total_len, 0), 0);
+       memset(recv_mem, 0, total_len);
+
+       EXPECT_NE(recv(self->cfd, recv_mem, send_len, 0), -1);
+       EXPECT_NE(recv(self->cfd, recv_mem + send_len, send_len, 0), -1);
+       EXPECT_EQ(memcmp(send_mem, recv_mem, total_len), 0);
+}
+
+TEST_F(tls, multiple_send_single_recv)
+{
+       unsigned int total_len = 2 * 10;
+       unsigned int send_len = 10;
+       char recv_mem[2 * 10];
+       char send_mem[10];
+
+       EXPECT_GE(send(self->fd, send_mem, send_len, 0), 0);
+       EXPECT_GE(send(self->fd, send_mem, send_len, 0), 0);
+       memset(recv_mem, 0, total_len);
+       EXPECT_EQ(recv(self->cfd, recv_mem, total_len, 0), total_len);
+
+       EXPECT_EQ(memcmp(send_mem, recv_mem, send_len), 0);
+       EXPECT_EQ(memcmp(send_mem, recv_mem + send_len, send_len), 0);
+}
+
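+/* A single sent record can be drained by multiple short recv() calls. */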
+TEST_F(tls, recv_partial)
+{
+       char const *test_str = "test_read_partial";
+       char const *test_str_first = "test_read";
+       char const *test_str_second = "_partial";
+       int send_len = strlen(test_str) + 1;
+       char recv_mem[18];
+
+       memset(recv_mem, 0, sizeof(recv_mem));
+       EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+       EXPECT_NE(recv(self->cfd, recv_mem, strlen(test_str_first), 0), -1);
+       EXPECT_EQ(memcmp(test_str_first, recv_mem, strlen(test_str_first)), 0);
+       memset(recv_mem, 0, sizeof(recv_mem));
+       EXPECT_NE(recv(self->cfd, recv_mem, strlen(test_str_second), 0), -1);
+       EXPECT_EQ(memcmp(test_str_second, recv_mem, strlen(test_str_second)),
+                 0);
+}
+
+TEST_F(tls, recv_nonblock)
+{
+       char buf[4096];
+       bool err;
+
+       EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_DONTWAIT), -1);
+       err = (errno == EAGAIN || errno == EWOULDBLOCK);
+       EXPECT_EQ(err, true);
+}
+
+TEST_F(tls, recv_peek)
+{
+       char const *test_str = "test_read_peek";
+       int send_len = strlen(test_str) + 1;
+       char buf[15];
+
+       EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+       EXPECT_NE(recv(self->cfd, buf, send_len, MSG_PEEK), -1);
+       EXPECT_EQ(memcmp(test_str, buf, send_len), 0);
+       memset(buf, 0, sizeof(buf));
+       EXPECT_NE(recv(self->cfd, buf, send_len, 0), -1);
+       EXPECT_EQ(memcmp(test_str, buf, send_len), 0);
+}
+
+TEST_F(tls, recv_peek_multiple)
+{
+       char const *test_str = "test_read_peek";
+       int send_len = strlen(test_str) + 1;
+       unsigned int num_peeks = 100;
+       char buf[15];
+       int i;
+
+       EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+       for (i = 0; i < num_peeks; i++) {
+               EXPECT_NE(recv(self->cfd, buf, send_len, MSG_PEEK), -1);
+               EXPECT_EQ(memcmp(test_str, buf, send_len), 0);
+               memset(buf, 0, sizeof(buf));
+       }
+       EXPECT_NE(recv(self->cfd, buf, send_len, 0), -1);
+       EXPECT_EQ(memcmp(test_str, buf, send_len), 0);
+}
+
+TEST_F(tls, pollin)
+{
+       char const *test_str = "test_poll";
+       struct pollfd fd = { 0, 0, 0 };
+       char buf[10];
+       int send_len = 10;
+
+       EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+       fd.fd = self->cfd;
+       fd.events = POLLIN;
+
+       EXPECT_EQ(poll(&fd, 1, 20), 1);
+       EXPECT_EQ(fd.revents & POLLIN, 1);
+       EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len);
+       /* Test timing out */
+       EXPECT_EQ(poll(&fd, 1, 20), 0);
+}
+
+TEST_F(tls, poll_wait)
+{
+       char const *test_str = "test_poll_wait";
+       int send_len = strlen(test_str) + 1;
+       struct pollfd fd = { 0, 0, 0 };
+       char recv_mem[15];
+
+       fd.fd = self->cfd;
+       fd.events = POLLIN;
+       EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+       /* Negative timeout: poll() waits indefinitely */
+       EXPECT_EQ(poll(&fd, 1, -1), 1);
+       EXPECT_EQ(fd.revents & POLLIN, 1);
+       EXPECT_EQ(recv(self->cfd, recv_mem, send_len, 0), send_len);
+}
+
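+/* Fork: the parent streams 100000 bytes in up-to-16KB chunks while the
+ * child drains them with blocking recv() calls; the parent then reaps
+ * the child and checks its exit status.
+ */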
+TEST_F(tls, blocking)
+{
+       size_t data = 100000;
+       int res = fork();
+
+       EXPECT_NE(res, -1);
+
+       if (res) {
+               /* parent */
+               size_t left = data;
+               char buf[16384];
+               int status;
+               int pid2;
+
+               while (left) {
+                       int res = send(self->fd, buf,
+                                      left > 16384 ? 16384 : left, 0);
+
+                       EXPECT_GE(res, 0);
+                       left -= res;
+               }
+
+               pid2 = wait(&status);
+               EXPECT_EQ(status, 0);
+               EXPECT_EQ(res, pid2);
+       } else {
+               /* child */
+               size_t left = data;
+               char buf[16384];
+
+               while (left) {
+                       int res = recv(self->cfd, buf,
+                                      left > 16384 ? 16384 : left, 0);
+
+                       EXPECT_GE(res, 0);
+                       left -= res;
+               }
+       }
+}
+
+TEST_F(tls, nonblocking)
+{
+       size_t data = 100000;
+       int sendbuf = 100;
+       int flags;
+       int res;
+
+       flags = fcntl(self->fd, F_GETFL, 0);
+       fcntl(self->fd, F_SETFL, flags | O_NONBLOCK);
+       fcntl(self->cfd, F_SETFL, flags | O_NONBLOCK);
+
+       /* Ensure nonblocking behavior by imposing a small send
+        * buffer.
+        */
+       EXPECT_EQ(setsockopt(self->fd, SOL_SOCKET, SO_SNDBUF,
+                            &sendbuf, sizeof(sendbuf)), 0);
+
+       res = fork();
+       EXPECT_NE(res, -1);
+
+       if (res) {
+               /* parent */
+               bool eagain = false;
+               size_t left = data;
+               char buf[16384];
+               int status;
+               int pid2;
+
+               while (left) {
+                       int res = send(self->fd, buf,
+                                      left > 16384 ? 16384 : left, 0);
+
+                       if (res == -1 && errno == EAGAIN) {
+                               eagain = true;
+                               usleep(10000);
+                               continue;
+                       }
+                       EXPECT_GE(res, 0);
+                       left -= res;
+               }
+
+               EXPECT_TRUE(eagain);
+               pid2 = wait(&status);
+
+               EXPECT_EQ(status, 0);
+               EXPECT_EQ(res, pid2);
+       } else {
+               /* child */
+               bool eagain = false;
+               size_t left = data;
+               char buf[16384];
+
+               while (left) {
+                       int res = recv(self->cfd, buf,
+                                      left > 16384 ? 16384 : left, 0);
+
+                       if (res == -1 && errno == EAGAIN) {
+                               eagain = true;
+                               usleep(10000);
+                               continue;
+                       }
+                       EXPECT_GE(res, 0);
+                       left -= res;
+               }
+               EXPECT_TRUE(eagain);
+       }
+}
+
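+/* A record type other than "data" is sent via a TLS_SET_RECORD_TYPE
+ * control message; the receiver must use recvmsg() with cmsg space to
+ * get it back as TLS_GET_RECORD_TYPE, while a plain recv() fails.
+ */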
+TEST_F(tls, control_msg)
+{
+       char cbuf[CMSG_SPACE(sizeof(char))];
+       char const *test_str = "test_read";
+       int cmsg_len = sizeof(char);
+       char record_type = 100;
+       struct cmsghdr *cmsg;
+       struct msghdr msg;
+       int send_len = 10;
+       struct iovec vec;
+       char buf[10];
+
+       if (self->notls)
+               return;
+
+       vec.iov_base = (char *)test_str;
+       vec.iov_len = 10;
+       memset(&msg, 0, sizeof(struct msghdr));
+       msg.msg_iov = &vec;
+       msg.msg_iovlen = 1;
+       msg.msg_control = cbuf;
+       msg.msg_controllen = sizeof(cbuf);
+       cmsg = CMSG_FIRSTHDR(&msg);
+       cmsg->cmsg_level = SOL_TLS;
+       /* test sending a non-data record type. */
+       cmsg->cmsg_type = TLS_SET_RECORD_TYPE;
+       cmsg->cmsg_len = CMSG_LEN(cmsg_len);
+       *CMSG_DATA(cmsg) = record_type;
+       msg.msg_controllen = cmsg->cmsg_len;
+
+       EXPECT_EQ(sendmsg(self->fd, &msg, 0), send_len);
+       /* Plain recv() fails: a non-data record needs a control message */
+       EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1);
+
+       vec.iov_base = buf;
+       EXPECT_EQ(recvmsg(self->cfd, &msg, 0), send_len);
+       cmsg = CMSG_FIRSTHDR(&msg);
+       EXPECT_NE(cmsg, NULL);
+       EXPECT_EQ(cmsg->cmsg_level, SOL_TLS);
+       EXPECT_EQ(cmsg->cmsg_type, TLS_GET_RECORD_TYPE);
+       record_type = *((unsigned char *)CMSG_DATA(cmsg));
+       EXPECT_EQ(record_type, 100);
+       EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
+}
+
+TEST_HARNESS_MAIN
index 3a2f51fc7fd457368d7e3a92e1d8187431ce1c5d..a022792d392a9c93bf4c33d049ea7c817b85eded 100644 (file)
             "$TC actions flush action csum"
         ]
     },
+    {
+        "id": "b10b",
+        "name": "Add all 7 csum actions",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action csum icmp ip4h sctp igmp udplite udp tcp index 7",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action csum index 7",
+        "matchPattern": "action order [0-9]*: csum \\(iph, icmp, igmp, tcp, udp, udplite, sctp\\).*index 7 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action csum"
+        ]
+    },
     {
         "id": "ce92",
         "name": "Add csum udp action with cookie",
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json b/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json
new file mode 100644 (file)
index 0000000..10b2d89
--- /dev/null
@@ -0,0 +1,917 @@
+[
+    {
+        "id": "2b11",
+        "name": "Add tunnel_key set action with mandatory parameters",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.20.2 id 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action tunnel_key",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 10.10.10.1.*dst_ip 20.20.20.2.*key_id 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "dc6b",
+        "name": "Add tunnel_key set action with missing mandatory src_ip parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set dst_ip 20.20.20.2 id 100",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action tunnel_key",
+        "matchPattern": "action order [0-9]+: tunnel_key set.*dst_ip 20.20.20.2.*key_id 100",
+        "matchCount": "0",
+        "teardown": [
+           [
+               "$TC actions flush action tunnel_key",
+               0,
+               1,
+               255
+           ]
+        ]
+    },
+    {
+        "id": "7f25",
+        "name": "Add tunnel_key set action with missing mandatory dst_ip parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 10.10.10.1 id 100",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action tunnel_key",
+        "matchPattern": "action order [0-9]+: tunnel_key set.*src_ip 10.10.10.1.*key_id 100",
+        "matchCount": "0",
+        "teardown": [
+           [
+               "$TC actions flush action tunnel_key",
+               0,
+               1,
+               255
+           ]
+        ]
+    },
+    {
+        "id": "ba4e",
+        "name": "Add tunnel_key set action with missing mandatory id parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.20.2",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action tunnel_key",
+        "matchPattern": "action order [0-9]+: tunnel_key set.*src_ip 10.10.10.1.*dst_ip 20.20.20.2",
+        "matchCount": "0",
+        "teardown": [
+           [
+               "$TC actions flush action tunnel_key",
+               0,
+               1,
+               255
+           ]
+        ]
+    },
+    {
+        "id": "a5e0",
+        "name": "Add tunnel_key set action with invalid src_ip parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 300.168.100.1 dst_ip 192.168.200.1 id 7 index 1",
+        "expExitCode": "1",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key set.*src_ip 300.168.100.1.*dst_ip 192.168.200.1.*key_id 7.*index 1 ref",
+        "matchCount": "0",
+        "teardown": [
+           [
+               "$TC actions flush action tunnel_key",
+               0,
+               1,
+               255
+           ]
+        ]
+    },
+    {
+        "id": "eaa8",
+        "name": "Add tunnel_key set action with invalid dst_ip parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 192.168.100.1 dst_ip 192.168.800.1 id 10 index 11",
+        "expExitCode": "1",
+        "verifyCmd": "$TC actions get action tunnel_key index 11",
+        "matchPattern": "action order [0-9]+: tunnel_key set.*src_ip 192.168.100.1.*dst_ip 192.168.800.1.*key_id 10.*index 11 ref",
+        "matchCount": "0",
+        "teardown": [
+           [
+               "$TC actions flush action tunnel_key",
+               0,
+               1,
+               255
+           ]
+        ]
+    },
+    {
+        "id": "3b09",
+        "name": "Add tunnel_key set action with invalid id parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 112233445566778899 index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 112233445566778899.*index 1 ref",
+        "matchCount": "0",
+        "teardown": [
+           [
+               "$TC actions flush action tunnel_key",
+               0,
+               1,
+               255
+           ]
+        ]
+    },
+    {
+        "id": "9625",
+        "name": "Add tunnel_key set action with invalid dst_port parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 11 dst_port 998877 index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 11.*dst_port 998877.*index 1 ref",
+        "matchCount": "0",
+        "teardown": [
+           [
+               "$TC actions flush action tunnel_key",
+               0,
+               1,
+               255
+           ]
+        ]
+    },
+    {
+        "id": "05af",
+        "name": "Add tunnel_key set action with optional dst_port parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 192.168.100.1 dst_ip 192.168.200.1 id 789 dst_port 4000 index 10",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action tunnel_key index 10",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 192.168.100.1.*dst_ip 192.168.200.1.*key_id 789.*dst_port 4000.*index 10 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "da80",
+        "name": "Add tunnel_key set action with index at 32-bit maximum",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 11 index 4294967295",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action tunnel_key index 4294967295",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*id 11.*index 4294967295 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "d407",
+        "name": "Add tunnel_key set action with index exceeding 32-bit maximum",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 11 index 4294967295678",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action tunnel_key index 4294967295678",
+        "matchPattern": "action order [0-9]+: tunnel_key set.*index 4294967295678 ref",
+        "matchCount": "0",
+        "teardown": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ]
+    },
+    {
+        "id": "5cba",
+        "name": "Add tunnel_key set action with id value at 32-bit maximum",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 4294967295 index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 4294967295.*index 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "e84a",
+        "name": "Add tunnel_key set action with id value exceeding 32-bit maximum",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42949672955 index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action tunnel_key index 4294967295",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42949672955.*index 1",
+        "matchCount": "0",
+        "teardown": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ]
+    },
+    {
+        "id": "9c19",
+        "name": "Add tunnel_key set action with dst_port value at 16-bit maximum",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 429 dst_port 65535 index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 429.*dst_port 65535.*index 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "3bd9",
+        "name": "Add tunnel_key set action with dst_port value exceeding 16-bit maximum",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 429 dst_port 65535789 index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 429.*dst_port 65535789.*index 1",
+        "matchCount": "0",
+        "teardown": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ]
+    },
+    {
+        "id": "68e2",
+        "name": "Add tunnel_key unset action",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key unset index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*unset.*index 1 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "6192",
+        "name": "Add tunnel_key unset continue action",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key unset continue index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*unset continue.*index 1 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "061d",
+        "name": "Add tunnel_key set continue action with cookie",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 192.168.10.1 dst_ip 192.168.20.2 id 123 continue index 1 cookie aa11bb22cc33dd44ee55ff66aa11b1b2",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 192.168.10.1.*dst_ip 192.168.20.2.*key_id 123.*csum continue.*index 1.*cookie aa11bb22cc33dd44ee55ff66aa11b1b2",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "8acb",
+        "name": "Add tunnel_key set continue action with invalid cookie",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 192.168.10.1 dst_ip 192.168.20.2 id 123 continue index 1 cookie aa11bb22cc33dd44ee55ff66aa11b1b2777888",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 192.168.10.1.*dst_ip 192.168.20.2.*key_id 123.*csum continue.*index 1.*cookie aa11bb22cc33dd44ee55ff66aa11b1b2777888",
+        "matchCount": "0",
+        "teardown": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ]
+    },
+    {
+        "id": "a07e",
+        "name": "Add tunnel_key action with no set/unset command specified",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key src_ip 10.10.10.1 dst_ip 20.20.20.2 id 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*src_ip 10.10.10.1.*dst_ip 20.20.20.2.*key_id 1",
+        "matchCount": "0",
+        "teardown": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ]
+    },
+    {
+        "id": "b227",
+        "name": "Add tunnel_key action with csum option",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.20.2 id 1 csum index 99",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action tunnel_key index 99",
+        "matchPattern": "action order [0-9]+: tunnel_key.*src_ip 10.10.10.1.*dst_ip 20.20.20.2.*key_id 1.*csum pipe.*index 99",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "58a7",
+        "name": "Add tunnel_key action with nocsum option",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 10.10.10.2 id 7823 nocsum index 234",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action tunnel_key index 234",
+        "matchPattern": "action order [0-9]+: tunnel_key.*src_ip 10.10.10.1.*dst_ip 10.10.10.2.*key_id 7823.*nocsum pipe.*index 234",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "2575",
+        "name": "Add tunnel_key action with not-supported parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 10.10.10.2 id 7 foobar 999 index 4",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action tunnel_key index 4",
+        "matchPattern": "action order [0-9]+: tunnel_key.*src_ip 10.10.10.1.*dst_ip 10.10.10.2.*key_id 7.*foobar 999.*index 4",
+        "matchCount": "0",
+        "teardown": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ]
+    },
+    {
+        "id": "7a88",
+        "name": "Add tunnel_key action with cookie parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 10.10.10.2 id 7 index 4 cookie aa11bb22cc33dd44ee55ff66aa11b1b2",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action tunnel_key index 4",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 10.10.10.1.*dst_ip 10.10.10.2.*key_id 7.*dst_port 0.*csum pipe.*index 4 ref.*cookie aa11bb22cc33dd44ee55ff66aa11b1b2",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "4f20",
+        "name": "Add tunnel_key action with a single geneve option parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 dst_port 6081 geneve_opts 0102:80:00880022 index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt 0102:80:00880022.*index 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "e33d",
+        "name": "Add tunnel_key action with multiple geneve options parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 dst_port 6081 geneve_opts 0102:80:00880022,0408:42:0040007611223344,0111:02:1020304011223344 index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt 0102:80:00880022,0408:42:0040007611223344,0111:02:1020304011223344.*index 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "0778",
+        "name": "Add tunnel_key action with invalid class geneve option parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 dst_port 6081 geneve_opts 824212:80:00880022 index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt 824212:80:00880022.*index 1",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "4ae8",
+        "name": "Add tunnel_key action with invalid type geneve option parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 dst_port 6081 geneve_opts 0102:4224:00880022 index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt 0102:4224:00880022.*index 1",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "4039",
+        "name": "Add tunnel_key action with short data length geneve option parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 dst_port 6081 geneve_opts 0102:80:4288 index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt 0102:80:4288.*index 1",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "26a6",
+        "name": "Add tunnel_key action with non-multiple of 4 data length geneve option parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 dst_port 6081 geneve_opts 0102:80:4288428822 index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt 0102:80:4288428822.*index 1",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "f44d",
+        "name": "Add tunnel_key action with incomplete geneve options parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 dst_port 6081 geneve_opts 0102:80:00880022,0408:42: index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt 0102:80:00880022,0408:42:.*index 1",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "7afc",
+        "name": "Replace tunnel_key set action with all parameters",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.20.2 dst_port 3128 csum id 1 index 1"
+        ],
+        "cmdUnderTest": "$TC actions replace action tunnel_key set src_ip 11.11.11.1 dst_ip 21.21.21.2 dst_port 3129 nocsum id 11 index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 11.11.11.1.*dst_ip 21.21.21.2.*key_id 11.*dst_port 3129.*nocsum pipe.*index 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "364d",
+        "name": "Replace tunnel_key set action with all parameters and cookie",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.20.2 dst_port 3128 nocsum id 1 index 1 cookie aabbccddeeff112233445566778800a"
+        ],
+        "cmdUnderTest": "$TC actions replace action tunnel_key set src_ip 11.11.11.1 dst_ip 21.21.21.2 dst_port 3129 id 11 csum reclassify index 1 cookie a1b1c1d1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 11.11.11.1.*dst_ip 21.21.21.2.*key_id 11.*dst_port 3129.*csum reclassify.*index 1.*cookie a1b1c1d1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "937c",
+        "name": "Fetch all existing tunnel_key actions",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.20.2 dst_port 3128 nocsum id 1 pipe index 1",
+            "$TC actions add action tunnel_key set src_ip 11.10.10.1 dst_ip 21.20.20.2 dst_port 3129 csum id 2 jump 10 index 2",
+            "$TC actions add action tunnel_key set src_ip 12.10.10.1 dst_ip 22.20.20.2 dst_port 3130 csum id 3 pass index 3",
+            "$TC actions add action tunnel_key set src_ip 13.10.10.1 dst_ip 23.20.20.2 dst_port 3131 nocsum id 4 continue index 4"
+        ],
+        "cmdUnderTest": "$TC actions list action tunnel_key",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action tunnel_key",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 10.10.10.1.*dst_ip 20.20.20.2.*key_id 1.*dst_port 3128.*nocsum pipe.*index 1.*set.*src_ip 11.10.10.1.*dst_ip 21.20.20.2.*key_id 2.*dst_port 3129.*csum jump 10.*index 2.*set.*src_ip 12.10.10.1.*dst_ip 22.20.20.2.*key_id 3.*dst_port 3130.*csum pass.*index 3.*set.*src_ip 13.10.10.1.*dst_ip 23.20.20.2.*key_id 4.*dst_port 3131.*nocsum continue.*index 4",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "6783",
+        "name": "Flush all existing tunnel_key actions",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.20.2 dst_port 3128 nocsum id 1 pipe index 1",
+            "$TC actions add action tunnel_key set src_ip 11.10.10.1 dst_ip 21.20.20.2 dst_port 3129 csum id 2 reclassify index 2",
+            "$TC actions add action tunnel_key set src_ip 12.10.10.1 dst_ip 22.20.20.2 dst_port 3130 csum id 3 pass index 3",
+            "$TC actions add action tunnel_key set src_ip 13.10.10.1 dst_ip 23.20.20.2 dst_port 3131 nocsum id 4 continue index 4"
+        ],
+        "cmdUnderTest": "$TC actions flush action tunnel_key",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action tunnel_key",
+        "matchPattern": "action order [0-9]+:.*",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/fw.json b/tools/testing/selftests/tc-testing/tc-tests/filters/fw.json
new file mode 100644 (file)
index 0000000..3b97cfd
--- /dev/null
@@ -0,0 +1,1049 @@
+[
+    {
+        "id": "901f",
+        "name": "Add fw filter with prio at 32-bit maxixum",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 65535 fw action ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 65535 protocol all fw",
+        "matchPattern": "pref 65535 fw.*handle 0x1.*gact action pass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "51e2",
+        "name": "Add fw filter with prio exceeding 32-bit maxixum",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 65536 fw action ok",
+        "expExitCode": "255",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 65536 protocol all fw",
+        "matchPattern": "pref 65536 fw.*handle 0x1.*gact action pass",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "d987",
+        "name": "Add fw filter with action ok",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw action ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+        "matchPattern": "handle 0x1.*gact action pass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "affe",
+        "name": "Add fw filter with action continue",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw action continue",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+        "matchPattern": "handle 0x1.*gact action continue",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "28bc",
+        "name": "Add fw filter with action pipe",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw action pipe",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+        "matchPattern": "handle 0x1.*gact action pipe",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "8da2",
+        "name": "Add fw filter with action drop",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw action drop",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol all prio 1 fw",
+        "matchPattern": "handle 0x1.*gact action drop",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "9436",
+        "name": "Add fw filter with action reclassify",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw action reclassify",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+        "matchPattern": "handle 0x1.*gact action reclassify",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "95bb",
+        "name": "Add fw filter with action jump 10",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw action jump 10",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+        "matchPattern": "handle 0x1.*gact action jump 10",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "3d74",
+        "name": "Add fw filter with action goto chain 5",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw action goto chain 5",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+        "matchPattern": "handle 0x1.*gact action goto chain 5",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "eb8f",
+        "name": "Add fw filter with invalid action",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw action pump",
+        "expExitCode": "255",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+        "matchPattern": "handle 0x1.*gact action pump",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "6a79",
+        "name": "Add fw filter with missing mandatory action",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw",
+        "expExitCode": "2",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+        "matchPattern": "filter protocol all pref [0-9]+ fw.*handle 0x1",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "8298",
+        "name": "Add fw filter with cookie",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 2 fw action pipe cookie aa11bb22cc33dd44ee55ff66aa11b1b2",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 2 protocol all fw",
+        "matchPattern": "pref 2 fw.*handle 0x1.*gact action pipe.*cookie aa11bb22cc33dd44ee55ff66aa11b1b2",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "a88c",
+        "name": "Add fw filter with invalid cookie",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 2 fw action continue cookie aa11bb22cc33dd44ee55ff66aa11b1b2777888",
+        "expExitCode": "255",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 2 protocol all fw",
+        "matchPattern": "pref 2 fw.*handle 0x1.*gact action continue.*cookie aa11bb22cc33dd44ee55ff66aa11b1b2777888",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "10f6",
+        "name": "Add fw filter with handle in hex",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 0xa1b2ff prio 1 fw action ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 0xa1b2ff prio 1 protocol all fw",
+        "matchPattern": "fw.*handle 0xa1b2ff.*gact action pass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "9d51",
+        "name": "Add fw filter with handle at 32-bit maximum",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 4294967295 prio 1 fw action ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 4294967295 prio 1 protocol all fw",
+        "matchPattern": "fw.*handle 0xffffffff.*gact action pass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "d939",
+        "name": "Add fw filter with handle exceeding 32-bit maximum",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 4294967296 prio 1 fw action ok",
+        "expExitCode": "1",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 4294967296 prio 1 protocol all fw",
+        "matchPattern": "fw.*handle 0x.*gact action pass",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "658c",
+        "name": "Add fw filter with mask in hex",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 10/0xa1b2f prio 1 fw action ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 10 prio 1 protocol all fw",
+        "matchPattern": "fw.*handle 0xa/0xa1b2f",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "86be",
+        "name": "Add fw filter with mask at 32-bit maximum",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 10/4294967295 prio 1 fw action ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 10 prio 1 protocol all fw",
+        "matchPattern": "fw.*handle 0xa[^/]",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "e635",
+        "name": "Add fw filter with mask exceeding 32-bit maximum",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 10/4294967296 prio 1 fw action ok",
+        "expExitCode": "1",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 10 prio 1 protocol all fw",
+        "matchPattern": "fw.*handle 0xa",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "6cab",
+        "name": "Add fw filter with handle/mask in hex",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 0xa1b2cdff/0x1a2bffdc prio 1 fw action ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 0xa1b2cdff prio 1 protocol all fw",
+        "matchPattern": "fw.*handle 0xa1b2cdff/0x1a2bffdc",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "8700",
+        "name": "Add fw filter with handle/mask at 32-bit maximum",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 4294967295/4294967295 prio 1 fw action ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 0xffffffff prio 1 protocol all fw",
+        "matchPattern": "fw.*handle 0xffffffff[^/]",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "7d62",
+        "name": "Add fw filter with handle/mask exceeding 32-bit maximum",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 4294967296/4294967296 prio 1 fw action ok",
+        "expExitCode": "1",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 10 prio 1 protocol all fw",
+        "matchPattern": "fw.*handle",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "7b69",
+        "name": "Add fw filter with missing mandatory handle",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: prio 1 fw action ok",
+        "expExitCode": "2",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "filter protocol all.*fw.*handle.*gact action pass",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "d68b",
+        "name": "Add fw filter with invalid parent",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent aa11b1b2: handle 1 prio 1 fw action ok",
+        "expExitCode": "255",
+        "verifyCmd": "$TC filter dev $DEV1 parent aa11b1b2: handle 1 prio 1 protocol all fw",
+        "matchPattern": "filter protocol all pref 1 fw.*handle 0x1.*gact action pass",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "66e0",
+        "name": "Add fw filter with missing mandatory parent id",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 handle 1 prio 1 fw action ok",
+        "expExitCode": "2",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+        "matchPattern": "pref [0-9]+ fw.*handle 0x1.*gact action pass",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "0ff3",
+        "name": "Add fw filter with classid",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw classid 3 action ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+        "matchPattern": "fw.*handle 0x1 classid :3.*gact action pass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "9849",
+        "name": "Add fw filter with classid at root",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw classid ffff:ffff action ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+        "matchPattern": "pref 1 fw.*handle 0x1 classid root.*gact action pass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "b7ff",
+        "name": "Add fw filter with classid - keeps last 8 (hex) digits",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw classid 98765fedcb action ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+        "matchPattern": "fw.*handle 0x1 classid 765f:edcb.*gact action pass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "2b18",
+        "name": "Add fw filter with invalid classid",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw classid 6789defg action ok",
+        "expExitCode": "1",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+        "matchPattern": "fw.*handle 0x1 classid 6789:defg.*gact action pass",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "fade",
+        "name": "Add fw filter with flowid",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 10 prio 1 fw flowid 1:10 action ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 10 prio 1 protocol all fw",
+        "matchPattern": "filter parent ffff: protocol all pref 1 fw.*handle 0xa classid 1:10.*gact action pass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "33af",
+        "name": "Add fw filter with flowid then classid (same arg, takes second)",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 11 prio 1 fw flowid 10 classid 4 action ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 11 prio 1 protocol all fw",
+        "matchPattern": "filter parent ffff: protocol all pref 1 fw.*handle 0xb classid :4.*gact action pass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "8a8c",
+        "name": "Add fw filter with classid then flowid (same arg, takes second)",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 11 prio 1 fw classid 4 flowid 10 action ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 11 prio 1 protocol all fw",
+        "matchPattern": "filter parent ffff: protocol all pref 1 fw.*handle 0xb classid :10.*gact action pass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "b50d",
+        "name": "Add fw filter with handle val/mask and flowid 10:1000",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: prio 3 handle 10/0xff fw flowid 10:1000 action ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 10 prio 3 protocol all fw",
+        "matchPattern": "filter parent ffff: protocol all pref 3 fw.*handle 0xa/0xff classid 10:1000.*gact action pass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "7207",
+        "name": "Add fw filter with protocol ip",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol ip prio 1 handle 3 fw action ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 3 prio 1 protocol ip fw",
+        "matchPattern": "filter parent ffff: protocol ip pref 1 fw.*handle 0x3.*gact action pass.*index [0-9]+ ref [0-9]+ bind [0-9]+",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "306d",
+        "name": "Add fw filter with protocol ipv6",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol ipv6 prio 2 handle 4 fw action ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 4 prio 2 protocol ipv6 fw",
+        "matchPattern": "filter parent ffff: protocol ipv6 pref 2 fw.*handle 0x4.*gact action pass.*index [0-9]+ ref [0-9]+ bind [0-9]+",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "9a78",
+        "name": "Add fw filter with protocol arp",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol arp prio 5 handle 7 fw action drop",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 7 prio 5 protocol arp fw",
+        "matchPattern": "filter parent ffff: protocol arp pref 5 fw.*handle 0x7.*gact action drop.*index [0-9]+ ref [0-9]+ bind [0-9]+",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "1821",
+        "name": "Add fw filter with protocol 802_3",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol 802_3 handle 1 prio 1 fw action ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol 802_3 fw",
+        "matchPattern": "filter parent ffff: protocol 802_3 pref 1 fw.*handle 0x1.*gact action pass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "2260",
+        "name": "Add fw filter with invalid protocol",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol igmp handle 1 prio 1 fw action ok",
+        "expExitCode": "255",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol igmp fw",
+        "matchPattern": "filter parent ffff: protocol igmp pref 1 fw.*handle 0x1.*gact action pass",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "09d7",
+        "name": "Add fw filters protocol 802_3 and ip with conflicting priorities",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: protocol 802_3 prio 3 handle 7 fw action ok"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol ip prio 3 handle 8 fw action ok",
+        "expExitCode": "2",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 8 prio 3 protocol ip fw",
+        "matchPattern": "filter parent ffff: protocol ip pref 3 fw.*handle 0x8",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "6973",
+        "name": "Add fw filters with same index, same action",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: prio 6 handle 2 fw action continue index 5"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: prio 8 handle 4 fw action continue index 5",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 4 prio 8 protocol all fw",
+        "matchPattern": "filter parent ffff: protocol all pref 8 fw.*handle 0x4.*gact action continue.*index 5 ref 2 bind 2",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "fc06",
+        "name": "Add fw filters with action police",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: prio 3 handle 4 fw action police rate 1kbit burst 10k index 5",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 4 prio 3 protocol all fw",
+        "matchPattern": "filter parent ffff: protocol all pref 3 fw.*handle 0x4.*police 0x5 rate 1Kbit burst 10Kb mtu 2Kb action reclassify overhead 0b.*ref 1 bind 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "aac7",
+        "name": "Add fw filters with action police linklayer atm",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: prio 3 handle 4 fw action police rate 2mbit burst 200k linklayer atm index 8",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 4 prio 3 protocol all fw",
+        "matchPattern": "filter parent ffff: protocol all pref 3 fw.*handle 0x4.*police 0x8 rate 2Mbit burst 200Kb mtu 2Kb action reclassify overhead 0b linklayer atm.*ref 1 bind 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "5339",
+        "name": "Del entire fw filter",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: handle 5 prio 7 fw action pass",
+            "$TC filter add dev $DEV1 parent ffff: handle 3 prio 9 fw action pass"
+        ],
+        "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff:",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "protocol all pref.*handle.*gact action pass",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "0e99",
+        "name": "Del single fw filter x1",
+        "__comment__": "First of two tests to check that one filter is there and the other isn't",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: handle 5 prio 7 fw action pass",
+            "$TC filter add dev $DEV1 parent ffff: handle 3 prio 9 fw action pass"
+        ],
+        "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff: handle 3 prio 9 fw action pass",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "protocol all pref 7.*handle 0x5.*gact action pass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "f54c",
+        "name": "Del single fw filter x2",
+        "__comment__": "Second of two tests to check that one filter is there and the other isn't",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: handle 5 prio 7 fw action pass",
+            "$TC filter add dev $DEV1 parent ffff: handle 3 prio 9 fw action pass"
+        ],
+        "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff: handle 3 prio 9 fw action pass",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "protocol all pref 9.*handle 0x3.*gact action pass",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "ba94",
+        "name": "Del fw filter by prio",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: handle 1 prio 4 fw action ok",
+            "$TC filter add dev $DEV1 parent ffff: handle 2 prio 4 fw action ok"
+        ],
+        "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff: prio 4",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "pref 4 fw.*gact action pass",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "4acb",
+        "name": "Del fw filter by chain",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: handle 4 prio 2 chain 13 fw action pipe",
+            "$TC filter add dev $DEV1 parent ffff: handle 3 prio 5 chain 13 fw action pipe"
+        ],
+        "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff: chain 13",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "fw chain 13 handle.*gact action pipe",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "3424",
+        "name": "Del fw filter by action (invalid)",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: handle 2 prio 4 fw action drop"
+        ],
+        "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff: fw action drop",
+        "expExitCode": "2",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 2 prio 4 protocol all fw",
+        "matchPattern": "handle 0x2.*gact action drop",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "da89",
+        "name": "Del fw filter by handle (invalid)",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: handle 3 prio 4 fw action continue"
+        ],
+        "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff: handle 3 fw",
+        "expExitCode": "2",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 3 prio 4 protocol all fw",
+        "matchPattern": "handle 0x3.*gact action continue",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "4d95",
+        "name": "Del fw filter by protocol (invalid)",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: handle 4 prio 2 protocol arp fw action pipe"
+        ],
+        "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff: protocol arp fw",
+        "expExitCode": "2",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 4 prio 2 protocol arp fw",
+        "matchPattern": "filter parent ffff: protocol arp.*handle 0x4.*gact action pipe",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "4736",
+        "name": "Del fw filter by flowid (invalid)",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: handle 4 prio 2 fw action pipe flowid 45"
+        ],
+        "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff: fw flowid 45",
+        "expExitCode": "2",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "handle 0x4.*gact action pipe",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "3dcb",
+        "name": "Replace fw filter action",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: handle 1 prio 2 fw action ok"
+        ],
+        "cmdUnderTest": "$TC filter replace dev $DEV1 parent ffff: handle 1 prio 2 fw action pipe",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "pref 2 fw.*handle 0x1.*gact action pipe",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "eb4d",
+        "name": "Replace fw filter classid",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: handle 1 prio 2 fw action ok"
+        ],
+        "cmdUnderTest": "$TC filter replace dev $DEV1 parent ffff: handle 1 prio 2 fw action pipe classid 2",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "pref 2 fw.*handle 0x1 classid :2.*gact action pipe",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "67ec",
+        "name": "Replace fw filter index",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: handle 1 prio 2 fw action ok index 3"
+        ],
+        "cmdUnderTest": "$TC filter replace dev $DEV1 parent ffff: handle 1 prio 2 fw action ok index 16",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "pref 2 fw.*handle 0x1.*gact action pass.*index 16",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    }
+]
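To make the matchPattern/matchCount contract concrete, the check below applies test 901f's pattern to output shaped like what "tc filter get ... fw" prints. The sample text is illustrative, not captured from a real run, and the DOTALL flag again stands in for the framework letting ".*" cross line breaks in multi-line tc output.

import re

# Pattern from test 901f above (prio at the 16-bit maximum).
pattern = r"pref 65535 fw.*handle 0x1.*gact action pass"

# Illustrative output in the shape `tc filter get ... fw` prints;
# not captured from a real run.
sample = (
    "filter parent ffff: protocol all pref 65535 fw chain 0 handle 0x1 \n"
    "\taction order 1: gact action pass\n"
    "\t random type none pass val 0\n"
    "\t index 1 ref 1 bind 1\n"
)

# The runner counts non-overlapping regex hits in the verify output and
# compares the count against matchCount ("1" for this test).
assert len(re.findall(pattern, sample, re.DOTALL)) == 1

In the selftests tree these files are executed through tdc.py from tools/testing/selftests/tc-testing/, e.g. pointing it at a single test file such as tc-tests/filters/fw.json; the exact invocation flags are documented in that directory's README and should be checked against the tree in use.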